In [None]:
import numpy as np
import pandas as pd

In [None]:
df = pd.read_excel("/content/finance_economics_dataset.csv.xlsx")
df

Unnamed: 0,Date,Stock Index,Open Price,Close Price,Daily High,Daily Low,Trading Volume,GDP Growth (%),Inflation Rate (%),Unemployment Rate (%),...,Forex USD/EUR,Forex USD/JPY,Crude Oil Price (USD per Barrel),Gold Price (USD per Ounce),Real Estate Index,Retail Sales (Billion USD),Bankruptcy Rate (%),Mergers & Acquisitions Deals,Venture Capital Funding (Billion USD),Consumer Spending (Billion USD)
0,2000-01-01,Dow Jones,2128.75,2138.48,2143.70,2100.55,2670411,-0.37,6.06,6.10,...,1.04,119.87,47.20,1052.34,390.23,2229,2.12,3,76.64,4589
1,2000-01-02,S&P 500,2046.82,2036.18,2082.83,2009.53,690220415,3.19,4.95,6.62,...,1.00,98.22,52.84,1957.73,346.23,4156,1.40,21,5.67,10101
2,2000-01-03,Dow Jones,1987.92,1985.26,2022.28,1978.37,315284661,5.54,9.13,2.60,...,0.83,80.13,78.80,2339.49,439.46,340,0.79,48,39.43,13665
3,2000-01-04,Dow Jones,4625.02,4660.47,4665.26,4595.46,13098297,10.00,3.77,2.20,...,0.95,149.15,28.18,1308.54,213.07,8456,4.22,16,12.83,5192
4,2000-01-05,S&P 500,1998.18,1982.18,2044.31,1966.44,385306746,1.53,2.20,8.20,...,1.43,113.71,92.20,2210.08,405.49,1596,2.21,34,86.37,10688
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,2008-03-14,Dow Jones,1273.76,1235.44,1317.28,1192.34,86910001,8.84,7.00,10.44,...,1.18,113.52,61.98,2385.41,298.72,3033,8.75,35,66.01,2200
2996,2008-03-15,S&P 500,4589.59,4566.91,4607.56,4545.69,303579301,-1.88,9.15,5.61,...,1.17,147.10,31.61,2147.13,143.43,7950,5.07,18,25.49,5261
2997,2008-03-16,S&P 500,1477.52,1448.47,1523.86,1402.69,442387176,4.07,0.88,8.91,...,1.02,81.99,45.64,1943.61,105.81,3894,5.78,4,14.29,12039
2998,2008-03-17,NASDAQ,2311.37,2306.90,2346.30,2282.00,523425925,1.06,8.61,10.15,...,1.49,86.42,123.25,2211.40,236.07,2543,2.35,22,94.34,11979


# **Financial Analysis**

In [None]:
# 1. What is the shape of the dataset?

print(df.shape)

(3000, 24)


In [None]:
# 2. What are the column names and their data types?

print(df.dtypes)

Date                                     datetime64[ns]
Stock Index                                      object
Open Price                                      float64
Close Price                                     float64
Daily High                                      float64
Daily Low                                       float64
Trading Volume                                    int64
GDP Growth (%)                                  float64
Inflation Rate (%)                              float64
Unemployment Rate (%)                           float64
Interest Rate (%)                               float64
Consumer Confidence Index                         int64
Government Debt (Billion USD)                     int64
Corporate Profits (Billion USD)                   int64
Forex USD/EUR                                   float64
Forex USD/JPY                                   float64
Crude Oil Price (USD per Barrel)                float64
Gold Price (USD per Ounce)                      

In [None]:
# 3. How many unique stock indices are there?

print(df['Stock Index'].nunique())

3


In [None]:
# 4. What is the date range of the dataset?

print(df['Date'].min(), df['Date'].max())


2000-01-01 00:00:00 2008-03-18 00:00:00


In [None]:
# 5. Are there any missing values?

print(df.isnull().sum())


Date                                     0
Stock Index                              0
Open Price                               0
Close Price                              0
Daily High                               0
Daily Low                                0
Trading Volume                           0
GDP Growth (%)                           0
Inflation Rate (%)                       0
Unemployment Rate (%)                    0
Interest Rate (%)                        0
Consumer Confidence Index                0
Government Debt (Billion USD)            0
Corporate Profits (Billion USD)          0
Forex USD/EUR                            0
Forex USD/JPY                            0
Crude Oil Price (USD per Barrel)         0
Gold Price (USD per Ounce)               0
Real Estate Index                        0
Retail Sales (Billion USD)               0
Bankruptcy Rate (%)                      0
Mergers & Acquisitions Deals             0
Venture Capital Funding (Billion USD)    0
Consumer Sp

In [None]:
# 6. Are there negative values in columns that should be non-negative?

print((df['Trading Volume'] < 0).sum())


0


In [None]:
# 7. What is the summary of GDP Growth (%)?

print(df['GDP Growth (%)'].describe())


count    3000.000000
mean        2.608903
std         4.287337
min        -5.000000
25%        -1.012500
50%         2.725000
75%         6.242500
max        10.000000
Name: GDP Growth (%), dtype: float64


In [None]:
# 8.  Are there rows with zero or near-zero trading volume?

print(df[df['Trading Volume'] <= 1])


Empty DataFrame
Columns: [Date, Stock Index, Open Price, Close Price, Daily High, Daily Low, Trading Volume, GDP Growth (%), Inflation Rate (%), Unemployment Rate (%), Interest Rate (%), Consumer Confidence Index, Government Debt (Billion USD), Corporate Profits (Billion USD), Forex USD/EUR, Forex USD/JPY, Crude Oil Price (USD per Barrel), Gold Price (USD per Ounce), Real Estate Index, Retail Sales (Billion USD), Bankruptcy Rate (%), Mergers & Acquisitions Deals, Venture Capital Funding (Billion USD), Consumer Spending (Billion USD)]
Index: []

[0 rows x 24 columns]


In [None]:
# 9. Are there any duplicate rows?

print(df.duplicated().sum())


0


In [None]:
# 10. Are there outliers in GDP, Gold, or Oil prices?

Q1, Q3 = df['GDP Growth (%)'].quantile([0.25,0.75])
IQR = Q3 - Q1
outliers = df[(df['GDP Growth (%)'] < Q1-1.5*IQR) | (df['GDP Growth (%)'] > Q3+1.5*IQR)]
outliers.shape[0]


0

In [None]:
# 11. What is the summary of Inflation Rate (%)?

print(df['Inflation Rate (%)'].describe())



count    3000.000000
mean        5.096830
std         2.910513
min         0.010000
25%         2.607500
50%         5.110000
75%         7.600000
max        10.000000
Name: Inflation Rate (%), dtype: float64


In [None]:
# 12. What is the average unemployment rate?

print(df['Unemployment Rate (%)'].mean())

8.663786666666667


In [None]:
# 13. Which index has the highest trading volume?

print(df.groupby('Stock Index')['Trading Volume'].sum().idxmax())


S&P 500


In [None]:
# 14.  How many stock records are from each index?

print(df['Stock Index'].value_counts())

Stock Index
S&P 500      1036
NASDAQ       1007
Dow Jones     957
Name: count, dtype: int64


In [None]:
# 15.  What is the correlation between inflation and interest rate?

print(df['Inflation Rate (%)'].corr(df['Interest Rate (%)']))

0.00573292723557624


In [None]:
# 16. What is the average Consumer Confidence Index?

print(df['Consumer Confidence Index'].mean())

85.036


In [None]:
# 17. Which column has the highest standard deviation?

print(df.select_dtypes(include=np.number).std().idxmax())

Trading Volume


In [None]:
# 18. What is the highest gold price recorded?

print(df['Gold Price (USD per Ounce)'].max())


2499.66


In [None]:
# 19.  Which date had the highest crude oil price?

idx = df['Crude Oil Price (USD per Barrel)'].idxmax()
print(df.loc[idx, 'Date'])

2001-11-22 00:00:00


In [None]:
# 20. What is the average corporate profit?

print(df['Corporate Profits (Billion USD)'].mean())


2553.7563333333333


# **Insightful Analysis Questions**


In [None]:
# 1.  What percentage of the dataset shows negative GDP growth?

print((df['GDP Growth (%)'] < 0).mean() * 100)


31.566666666666666


In [None]:
# 2. Does high inflation correspond to higher interest rates?

print(df['Inflation Rate (%)'].corr(df['Interest Rate (%)']))


0.00573292723557624


In [None]:
# 3. Is there a relationship between unemployment and consumer spending?

print(df['Unemployment Rate (%)'].corr(df['Consumer Spending (Billion USD)']))


0.018786001938759088


In [None]:
# 4. Do higher corporate profits align with higher consumer confidence?

print(df['Corporate Profits (Billion USD)'].corr(df['Consumer Confidence Index']))


0.018008055888269585


In [None]:
# 5. What’s the trend of crude oil prices over time?

print(df.groupby(df['Date'].dt.year)['Crude Oil Price (USD per Barrel)'].mean())


Date
2000    84.658934
2001    84.038000
2002    89.407973
2003    83.356630
2004    86.253197
2005    85.585836
2006    87.158712
2007    84.741041
2008    81.420897
Name: Crude Oil Price (USD per Barrel), dtype: float64


In [None]:
# 6. Are gold prices inversely related to stock performance?

print(df['Gold Price (USD per Ounce)'].corr(df['Close Price']))


-0.014543960422329255


In [None]:
# 7. Does government debt impact consumer confidence?

print(df['Government Debt (Billion USD)'].corr(df['Consumer Confidence Index']))


-0.03637006085825002


In [None]:
# 8. How do mergers & acquisitions (M&A) activity correlate with stock index closing prices?

print(df['Mergers & Acquisitions Deals'].corr(df['Close Price']))


0.002637100789439038


In [None]:
# 9. Is retail sales growth associated with GDP growth?

print(df['Retail Sales (Billion USD)'].corr(df['GDP Growth (%)']))


-0.014287611445719916


In [None]:
# 10. Is stock market performance linked to consumer spending?

print(df['Close Price'].corr(df['Consumer Spending (Billion USD)']))


0.00037555939318850896


In [None]:
# 11. Which stock index had the highest average closing price?

print(df.groupby('Stock Index')['Close Price'].mean().idxmax())


Dow Jones


In [None]:
# 12. What is the relationship between interest rate and unemployment?

print(df['Interest Rate (%)'].corr(df['Unemployment Rate (%)']))


0.01800407213097277


In [None]:
# 13. Do lower consumer confidence values coincide with higher bankruptcy rates?

print(df['Consumer Confidence Index'].corr(df['Bankruptcy Rate (%)']))


-0.01631304347909408


In [None]:
# 14. Which indicator has the highest correlation with stock close price?

print(df.corr(numeric_only=True)['Close Price'].sort_values(ascending=False).head(3))


Close Price    1.000000
Daily High     0.999821
Daily Low      0.999820
Name: Close Price, dtype: float64


In [None]:
# 15. Are unemployment rates lower when corporate profits are high?

print(df['Corporate Profits (Billion USD)'].corr(df['Unemployment Rate (%)']))


0.008292414188235562
