# Import all the libraries here

In [83]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fredapi import Fred
import yfinance as yf
from datetime import datetime
%matplotlib inline


# API keys
fred_api = "20ed5f709126670bc05567f49dbe8fae"

# Get Your Data Here

In [84]:
fred = Fred(api_key=fred_api)

In [85]:
def get_ticker_data(ticker_symbol, plot=False):
    data = yf.Ticker(ticker_symbol)
    ticker = data.history(period='max')
    ticker.drop(columns=['Dividends', 'Stock Splits'], inplace=True)
    ticker.reset_index(inplace=True)

    for i in range(len(ticker['Date'])):
        ticker.iloc[i, 0] = ticker.iloc[i, 0].date()

    if plot:
        plt.plot(np.arange(0, len(ticker['Date'])), ticker['Open'])
        plt.show()

    return ticker

In [134]:
gdp = fred.get_series("GDP") # gdp of US
inflation = fred.get_series("CPIAUCSL") # CPI
unemployment = fred.get_series("UNRATE") # unemployment rate
fed_int_rate = fred.get_series("FEDFUNDS") # federal funds rate
ten_y_tres_yield = fred.get_series("DGS10") # US 10 year bond yield
m2_mon_supp = fred.get_series("M2SL") # how much money is circulationg in the economy
cons_sent_idx = fred.get_series("UMCSENT") # measures consumer confidence
ind_pro_idx = fred.get_series("INDPRO") # industrial production, total index
exports = fred.get_series("EXPGS") # exports of goods and services
imports = fred.get_series("IMPGS") # imports of goods and services
new_home_const = fred.get_series("HOUST") # new privately owned housing units
mortgage_rate = fred.get_series("MORTGAGE30US") # 30-Year Fixed Rate Mortgage Average
volatility_index = fred.get_series("VIXCLS") # CBOE Volatility Index
crude_oil_wti = fred.get_series("DCOILWTICO") # crude oil price, wti

**Gross Domestic Product (GDP)**
- Gross domestic product (GDP), the featured measure of U.S. output, is the market value of the goods and services produced by labor and property located in the United States.For more information, see the Guide to the National Income and Product Accounts of the United States (NIPA) and the Bureau of Economic Analysis.

**Consumer Price Index for All Urban Consumers: All Items in U.S. City Average (CPIAUCSL)**
- The Consumer Price Index for All Urban Consumers: All Items (CPIAUCSL) is a price index of a basket of goods and services paid by urban consumers. Percent changes in the price index measure the inflation rate between any two time periods. The most common inflation metric is the percent change from one year ago. It can also represent the buying habits of urban consumers. This particular index includes roughly 88 percent of the total population, accounting for wage earners, clerical workers, technical workers, self-employed, short-term workers, unemployed, retirees, and those not in the labor force.

- The CPIs are based on prices for food, clothing, shelter, and fuels; transportation fares; service fees (e.g., water and sewer service); and sales taxes. Prices are collected monthly from about 4,000 housing units and approximately 26,000 retail establishments across 87 urban areas. To calculate the index, price changes are averaged with weights representing their importance in the spending of the particular group. The index measures price changes (as a percent change) from a predetermined reference date. In addition to the original unadjusted index distributed, the Bureau of Labor Statistics also releases a seasonally adjusted index. The unadjusted series reflects all factors that may influence a change in prices. However, it can be very useful to look at the seasonally adjusted CPI, which removes the effects of seasonal changes, such as weather, school year, production cycles, and holidays.

- The CPI can be used to recognize periods of inflation and deflation. Significant increases in the CPI within a short time frame might indicate a period of inflation, and significant decreases in CPI within a short time frame might indicate a period of deflation. However, because the CPI includes volatile food and oil prices, it might not be a reliable measure of inflationary and deflationary periods. For a more accurate detection, the core CPI (CPILFESL) is often used. When using the CPI, please note that it is not applicable to all consumers and should not be used to determine relative living costs. Additionally, the CPI is a statistical measure vulnerable to sampling error since it is based on a sample of prices and not the complete average.

**Unemployment Rate (UNRATE)**
- The unemployment rate represents the number of unemployed as a percentage of the labor force. Labor force data are restricted to people 16 years of age and older, who currently reside in 1 of the 50 states or the District of Columbia, who do not reside in institutions (e.g., penal and mental facilities, homes for the aged), and who are not on active duty in the Armed Forces. This rate is also defined as the U-3 measure of labor underutilization. The series comes from the 'Current Population Survey (Household Survey)'. The source code is: LNS14000000

# Know your data here

In [87]:
gdp

1946-01-01          NaN
1946-04-01          NaN
1946-07-01          NaN
1946-10-01          NaN
1947-01-01      243.164
                ...    
2023-10-01    28296.967
2024-01-01    28624.069
2024-04-01    29016.714
2024-07-01    29374.914
2024-10-01    29719.647
Length: 316, dtype: float64

In [88]:
inflation

1947-01-01     21.480
1947-02-01     21.620
1947-03-01     22.000
1947-04-01     22.000
1947-05-01     21.950
               ...   
2024-09-01    314.851
2024-10-01    315.564
2024-11-01    316.449
2024-12-01    317.603
2025-01-01    319.086
Length: 937, dtype: float64

In [89]:
unemployment

1948-01-01    3.4
1948-02-01    3.8
1948-03-01    4.0
1948-04-01    3.9
1948-05-01    3.5
             ... 
2024-09-01    4.1
2024-10-01    4.1
2024-11-01    4.2
2024-12-01    4.1
2025-01-01    4.0
Length: 925, dtype: float64

# Create dataframes here

In [151]:
df_gdp = gdp.to_frame(name="gdp").reset_index().rename(columns={"index" : "year"})
df_infl = inflation.to_frame(name="inflation").reset_index().rename(columns={"index" : "year"})
df_unem = unemployment.to_frame(name="unemployment").reset_index().rename(columns={"index" : "year"})
df_fed_rate = fed_int_rate.to_frame(name="fed_int_rate").reset_index().rename(columns={"index" : "year"})
df_ten_y_yield = ten_y_tres_yield.to_frame(name="ten_year_yield").reset_index().rename(columns={"index" : "year"})
df_m2 = m2_mon_supp.to_frame(name="m2_money_supp").reset_index().rename(columns={"index" : "year"})
df_consum_sent = cons_sent_idx.to_frame(name="consum_sent_idx").reset_index().rename(columns={"index" : "year"})
df_ind_pro = ind_pro_idx.to_frame(name="indus_pro_idx").reset_index().rename(columns={"index" : "year"})
df_exports = exports.to_frame(name="exports").reset_index().rename(columns={"index" : "year"})
df_imports = imports.to_frame(name="imports").reset_index().rename(columns={"index" : "year"})
df_new_home = new_home_const.to_frame(name="new_home_const").reset_index().rename(columns={"index" : "year"})
df_mortgage_rate = mortgage_rate.to_frame(name="mortgage_rate").reset_index().rename(columns={"index" : "year"})
df_volatility_idx = volatility_index.to_frame(name="vix").reset_index().rename(columns={"index" : "year"})
df_wti = crude_oil_wti.to_frame(name="wti").reset_index().rename(columns={"index" : "year"})

In [152]:
df_gdp

Unnamed: 0,year,gdp
0,1946-01-01,
1,1946-04-01,
2,1946-07-01,
3,1946-10-01,
4,1947-01-01,243.164
...,...,...
311,2023-10-01,28296.967
312,2024-01-01,28624.069
313,2024-04-01,29016.714
314,2024-07-01,29374.914


In [153]:
df_infl

Unnamed: 0,year,inflation
0,1947-01-01,21.480
1,1947-02-01,21.620
2,1947-03-01,22.000
3,1947-04-01,22.000
4,1947-05-01,21.950
...,...,...
932,2024-09-01,314.851
933,2024-10-01,315.564
934,2024-11-01,316.449
935,2024-12-01,317.603


In [154]:
df_unem

Unnamed: 0,year,unemployment
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5
...,...,...
920,2024-09-01,4.1
921,2024-10-01,4.1
922,2024-11-01,4.2
923,2024-12-01,4.1


In [155]:
df_merged = df_infl.merge(df_unem, how="left", on="year")
df_merged

Unnamed: 0,year,inflation,unemployment
0,1947-01-01,21.480,
1,1947-02-01,21.620,
2,1947-03-01,22.000,
3,1947-04-01,22.000,
4,1947-05-01,21.950,
...,...,...,...
932,2024-09-01,314.851,4.1
933,2024-10-01,315.564,4.1
934,2024-11-01,316.449,4.2
935,2024-12-01,317.603,4.1


In [107]:
sp500 = get_ticker_data("^SPX", plot=False)

  ticker.iloc[i, 0] = ticker.iloc[i, 0].date()


In [108]:
sp500

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1927-12-30,17.660000,17.660000,17.660000,17.660000,0
1,1928-01-03,17.760000,17.760000,17.760000,17.760000,0
2,1928-01-04,17.719999,17.719999,17.719999,17.719999,0
3,1928-01-05,17.549999,17.549999,17.549999,17.549999,0
4,1928-01-06,17.660000,17.660000,17.660000,17.660000,0
...,...,...,...,...,...,...
24401,2025-02-24,6026.689941,6043.649902,5977.830078,5983.250000,4990120000
24402,2025-02-25,5982.729980,5992.649902,5908.490234,5955.250000,5374690000
24403,2025-02-26,5970.870117,6009.819824,5932.689941,5956.060059,4869580000
24404,2025-02-27,5981.879883,5993.689941,5858.779785,5861.569824,5057680000


In [109]:
sp500.rename(columns={"Date": "year", "Close": "close"}, inplace=True)

In [110]:
sp500.head(4)

Unnamed: 0,year,Open,High,Low,close,Volume
0,1927-12-30,17.66,17.66,17.66,17.66,0
1,1928-01-03,17.76,17.76,17.76,17.76,0
2,1928-01-04,17.719999,17.719999,17.719999,17.719999,0
3,1928-01-05,17.549999,17.549999,17.549999,17.549999,0


In [111]:
sp500.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24406 entries, 0 to 24405
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    24406 non-null  object 
 1   Open    24406 non-null  float64
 2   High    24406 non-null  float64
 3   Low     24406 non-null  float64
 4   close   24406 non-null  float64
 5   Volume  24406 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 1.1+ MB


In [112]:
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   year          308 non-null    datetime64[ns]
 1   gdp           308 non-null    float64       
 2   inflation     308 non-null    float64       
 3   unemployment  308 non-null    float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 9.8 KB


In [113]:
sp500["year"] = pd.to_datetime(sp500["year"])

In [114]:
df_merged = df_merged.merge(sp500, on="year", how="inner")
df_merged

Unnamed: 0,year,gdp,inflation,unemployment,Open,High,Low,close,Volume
0,1948-04-01,272.567,23.820,3.9,15.120000,15.120000,15.120000,15.120000,0
1,1948-07-01,279.196,24.400,3.6,16.700001,16.700001,16.700001,16.700001,0
2,1948-10-01,280.366,24.310,3.7,15.670000,15.670000,15.670000,15.670000,0
3,1949-04-01,271.351,23.920,5.3,14.940000,14.940000,14.940000,14.940000,0
4,1949-07-01,272.889,23.700,6.7,14.260000,14.260000,14.260000,14.260000,0
...,...,...,...,...,...,...,...,...,...
156,2022-04-01,25805.791,288.582,3.7,4540.319824,4548.700195,4507.569824,4545.859863,4562940000
157,2022-07-01,26272.011,294.940,3.5,3781.000000,3829.820068,3752.100098,3825.330078,4046950000
158,2024-04-01,29016.714,313.016,3.9,5257.970215,5263.950195,5229.200195,5243.770020,3325930000
159,2024-07-01,29374.914,313.566,4.2,5471.080078,5479.549805,5446.529785,5475.089844,3488760000


In [115]:
# drop open, high, low and Volume
df_merged.drop(columns=["Open", "High", "Low", "Volume"], inplace=True)
df_merged.rename(columns={"close": "sp_close"}, inplace=True)

In [123]:
ndx.drop(columns=["Open", "High", "Low", "Volume"], inplace=True)
ndx.rename(columns={"Date" : "year", "Close": "ndx_close"}, inplace=True)
ndx

Unnamed: 0,year,ndx_close
0,1985-10-01,112.139999
1,1985-10-02,110.824997
2,1985-10-03,110.870003
3,1985-10-04,110.074997
4,1985-10-07,108.199997
...,...,...
9926,2025-02-24,21352.080078
9927,2025-02-25,21087.250000
9928,2025-02-26,21132.919922
9929,2025-02-27,20550.949219


In [136]:
df_merged

Unnamed: 0,year,gdp,inflation,unemployment,sp_close
0,1948-04-01,272.567,23.820,3.9,15.120000
1,1948-07-01,279.196,24.400,3.6,16.700001
2,1948-10-01,280.366,24.310,3.7,15.670000
3,1949-04-01,271.351,23.920,5.3,14.940000
4,1949-07-01,272.889,23.700,6.7,14.260000
...,...,...,...,...,...
156,2022-04-01,25805.791,288.582,3.7,4545.859863
157,2022-07-01,26272.011,294.940,3.5,3825.330078
158,2024-04-01,29016.714,313.016,3.9,5243.770020
159,2024-07-01,29374.914,313.566,4.2,5475.089844


In [138]:
fed_int_rate

1954-07-01    0.80
1954-08-01    1.22
1954-09-01    1.07
1954-10-01    0.85
1954-11-01    0.83
              ... 
2024-09-01    5.13
2024-10-01    4.83
2024-11-01    4.64
2024-12-01    4.48
2025-01-01    4.33
Length: 847, dtype: float64

In [140]:
ten_y_tres_yield

1962-01-02    4.06
1962-01-03    4.03
1962-01-04    3.99
1962-01-05    4.02
1962-01-08    4.03
              ... 
2025-02-21    4.42
2025-02-24    4.40
2025-02-25    4.30
2025-02-26    4.25
2025-02-27    4.29
Length: 16478, dtype: float64

In [141]:
m2_mon_supp

1959-01-01      286.6
1959-02-01      287.7
1959-03-01      289.2
1959-04-01      290.1
1959-05-01      292.2
               ...   
2024-09-01    21252.4
2024-10-01    21332.7
2024-11-01    21465.8
2024-12-01    21549.3
2025-01-01    21561.4
Length: 793, dtype: float64

In [142]:
cons_sent_idx

1952-11-01    86.2
1952-12-01     NaN
1953-01-01     NaN
1953-02-01    90.7
1953-03-01     NaN
              ... 
2024-09-01    70.1
2024-10-01    70.5
2024-11-01    71.8
2024-12-01    74.0
2025-01-01    71.7
Length: 867, dtype: float64

In [143]:
ind_pro_idx

1919-01-01      4.8654
1919-02-01      4.6504
1919-03-01      4.5160
1919-04-01      4.5966
1919-05-01      4.6235
                ...   
2024-09-01    102.5873
2024-10-01    102.1219
2024-11-01    101.9736
2024-12-01    102.9833
2025-01-01    103.5110
Length: 1273, dtype: float64

In [144]:
exports

1946-01-01         NaN
1946-04-01         NaN
1946-07-01         NaN
1946-10-01         NaN
1947-01-01      18.394
                ...   
2023-10-01    3091.748
2024-01-01    3125.421
2024-04-01    3154.321
2024-07-01    3220.292
2024-10-01    3218.600
Length: 316, dtype: float64

In [145]:
imports

1946-01-01         NaN
1946-04-01         NaN
1946-07-01         NaN
1946-10-01         NaN
1947-01-01       7.519
                ...   
2023-10-01    3882.899
2024-01-01    3966.989
2024-04-01    4061.192
2024-07-01    4163.975
2024-10-01    4149.659
Length: 316, dtype: float64

In [146]:
new_home_const

1959-01-01    1657.0
1959-02-01    1667.0
1959-03-01    1620.0
1959-04-01    1590.0
1959-05-01    1498.0
               ...  
2024-09-01    1355.0
2024-10-01    1344.0
2024-11-01    1305.0
2024-12-01    1515.0
2025-01-01    1366.0
Length: 793, dtype: float64

In [147]:
mortgage_rate

1971-04-02    7.33
1971-04-09    7.31
1971-04-16    7.31
1971-04-23    7.31
1971-04-30    7.29
              ... 
2025-01-30    6.95
2025-02-06    6.89
2025-02-13    6.87
2025-02-20    6.85
2025-02-27    6.76
Length: 2814, dtype: float64

In [148]:
volatility_index

1990-01-02    17.24
1990-01-03    18.19
1990-01-04    19.22
1990-01-05    20.11
1990-01-08    20.26
              ...  
2025-02-24    18.98
2025-02-25    19.43
2025-02-26    19.10
2025-02-27    21.13
2025-02-28    19.63
Length: 9174, dtype: float64

In [149]:
crude_oil_wti

1986-01-02    25.56
1986-01-03    26.00
1986-01-06    26.53
1986-01-07    25.85
1986-01-08    25.87
              ...  
2025-02-18    72.21
2025-02-19    72.58
2025-02-20    72.88
2025-02-21    70.72
2025-02-24    71.06
Length: 10213, dtype: float64