In [2]:
import pandas as pd
import cpi
import nasdaqdatalink
import datetime as dt

In [3]:
import os
from dotenv import load_dotenv

In [4]:
# Pull variables from the local keys.env file into the process environment,
# then hand the Nasdaq Data Link key to the client library.
load_dotenv("keys.env")
nasdaq_api_key = os.environ.get("NASDAQ_API_KEY")  # None when the variable is not set
nasdaqdatalink.ApiConfig.api_key = nasdaq_api_key

In [29]:
years = 72
# Start one year earlier than the reporting window: the first 12 months are
# only the baseline for the first year-over-year comparison.
start_year = dt.date.today().year - (years + 1)
# (years + 1) full calendar years plus the already-completed months of the current year.
months = ((years + 1) * 12) + (dt.date.today().month - 1)

# First day of every month in the window, built as a DatetimeIndex.
# A real datetime index (rather than a list of datetime.date objects, which
# produces an object-dtype index) lines up with the datetime-indexed FRED
# frames when everything is concatenated column-wise later on.
month_starts = pd.date_range(
    start=pd.Timestamp(year=start_year, month=1, day=1),
    periods=months,
    freq="MS",
)

# One CPI lookup per month; cpi.get is still handed a plain date.
cpi_values = [cpi.get(month_start.date()) for month_start in month_starts]

# Year-over-year inflation: relative change against the same month one year earlier.
inflation_rate = [
    (current - year_ago) / year_ago
    for year_ago, current in zip(cpi_values, cpi_values[12:])
]

# Drop the 12 baseline months so the dates line up with inflation_rate.
datetime_values = month_starts[12:]

inflation_df = pd.DataFrame(data={"Inflation Rate": inflation_rate}, index=datetime_values)
# Relative change of the inflation rate itself versus 12 months earlier
# (kept as a ratio, i.e. not multiplied by 100).
inflation_df["Inflation Percent Change"] = inflation_df["Inflation Rate"].pct_change(12)
inflation_df

Unnamed: 0,Inflation Rate,Inflation Percent Change
1950-01-01,-0.020833,
1950-02-01,-0.012605,
1950-03-01,-0.008403,
1950-04-01,-0.012552,
1950-05-01,-0.004202,
...,...,...
2021-09-01,0.053903,2.930760
2021-10-01,0.062219,4.263554
2021-11-01,0.068090,4.797186
2021-12-01,0.070364,4.166207


In [19]:
# Keep the raw CPI series as well, in case inflation does not correlate strongly
# with the other factors because CPI has been transformed too much.
# The first 12 CPI values are skipped so the rows match datetime_values.
cpi_df = pd.Series(cpi_values[12:], index=datetime_values, name="CPI").to_frame()
cpi_yearly_change = cpi_df["CPI"].pct_change(12)
cpi_df["CPI Percent Change"] = cpi_yearly_change * 100
cpi_df

Unnamed: 0,CPI,CPI Percent Change
1950-01-01,23.500,
1950-02-01,23.500,
1950-03-01,23.600,
1950-04-01,23.600,
1950-05-01,23.700,
...,...,...
2021-09-01,274.310,5.390349
2021-10-01,276.589,6.221869
2021-11-01,277.948,6.809003
2021-12-01,278.802,7.036403


In [6]:
# The download lives in its own cell so that re-running the processing cells
# does not use up API calls.
# Dividing by 1000 matches the "USA GDP in Trillions" label applied later.
usa_gdp = nasdaqdatalink.get("FRED/GDP") / 1000

In [7]:
# Only the most recent `years` calendar years are kept.
start_year = dt.date.today().year - years

# Vectorised year filter on the date index: replaces the row-by-row search for
# the first matching year and keeps the "Date" index in place throughout.
# rename() is a no-op if the column was already renamed, so the cell can be re-run.
usa_gdp = (
    usa_gdp.loc[usa_gdp.index.year >= start_year]
    .rename(columns={"Value": "USA GDP in Trillions"})
)
# The series is quarterly, so 4 periods back is the same quarter one year earlier.
usa_gdp["GDP Percent Change"] = usa_gdp["USA GDP in Trillions"].pct_change(4) * 100

usa_gdp

Unnamed: 0_level_0,USA GDP in Trillions,GDP Percent Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1950-01-01,0.280828,
1950-04-01,0.290383,
1950-07-01,0.308153,
1950-10-01,0.319945,
1951-01-01,0.336000,19.646189
...,...,...
2020-10-01,21.477597,-0.999615
2021-01-01,22.038226,2.592288
2021-04-01,22.740959,16.755356
2021-07-01,23.202344,9.763052


In [8]:
# Download the payroll series; dividing by 1000 matches the
# "Employment in the Millions" label applied later.
employment_data = nasdaqdatalink.get("FRED/PAYEMS") / 1000

In [9]:
# Recompute the cutoff here instead of relying on a `start_year` left over from
# another cell: the inflation cell reuses that name, so the value seen here
# would otherwise depend on the order in which cells were executed.
start_year = dt.date.today().year - years

# Vectorised year filter on the date index: replaces the row-by-row search for
# the first matching year and keeps the "Date" index in place throughout.
# rename() is a no-op if the column was already renamed, so the cell can be re-run.
employment_data = (
    employment_data.loc[employment_data.index.year >= start_year]
    .rename(columns={"Value": "Employment in the Millions"})
)
# The series is monthly, so 12 periods back is the same month one year earlier.
employment_data["Employment Percent Change"] = (
    employment_data["Employment in the Millions"].pct_change(12) * 100
)

employment_data

Unnamed: 0_level_0,Employment in the Millions,Employment Percent Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1950-01-01,43.526,
1950-02-01,43.297,
1950-03-01,43.954,
1950-04-01,44.382,
1950-05-01,44.718,
...,...,...
2021-09-01,147.328,4.021690
2021-10-01,148.005,4.024487
2021-11-01,148.652,4.235268
2021-12-01,149.162,4.677291


In [32]:
# Join the four frames column-wise on their date index, then keep only the
# dates for which every column has a value.
factor_frames = [inflation_df, usa_gdp, employment_data, cpi_df]
combined_df = pd.concat(factor_frames, axis=1).dropna()
combined_df

Unnamed: 0,Inflation Rate,Inflation Percent Change,USA GDP in Trillions,GDP Percent Change,Employment in the Millions,Employment Percent Change,CPI,CPI Percent Change
1951-01-01,0.080851,-4.880851,0.336000,19.646189,47.288,8.643110,25.400,8.085106
1951-04-01,0.093220,-8.426554,0.344090,18.495229,47.861,7.838763,25.800,9.322034
1951-07-01,0.074689,3.425311,0.351385,14.029394,48.061,5.735469,25.900,7.468880
1951-10-01,0.065041,0.712737,0.356178,11.324759,48.006,2.783368,26.200,6.504065
1952-01-01,0.043307,-0.464360,0.359820,7.089286,48.296,2.131619,26.500,4.330709
...,...,...,...,...,...,...,...,...
2020-10-01,0.011821,-0.329911,21.477597,-0.999615,142.279,-5.985318,260.388,1.182066
2021-01-01,0.013998,-0.437068,22.038226,2.592288,143.017,-5.989036,261.582,1.399770
2021-04-01,0.041597,11.639735,22.740959,16.755356,144.694,10.865584,267.054,4.159695
2021-07-01,0.053655,4.441207,23.202344,9.763052,146.387,5.278033,273.003,5.365475


In [33]:
import hvplot.pandas 

In [35]:
# Two line charts laid out side by side: employment vs. GDP and CPI vs. GDP.
employment_vs_gdp = combined_df[["Employment Percent Change", "GDP Percent Change"]].hvplot()
cpi_vs_gdp = combined_df[["CPI Percent Change", "GDP Percent Change"]].hvplot()
employment_vs_gdp + cpi_vs_gdp


In [36]:
# Pairwise correlation between the year-over-year change columns.
percent_change_columns = [
    "Inflation Percent Change",
    "GDP Percent Change",
    "Employment Percent Change",
    "CPI Percent Change",
]
combined_df[percent_change_columns].corr()

Unnamed: 0,Inflation Percent Change,GDP Percent Change,Employment Percent Change,CPI Percent Change
Inflation Percent Change,1.0,-0.03545,-0.065631,0.025309
GDP Percent Change,-0.03545,1.0,0.734086,0.533791
Employment Percent Change,-0.065631,0.734086,1.0,0.154646
CPI Percent Change,0.025309,0.533791,0.154646,1.0


In [37]:
# Pairwise correlation between the untransformed level columns.
level_columns = [
    "Inflation Rate",
    "USA GDP in Trillions",
    "Employment in the Millions",
    "CPI",
]
combined_df[level_columns].corr()

Unnamed: 0,Inflation Rate,USA GDP in Trillions,Employment in the Millions,CPI
Inflation Rate,1.0,-0.284685,-0.138827,-0.244642
USA GDP in Trillions,-0.284685,1.0,0.925956,0.978426
Employment in the Millions,-0.138827,0.925956,1.0,0.974675
CPI,-0.244642,0.978426,0.974675,1.0


# Questions to bring up:

The correlations indicate that we can predict the economic cycle to a certain extent --> First, run a linear regression model and measure the error for both long-term (annual) and short-term (monthly) data. If the two predicted lines approximately coincide somewhere, that point gives an estimated long-term value and, at the same time, an approximation of how long it will take to get there.

Second, for supervised learning, use ML to learn the economic cycles from the economic factors and predict what the next cycle should be. See whether it follows the order of the clock and whether that verifies the theory.


# Notes:

- Use the unemployment rate instead of the employment rate. It should have an inverse relationship with GDP and therefore produce a negative correlation.
- Show the highest stock-market returns by sector (add a slider to adjust the time frame for the return) --> provide metrics such as beta, volatility, Sharpe ratio, value at risk, etc.

- Create trading algo to simulate the returns on a chosen stock (include transaction costs)

- Also include a function that returns any stock and its information (high, low, predicted values, sector)







# Really future ideas:

Create a mini-blockchain and record each transaction made along with its advised valuation. Return to it in the future and compare the stock's performance to see how accurate the valuation was.
