In [1]:
import pandas as pd
import numpy as np
import math
import scipy.stats as stats
import json
import requests

from functools import reduce
import yfinance as yf

import datetime as dt
from datetime import datetime, date, time, timedelta
from dateutil.relativedelta import relativedelta

# Import PlotLy Dependencies
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

In [2]:
%run data_source.ipynb

In [28]:
# GDP - Gross Domestic Product
gdp_df = pd.DataFrame(fred_api_function("GDP"))

# Rename the 'Value' column to 'Gross Domestic Product'
gdp_df.rename(columns={'Value': 'Gross Domestic Product'}, inplace=True)

gdp_df

Unnamed: 0,Date,Gross Domestic Product
0,1947-01-01,243.164
1,1947-04-01,245.968
2,1947-07-01,249.585
3,1947-10-01,259.745
4,1948-01-01,265.742
...,...,...
302,2022-07-01,25994.639
303,2022-10-01,26408.405
304,2023-01-01,26813.601
305,2023-04-01,27063.012


In [4]:
# GPDI - Gross Private Domestic Investment
# Private Investment leads TO future growth
gross_investment_df = pd.DataFrame(fred_api_function("GPDI"))

# Rename the 'Value' column to 'Gross Private Domestic Investment'
gross_investment_df.rename(columns={'Value': 'Gross Private Domestic Investment'}, inplace=True)

gross_investment_df

Unnamed: 0,Date,Gross Private Domestic Investment
0,1947-01-01,35.854
1,1947-04-01,34.505
2,1947-07-01,34.911
3,1947-10-01,43.250
4,1948-01-01,47.202
...,...,...
302,2022-07-01,4724.621
303,2022-10-01,4796.162
304,2023-01-01,4725.828
305,2023-04-01,4780.290


In [5]:
# GFDEGDQ188S - Federal Debt: Total Public Debt as Percent of Gross Domestic Product
# Federal borrowing FROM future growth 
gov_debt_to_gdp_df = pd.DataFrame(fred_api_function("GFDEGDQ188S"))

# Rename the 'Value' column to 'Total Public Debt'
gov_debt_to_gdp_df.rename(columns={'Value': 'Total Public Debt of GDP (%)'}, inplace=True)

gov_debt_to_gdp_df



Unnamed: 0,Date,Total Public Debt of GDP (%)
0,1966-01-01,40.33999
1,1966-04-01,39.26763
2,1966-07-01,39.62091
3,1966-10-01,39.51977
4,1967-01-01,39.20383
...,...,...
225,2022-04-01,119.66902
226,2022-07-01,118.98189
227,2022-10-01,118.97609
228,2023-01-01,117.32269


In [6]:
productivity_df = pd.DataFrame()

# Merge DataFrames on the 'Date' column
productivity_df = pd.merge(gdp_df, gross_investment_df, on='Date').merge(gov_debt_to_gdp_df, on='Date')

productivity_df

Unnamed: 0,Date,Gross Domestic Product,Gross Private Domestic Investment,Total Public Debt of GDP (%)
0,1966-01-01,795.734,144.200,40.33999
1,1966-04-01,804.981,143.501,39.26763
2,1966-07-01,819.638,143.194,39.62091
3,1966-10-01,833.302,145.855,39.51977
4,1967-01-01,844.170,142.811,39.20383
...,...,...,...,...
225,2022-04-01,25544.273,4739.049,119.66902
226,2022-07-01,25994.639,4724.621,118.98189
227,2022-10-01,26408.405,4796.162,118.97609
228,2023-01-01,26813.601,4725.828,117.32269


In [7]:
# Normalize Data Scaling
norm_productivity_df = pd.DataFrame()

# Calculate Maximum and Minimums for each feature
gdp_min = productivity_df['Gross Domestic Product'].min()
gdp_max = productivity_df['Gross Domestic Product'].max()
gpdi_min = productivity_df['Gross Private Domestic Investment'].min()
gpdi_max = productivity_df['Gross Private Domestic Investment'].max()
debt_min = productivity_df['Total Public Debt of GDP (%)'].min()
debt_max = productivity_df['Total Public Debt of GDP (%)'].max()

# Calculate Normalization of each feature
norm_productivity_df["Date"] = productivity_df['Date']
norm_productivity_df["Normalized GDP"] = (productivity_df['Gross Domestic Product'] - gdp_min) / (gdp_max - gdp_min)
norm_productivity_df["Normalized GPDI"] = (productivity_df['Gross Private Domestic Investment'] - gpdi_min) / (gpdi_max - gpdi_min)
norm_productivity_df["Public Debt To GDP"] = productivity_df['Total Public Debt of GDP (%)'] / 100

# How each feature contributes to the Productivity Index needs to be weighted to provide a single number for the Productivity Index contribution to the algorithm
# Note:- The corresponding weights are 35%, 30%, and 35% results in the total of 100%
norm_productivity_df["Overall Productivity Index"] = (norm_productivity_df["Normalized GDP"] * 0.60) +    \
                                                (norm_productivity_df["Normalized GPDI"] * 0.2) +  \
                                                ((1 - norm_productivity_df["Public Debt To GDP"]) * (0.20))
norm_productivity_df


Unnamed: 0,Date,Normalized GDP,Normalized GPDI,Public Debt To GDP,Overall Productivity Index
0,1966-01-01,0.000000,0.001439,0.403400,0.119608
1,1966-04-01,0.000352,0.001289,0.392676,0.121934
2,1966-07-01,0.000910,0.001223,0.396209,0.121549
3,1966-10-01,0.001430,0.001795,0.395198,0.122177
4,1967-01-01,0.001844,0.001141,0.392038,0.122927
...,...,...,...,...,...
225,2022-04-01,0.942181,0.987740,1.196690,0.723519
226,2022-07-01,0.959327,0.984643,1.189819,0.734561
227,2022-10-01,0.975079,1.000000,1.189761,0.747095
228,2023-01-01,0.990505,0.984903,1.173227,0.756638


In [22]:
# Plot Resulting Productivity Data

fig = go.Figure()
fig.add_trace(go.Scatter(
    x = norm_productivity_df['Date'],
    y = norm_productivity_df['Normalized GDP'],
    line = dict(color='blue', width=1, dash='dot'),
    showlegend=True,
    name="Normalized GDP"
))

fig.add_trace(go.Scatter(
    x = norm_productivity_df['Date'],
    y = norm_productivity_df['Normalized GPDI'],
    line = dict(color='red', width=1, dash='dot'),
    showlegend=True,
    name="Normalized GPDI"
))
fig.add_trace(go.Scatter(
    x = norm_productivity_df['Date'],
    y = norm_productivity_df['Public Debt To GDP'],
    line = dict(color='green', width=1, dash='dot'),
    showlegend=True,
    name="Public Debt To GDP"
))
fig.add_trace(go.Scatter(
    x = norm_productivity_df['Date'],
    y = norm_productivity_df['Overall Productivity Index'],
    line = dict(color='black', width=2),
    showlegend=True,
    name="Overall Productivity Index"
))

fig.update_layout(
    title="Overall Productivity Index"
)


In [9]:
norm_productivity_df

Unnamed: 0,Date,Normalized GDP,Normalized GPDI,Public Debt To GDP,Overall Productivity Index
0,1966-01-01,0.000000,0.001439,0.403400,0.119608
1,1966-04-01,0.000352,0.001289,0.392676,0.121934
2,1966-07-01,0.000910,0.001223,0.396209,0.121549
3,1966-10-01,0.001430,0.001795,0.395198,0.122177
4,1967-01-01,0.001844,0.001141,0.392038,0.122927
...,...,...,...,...,...
225,2022-04-01,0.942181,0.987740,1.196690,0.723519
226,2022-07-01,0.959327,0.984643,1.189819,0.734561
227,2022-10-01,0.975079,1.000000,1.189761,0.747095
228,2023-01-01,0.990505,0.984903,1.173227,0.756638


In [13]:
# Plot Quarterly Productivity Change
productivity_change_df = pd.DataFrame()

productivity_change_df["Date"] = norm_productivity_df["Date"]
productivity_change_df["Overall Productivity Index"] = norm_productivity_df["Overall Productivity Index"]
productivity_change_df["Quarterly Change in Productivity Index (%)"] = ((productivity_change_df["Overall Productivity Index"] -  \
                                                    productivity_change_df["Overall Productivity Index"].shift(1)) /  \
                                                    productivity_change_df["Overall Productivity Index"]) * 100

productivity_change_df.tail()

productivity_change_df

Unnamed: 0,Date,Overall Productivity Index,Quarterly Change in Productivity Index (%)
0,1966-01-01,0.119608,
1,1966-04-01,0.121934,1.907537
2,1966-07-01,0.121549,-0.316698
3,1966-10-01,0.122177,0.514525
4,1967-01-01,0.122927,0.609669
...,...,...,...
225,2022-04-01,0.723519,1.957737
226,2022-07-01,0.734561,1.503233
227,2022-10-01,0.747095,1.677724
228,2023-01-01,0.756638,1.261216


In [17]:
# Calculate Change in Velocity
productivity_change_df["Productivity Change in Velocity (basis points)"] = (productivity_change_df["Quarterly Change in Productivity Index (%)"] -  \
                                                    productivity_change_df["Quarterly Change in Productivity Index (%)"].shift(1)) * 100
productivity_change_df

Unnamed: 0,Date,Overall Productivity Index,Quarterly Change in Productivity Index (%),Productivity Change in Velocity (basis points)
0,1966-01-01,0.119608,,
1,1966-04-01,0.121934,1.907537,
2,1966-07-01,0.121549,-0.316698,-222.423511
3,1966-10-01,0.122177,0.514525,83.122272
4,1967-01-01,0.122927,0.609669,9.514394
...,...,...,...,...
225,2022-04-01,0.723519,1.957737,8.259742
226,2022-07-01,0.734561,1.503233,-45.450386
227,2022-10-01,0.747095,1.677724,17.449089
228,2023-01-01,0.756638,1.261216,-41.650792


Data needs to be in a monthly time series. To do this, we can use the quarterly data point and use it for 2 consecutive months. This way we can have data that can merge with other data tables that are in monthly formats.

In [19]:
# Function to convert quarterly data to monthly data for Productivity Data

def convert_to_monthly(df):
    
    monthly_df = pd.DataFrame()

    date = []
    index = []
    index_change = []
    velocity_change = []
    
    for i, row in df.iterrows():
        
        original_date = pd.to_datetime(row[0])
        date.append(original_date)
        index.append(row[1])
        index_change.append(row[2])
        velocity_change.append(row[3])
    
        date_plus_1 = original_date + pd.DateOffset(months=1)
        date.append(date_plus_1)
        index.append(row[1])
        index_change.append(row[2])
        velocity_change.append(row[3])
        
        date_plus_2 = original_date + pd.DateOffset(months=2)
        date.append(date_plus_2)
        index.append(row[1])
        index_change.append(row[2])
        velocity_change.append(row[3])
        
    data = {'Date': date,
            'Overall Productivity Index': index,
            'Change in Productivity Index (%)': index_change,
            'Productivity Change in Velocity (basis points)': velocity_change}
    
    monthly_df = pd.DataFrame(data)
    return monthly_df

productivity_index_monthly_data = convert_to_monthly(productivity_change_df)

# Store dataframe to export
%store productivity_index_monthly_data
productivity_index_monthly_data

Stored 'productivity_index_monthly_data' (DataFrame)


Unnamed: 0,Date,Overall Productivity Index,Change in Productivity Index (%),Productivity Change in Velocity (basis points)
0,1966-01-01,0.119608,,
1,1966-02-01,0.119608,,
2,1966-03-01,0.119608,,
3,1966-04-01,0.121934,1.907537,
4,1966-05-01,0.121934,1.907537,
...,...,...,...,...
685,2023-02-01,0.756638,1.261216,-41.650792
686,2023-03-01,0.756638,1.261216,-41.650792
687,2023-04-01,0.760378,0.491841,-76.937550
688,2023-05-01,0.760378,0.491841,-76.937550


In [27]:
# Plot Resulting Change in Velocity Data

fig = go.Figure()

fig.add_trace(go.Scatter(
    x = productivity_index_monthly_data['Date'],
    y = productivity_index_monthly_data['Productivity Change in Velocity (basis points)'],
    line = dict(color='blue', width=1),
    showlegend=True,
    name="Productivity Change in Velocity (basis points)"
))


fig.update_layout(
    title="Monthly Productivity Data"
)
fig.update_yaxes(
    title_text="basis points"
)

In [31]:
# Quarterly GDP Change
gdp_change_df = pd.DataFrame()

gdp_change_df["Date"] = gdp_df["Date"]
gdp_change_df["Gross Domestic Product"] = gdp_df["Gross Domestic Product"]
gdp_change_df["Quarterly Change in GDP (%)"] = ((gdp_change_df["Gross Domestic Product"] -  \
                                                    gdp_change_df["Gross Domestic Product"].shift(1)) /  \
                                                    gdp_change_df["Gross Domestic Product"]) * 100


# Calculate Change in Velocity
gdp_change_df["GDP Change in Velocity (basis points)"] = (gdp_change_df["Quarterly Change in GDP (%)"] -  \
                                                    gdp_change_df["Quarterly Change in GDP (%)"].shift(1)) * 100

gdp_change_df

Unnamed: 0,Date,Gross Domestic Product,Quarterly Change in GDP (%),GDP Change in Velocity (basis points)
0,1947-01-01,243.164,,
1,1947-04-01,245.968,1.139986,
2,1947-07-01,249.585,1.449206,30.921999
3,1947-10-01,259.745,3.911529,246.232293
4,1948-01-01,265.742,2.256700,-165.482851
...,...,...,...,...
302,2022-07-01,25994.639,1.732534,-28.418795
303,2022-10-01,26408.405,1.566797,-16.573753
304,2023-01-01,26813.601,1.511158,-5.563817
305,2023-04-01,27063.012,0.921594,-58.956481


In [32]:
# Function to convert quarterly data to monthly data for Productivity Data

def convert_to_monthly(df):
    
    monthly_df = pd.DataFrame()

    date = []
    gdp = []
    gdp_change = []
    velocity_change = []
    
    for i, row in df.iterrows():
        
        original_date = pd.to_datetime(row[0])
        date.append(original_date)
        gdp.append(row[1])
        gdp_change.append(row[2])
        velocity_change.append(row[3])
    
        date_plus_1 = original_date + pd.DateOffset(months=1)
        date.append(date_plus_1)
        gdp.append(row[1])
        gdp_change.append(row[2])
        velocity_change.append(row[3])
        
        date_plus_2 = original_date + pd.DateOffset(months=2)
        date.append(date_plus_2)
        gdp.append(row[1])
        gdp_change.append(row[2])
        velocity_change.append(row[3])
        
    data = {'Date': date,
            'Overall Productivity Index': gdp,
            'Change in GDP (%)': gdp_change,
            'GDP Change in Velocity (basis points)': velocity_change}
    
    monthly_df = pd.DataFrame(data)
    return monthly_df

gdp_monthly_data = convert_to_monthly(gdp_change_df)

# Store dataframe to export
%store gdp_monthly_data
gdp_monthly_data

Stored 'gdp_monthly_data' (DataFrame)


Unnamed: 0,Date,Overall Productivity Index,Change in GDP (%),GDP Change in Velocity (basis points)
0,1947-01-01,243.164,,
1,1947-02-01,243.164,,
2,1947-03-01,243.164,,
3,1947-04-01,245.968,1.139986,
4,1947-05-01,245.968,1.139986,
...,...,...,...,...
916,2023-05-01,27063.012,0.921594,-58.956481
917,2023-06-01,27063.012,0.921594,-58.956481
918,2023-07-01,27644.463,2.103318,118.172447
919,2023-08-01,27644.463,2.103318,118.172447


In [33]:
# Plot Resulting Change in Velocity Data

fig = go.Figure()

fig.add_trace(go.Scatter(
    x = gdp_monthly_data['Date'],
    y = gdp_monthly_data['GDP Change in Velocity (basis points)'],
    line = dict(color='blue', width=1),
    showlegend=True,
    name="GDP Change in Velocity (basis points)"
))


fig.update_layout(
    title="GDP Change in Velocity (basis points)"
)
fig.update_yaxes(
    title_text="basis points"
)