In [1]:
import arviz as az
import io
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from pathlib import Path
import pymc as pm
import pytensor.tensor as pt
import requests
import statsmodels.api as sm
import warnings
import xarray as xr

In [2]:
# Raw ADS workbook, located relative to the notebook's working directory:
# <parent of cwd>/data raw/ADS_Dataset.xlsx
data_path = Path.cwd().parent.joinpath("data raw", "ADS_Dataset.xlsx")

# Report the resolved location and whether the file is present, one item per
# line (the empty string yields the blank separator line).
print(
    "Full file path:",
    data_path,
    "",
    "File exists:",
    data_path.exists(),
    sep="\n",
)

Full file path:
/Users/awalters/escp_phd/govt_spending/data raw/ADS_Dataset.xlsx

File exists:
True


In [3]:
# Read the workbook into a DataFrame using the pandas defaults
# (first worksheet, first row as the header).
df_raw = pd.read_excel(io=data_path)

In [4]:
# Show the freshly loaded frame; pandas abbreviates the preview to the first
# and last rows, so this does not print the whole table.
df_raw

Unnamed: 0.1,Unnamed: 0,'pdvmily','rgdppc','govpc','pgdp','tbill','totdefgdp','feddebtgdp','rinvpc','tfp_adj','patents','hours','rgov_invES_pc','rgov_invRD_pc','rgov_cons_pc','laborprod','rconspc','rexppc','rimppc','tfp'
0,1890.00,0.000000,-5.377503,-8.489825,-3.012944,0.042473,0.008416,0.081220,-7.031690,1.000000,8.660774,6.961860,-10.899490,-13.926542,-8.588761,-12.339363,-5.642778,-7.893120,-8.016904,1.000000
1,1890.25,0.001008,-5.367321,-8.361955,-3.016701,0.040384,0.007440,0.078143,-7.034209,1.000731,8.791334,6.961375,-10.730485,-13.892051,-8.464635,-12.328696,-5.640117,-7.934455,-7.926712,1.010615
2,1890.50,0.000000,-5.357144,-8.254742,-2.997408,0.043973,0.004118,0.073304,-7.052211,1.019988,8.801469,6.948343,-10.579253,-13.850632,-8.361820,-12.305487,-5.635331,-7.974536,-7.899304,1.029335
3,1890.75,0.000000,-5.339102,-8.254618,-3.000996,0.054525,0.004122,0.069763,-7.079066,1.038773,8.749891,6.939308,-10.553038,-13.839070,-8.364626,-12.278410,-5.622912,-7.900646,-7.960915,1.052241
4,1891.00,0.000000,-5.345935,-8.304856,-3.002703,0.043061,0.003595,0.068084,-7.147801,1.035873,8.640649,6.934658,-10.581543,-13.833001,-8.417573,-12.280594,-5.625953,-7.866075,-7.991724,1.046395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,2014.75,0.000000,-2.986680,-4.704116,0.086792,0.000200,-0.038392,0.737578,-4.808616,2.983326,11.291418,6.772107,-6.610061,-7.806485,-4.919322,-9.758787,-3.365468,-5.001276,-4.792632,3.016675
500,2015.00,0.000000,-2.986857,-4.710307,0.087086,0.000300,-0.035163,0.740285,-4.803727,2.983944,11.253494,6.772218,-6.625721,-7.794549,-4.924795,-9.759075,-3.368030,-5.043382,-4.824758,3.015514
501,2015.25,0.000000,-2.978942,-4.707879,0.092342,0.000200,-0.027783,0.732216,-4.800523,2.987593,11.315888,6.774924,-6.596384,-7.793126,-4.927300,-9.753867,-3.360748,-5.040333,-4.835139,3.020722
502,2015.50,0.000000,-2.976091,-4.706336,0.095574,0.000400,-0.036445,0.756848,-4.792990,2.987491,11.327222,6.778221,-6.594293,-7.794318,-4.925705,-9.754312,-3.355415,-5.054523,-4.843686,3.020682


In [5]:
# Columns rescaled by a factor of 100 during preparation (presumably
# fraction -> percent; confirm against the dataset documentation).
PERCENT_COLS = ["pdvmily", "tbill", "totdefgdp", "feddebtgdp"]

# Observations dated before this quarter start are dropped.
SAMPLE_START = pd.Timestamp("1890-01-01")


def prepare_ads_frame(frame: pd.DataFrame) -> pd.DataFrame:
    """Return the ADS table indexed by quarter-start timestamps.

    The first column is taken to be the time variable, encoded as
    ``year + (quarter - 1) / 4`` (e.g. 1890.25 is 1890Q2). Apostrophes
    wrapping the column names are stripped, the columns listed in
    ``PERCENT_COLS`` are multiplied by 100, the time column is replaced by a
    ``date`` index of quarter-start timestamps, rows are sorted by date and
    anything before ``SAMPLE_START`` is dropped.

    The input frame is not modified. A frame that already carries a
    ``DatetimeIndex`` is returned unchanged, so re-running the cell can
    neither rescale the data a second time nor misread a data column as the
    time column.

    Parameters
    ----------
    frame : pd.DataFrame
        Table as loaded from the workbook, time variable in the first column.

    Returns
    -------
    pd.DataFrame
        Prepared table with a ``DatetimeIndex`` named ``date``.
    """
    if isinstance(frame.index, pd.DatetimeIndex):
        # Already prepared by an earlier run of this cell.
        return frame

    time_col = frame.columns[0]
    # rename() hands back a new object, so the caller's frame stays untouched.
    out = frame.rename(columns={time_col: "year_frac"})
    out.columns = out.columns.str.strip("'")

    # Count quarters since year 0 and round once. Truncating the year first
    # would turn a value such as 1890.9999999 into the invalid "1890Q5".
    quarter_count = (out["year_frac"] * 4).round().astype(int)
    out["date"] = pd.PeriodIndex.from_fields(
        year=(quarter_count // 4).to_numpy(),
        quarter=(quarter_count % 4 + 1).to_numpy(),
        freq="Q",
    )

    out[PERCENT_COLS] = out[PERCENT_COLS] * 100

    out = out.drop(columns=["year_frac"]).set_index("date").sort_index()
    # Represent each quarter by the timestamp of its first day.
    out.index = out.index.to_timestamp(how="start")

    return out.loc[out.index >= SAMPLE_START]


# The name `df_raw` is kept for the prepared table because the selection cell
# further down reads it; the guard inside prepare_ads_frame makes rebinding the
# name safe when this cell is executed more than once.
df_raw = prepare_ads_frame(df_raw)

In [6]:
# Columns carried forward into the processed dataset, in output order.
cols_var = [
    "pdvmily",     # military spending news
    "rgdppc",      # real GDP per capita
    "govpc",       # real government spending per capita
    "tbill",       # interest rate; noted as "fed funds rate", but the column name suggests a T-bill rate -- TODO confirm
    "totdefgdp",   # deficit as a percent of GDP
    "feddebtgdp",  # debt as a percent of GDP
    "tfp_adj",     # adjusted TFP
]

# Keep only those columns and turn the date index back into an ordinary
# column; reset_index() already returns a new frame.
df = df_raw.loc[:, cols_var].reset_index()

In [7]:
# Write the selected series next to the raw data, under "data processed".
out_dir = Path.cwd().parent / "data processed"
# to_csv does not create missing folders, so make sure the target directory
# exists (e.g. on a fresh checkout) instead of failing at the very last step.
out_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(out_dir / "ads_dataset.csv", index=False)