In [10]:
import requests
from key import fred_key
import pandas as pd
import numpy as np

# Root of the FRED REST API and the path of the observations endpoint appended to it.
base_url = "https://api.stlouisfed.org/fred/"
obs_endpoint = "series/observations"

# Observation window sent with every series request (ISO dates, inclusive bounds per the API).
start_date, end_date = "1950-01-01", "2023-12-31"


def create_series_dict(name, series_id, units='lin'):
    """Build the request parameters for one FRED series.

    Parameters
    ----------
    name : str
        Short label for the series; stored under ``'name'`` in the returned dict.
    series_id : str
        FRED series identifier, stored under ``'series_id'``.
    units : str, optional
        Value for the ``'units'`` entry. Defaults to ``'lin'``, which is what
        every request used before this parameter existed.

    Returns
    -------
    dict
        The label plus the query parameters: API key, JSON file type, the
        module-level observation window, annual frequency and ``units``.
    """
    return {
        'name': name,
        'series_id': series_id,
        'api_key': fred_key,
        'file_type': 'json',
        'observation_start': start_date,
        'observation_end': end_date,
        'frequency': 'a',
        'units': units
    }

# (column label, FRED series ID) pairs for the first table.
series_list = [
    ('GDP', 'GDPC1'),
    ('CND', 'PCEND'),
    ('CD', 'PCEDG'),
    ('H', 'HOANBS'),
    ('L', 'PAYEMS'),
    ('AveW', 'AHETPI'),
]

# One request-parameter dict per series, in the same order as series_list.
request_parameters = [create_series_dict(*entry) for entry in series_list]



In [11]:
def fetch_data(parameter, timeout=30):
    """Download one series from the observations endpoint as a one-column DataFrame.

    Parameters
    ----------
    parameter : dict
        Query parameters for the request plus a ``'name'`` entry. ``'name'`` is
        only used to label the returned column and is not sent to the API.
    timeout : float, optional
        Seconds ``requests`` waits on the server before raising, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        Indexed by observation date, with a single numeric column named
        ``parameter['name']``. Values that cannot be parsed as numbers become
        NaN. An empty DataFrame is returned when the status code is not 200 or
        the response carries no observations.
    """
    # 'name' is a local label, not an API parameter, so keep it out of the query string.
    query = {key: value for key, value in parameter.items() if key != 'name'}
    response = requests.get(base_url + obs_endpoint, params=query, timeout=timeout)

    if response.status_code != 200:
        print('Failed to retrieve data. Status code:', response.status_code)
        return pd.DataFrame()  # Return an empty DataFrame on failure

    observations = response.json().get('observations', [])
    if not observations:
        # Without rows there is no 'date' column to index on; treat it like a failed fetch.
        print('No observations returned for', parameter['name'])
        return pd.DataFrame()

    obs_data = pd.DataFrame(observations)
    obs_data['date'] = pd.to_datetime(obs_data['date'])
    obs_data = (
        obs_data
        .set_index('date')
        .drop(['realtime_start', 'realtime_end'], axis=1)
    )

    # Convert 'value' to numeric, coercing errors to NaN
    obs_data['value'] = pd.to_numeric(obs_data['value'], errors='coerce')

    return obs_data.rename(columns={'value': parameter['name']})
    
    
# Fetch every requested series, then join them column-wise on the date index in one
# concat call. Empty frames (what fetch_data returns on failure) are skipped rather
# than concatenated.
series_frames = [fetch_data(parameter) for parameter in request_parameters]
series_frames = [frame for frame in series_frames if not frame.empty]
dataframe = pd.concat(series_frames, axis=1) if series_frames else pd.DataFrame()

# Display the merged dataframe
dataframe

Unnamed: 0_level_0,GDP,CND,CD,H,L,AveW
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1950-01-01,2458.532,,,42.265,45282,
1951-01-01,2656.320,,,44.261,47926,
1952-01-01,2764.803,,,44.753,48903,
1953-01-01,2894.412,,,45.880,50304,
1954-01-01,2877.708,,,44.331,49087,
...,...,...,...,...,...,...
2019-01-01,20692.087,3006.5,1522.7,102.791,150906,23.51
2020-01-01,20234.074,3084.2,1628.9,95.064,142165,24.69
2021-01-01,21407.693,3500.2,2006.4,100.227,146276,25.91
2022-01-01,21822.037,3868.1,2129.0,104.243,152531,27.57


In [12]:
# Extend the merged frame with two derived ratios, then keep complete rows only.

# productivity: GDP divided by L (annual average income)
dataframe['GDP/L'] = dataframe['GDP'].div(dataframe['L'])
# H divided by L, scaled by 1000
dataframe['AveH'] = dataframe['H'].div(dataframe['L']).mul(1000)

# Drop every row that has a missing value in any column
# (original note: data from 1964 to 2022).
dataset1 = dataframe.dropna()
dataset1




Unnamed: 0_level_0,GDP,CND,CD,H,L,AveW,GDP/L,AveH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1964-01-01,4205.277,152.7,59.5,50.351,58394,2.54,0.072016,0.862263
1965-01-01,4478.555,163.3,66.4,52.297,60879,2.63,0.073565,0.859032
1966-01-01,4773.931,177.9,71.8,54.107,64025,2.73,0.074564,0.845092
1967-01-01,4904.864,185.0,74.0,54.113,65935,2.86,0.074389,0.820702
1968-01-01,5145.914,199.8,84.8,55.074,68027,3.02,0.075645,0.80959
1969-01-01,5306.595,214.2,90.5,56.666,70515,3.22,0.075255,0.803602
1970-01-01,5316.391,228.8,90.0,55.799,71007,3.4,0.074871,0.785824
1971-01-01,5491.446,239.7,102.4,55.704,71331,3.63,0.076985,0.780923
1972-01-01,5780.048,257.4,116.4,57.455,73788,3.9,0.078333,0.77865
1973-01-01,6106.371,286.1,130.5,59.786,76902,4.14,0.079405,0.777431


In [38]:
# Standard deviation of each column divided by its mean.
# NOTE(review): this is stored under 'SD%' below but is not multiplied by 100 —
# confirm the intended scale.
sd = dataset1.std() / dataset1.mean()

lags = range(-4, 5)

# Cross-correlation of GDP with every column shifted by -lag, one table column per lag.
# Building the frame from lists of floats keeps the columns float64; creating an empty
# frame and filling it cell by cell would leave them as object dtype.
cross_corr_df = pd.DataFrame(
    {
        lag: [dataset1['GDP'].corr(dataset1[col].shift(-lag)) for col in dataset1.columns]
        for lag in lags
    },
    index=dataset1.columns,
)

cross_corr_df['SD%'] = sd
table1 = cross_corr_df
table1



In [39]:
# import data for the second table

# (column label, FRED series ID) pairs.
# NOTE(review): confirm that the series ID used for 'I' is the intended investment series.
series_list2 = [
    ('Y', 'A939RC0Q052SBEA'),
    ('C', 'A794RC0Q052SBEA'),
    ('I', 'RGDPLPUSA625NUPN'),
    ('w', 'LES1252881600Q'),
    ('r', 'FEDFUNDS'),
    ('A', 'RTFPNAUSA632NRUG')
]

request_parameters2 = [create_series_dict(name, series_id) for name, series_id in series_list2]

# Fetch every series and join them column-wise on the date index in a single concat.
# Empty frames (what fetch_data returns on failure) are skipped rather than concatenated.
series_frames2 = [fetch_data(parameter) for parameter in request_parameters2]
series_frames2 = [frame for frame in series_frames2 if not frame.empty]
dataframe2 = pd.concat(series_frames2, axis=1) if series_frames2 else pd.DataFrame()

# N reuses AveH from the first dataset (aligned on the date index); Y/N is Y per unit of N.
dataframe2['N'] = dataset1['AveH']
dataframe2['Y/N'] = dataframe2['Y']/dataframe2['N']

# Keep only rows where every column is present
# (original note: data from 1982 to 2007).
dataset2 = dataframe2.dropna()
dataset2


In [37]:
# Summary statistics for every column of dataset2.
sd2 = dataset2.std()                  # standard deviation
sd_r = sd2.div(dataset2.mean())       # standard deviation divided by the mean
# first-order autocorrelation (how much the value at t is related to the value at t-1)
p = dataset2.apply(pd.Series.autocorr, lag=1)
# correlation of every column with Y
corr_Y = dataset2.corr()['Y']

# One row per column of dataset2, one table column per statistic.
table2 = pd.DataFrame(
    {'SD': sd2, 'SD%': sd_r, 'p': p, 'corr_Y': corr_Y},
    index=dataset2.columns,
)
table2