In [None]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import statsmodels.tsa.stattools as st

# Dataset: FRED-MD

In [None]:
# FRED-MD: a monthly macroeconomic panel of U.S. indicators (output and
# income, the labor market, prices, ...) spanning 1959 to 2020.
# The CSV is looked up relative to the current working directory.
df = pd.read_csv("current.csv")
df

In [None]:
# Restrict the sample to 1980 onward: skip the first 253 rows and the last two
# rows of the raw table, then discard every remaining row that has a missing value.
# NOTE(review): offset 253 is assumed to line up with the first 1980
# observation -- confirm against the layout of current.csv.
fred = df.iloc[253:-2].dropna()
fred

In [None]:
# Preprocessing plan: difference the non-stationary series until they are stationary,
# then standardize every series to zero mean and unit variance before extracting
# principal components. The helper below performs the standardization step.
def center(np_arr):
    """Standardize a series to zero mean and unit variance.

    Parameters
    ----------
    np_arr : array-like
        Numeric values. They are converted to a float ndarray, so
        object-dtype input that holds numbers is accepted.

    Returns
    -------
    numpy.ndarray
        ``(np_arr - mean) / std`` computed over all elements with the
        population standard deviation (``np.std`` default). Empty input
        yields an empty float array, and a constant series yields all zeros
        instead of dividing by a zero standard deviation.
    """
    values = np.asarray(np_arr, dtype=float)
    if values.size == 0:
        return values.copy()
    # Detect a constant series by comparing extremes: this is exact, whereas
    # the computed standard deviation can be a tiny non-zero rounding residue.
    if values.max() == values.min():
        return np.zeros_like(values)
    zero_mean = values - np.mean(values)
    return zero_mean / np.std(values)

In [None]:
# Standardize every numeric column to zero mean and unit variance, producing
# one row per series: shape (n_numeric_columns, n_observations).
# Columns whose first entry is a string are skipped (presumably the date
# label column -- verify against the CSV header).
standardized_columns = [
    # to_numpy() on a mixed-type frame yields object arrays; cast each column
    # so the stacked result is a plain float matrix.
    center(col.astype(float))
    for col in fred.to_numpy().T
    if not isinstance(col[0], str)
]
# Stack the rows instead of appending to a flat array and reshaping to a
# hard-coded (128, 341): the shape now follows the data, and the quadratic
# re-allocation of np.append inside the loop is avoided.
whitened_fred = (
    np.vstack(standardized_columns)
    if standardized_columns
    else np.empty((0, len(fred)))
)


In [None]:
# check for non-stationary and take differences of non-stationary series
def non_stationary(timeseries, significance='10%'):
    """Report whether an augmented Dickey-Fuller test fails to reject a unit root.

    Parameters
    ----------
    timeseries : array-like
        One-dimensional series handed to ``adfuller``.
    significance : str, optional
        Key into the critical-value table returned by ``adfuller``
        (``'1%'``, ``'5%'`` or ``'10%'``). Defaults to ``'10%'``.

    Returns
    -------
    bool
        Result of ``test_statistic > critical_value``. True means the
        unit-root null hypothesis is not rejected at the chosen level, so the
        series is treated as non-stationary.

    Raises
    ------
    ValueError
        If ``significance`` is not one of the levels reported by the test.
    """
    # The lag order is selected automatically by AIC.
    dftest = st.adfuller(timeseries, autolag='AIC')
    test_statistic = dftest[0]
    critical_values = dftest[4]
    if significance not in critical_values:
        raise ValueError(
            "significance must be one of %s, got %r"
            % (sorted(critical_values), significance)
        )
    return test_statistic > critical_values[significance]
# First-difference every series that the ADF check flags as non-stationary.
# Differencing shortens a series by one observation, so the first observation
# of the remaining series is dropped as well to keep all rows aligned in time.
# The result is stored under a new name so `whitened_fred` stays unchanged.
# NOTE(review): differenced rows no longer have unit variance, and a single
# difference may not be enough for every series -- re-check and re-standardize
# before extracting principal components.
stationary_rows = []
for col in whitened_fred:
    series = np.asarray(col, dtype=float)
    if non_stationary(series):
        stationary_rows.append(np.diff(series))
    else:
        stationary_rows.append(series[1:])
stationary_fred = np.array(stationary_rows)

In [None]:
# Sanity check: print the mean and standard deviation of each standardized
# series; they are expected to come out close to 0 and 1 respectively.
for series in whitened_fred:
    series_mean, series_std = np.mean(series), np.std(series)
    print(series_mean, "\t", series_std)