In [17]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
import os
from dotenv import load_dotenv
import pyfredapi as pf
import requests
import re
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import warnings

import xgboost as xgb

from prince import FAMD

from itertools import combinations, chain

# Suppress specific FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [8]:
# Load the dataset; the CSV's first column becomes the row index.
df = pd.read_csv(
    'beta_dates/beta_data_7_60.csv',
    index_col=0,
)

In [9]:
# Display limits: show up to 100 rows before truncating; never truncate columns.
pd.options.display.max_rows = 100
pd.options.display.max_columns = None

In [10]:
# Preview the first 50 rows of the loaded frame.
df.head(50)

Unnamed: 0,fed_party,potus_party,recess,pce,ue,cars,house,cli,loan,exports,rgdp,gdpg,veloc,ffr,d_pce,d_ue,d_cars,d_house,d_cli,d_loan,d_exports,d_rgdp,d_gdpg,d_veloc,d_ffr,b0_spx,b0_usd,b1_spx,b1_usd
1989-08-23,1.0,1.0,0.0,3.95,5.2,16.547,1332.0,99.68832,10.5,4.1,3.0,3.0,7.307,9.25,-0.09,0.0,1.852,-95.0,-0.03078,-0.5,0.0,0.0,0.0,0.0,0.0,326.646881,98.601609,0.591493,0.029379
1989-10-04,1.0,1.0,0.0,3.8,5.3,15.66,1279.0,99.69093,10.5,4.1,3.0,3.0,7.307,9.0,-0.15,0.1,-0.887,-53.0,0.00261,0.0,0.0,0.0,0.0,0.0,-0.25,346.730932,100.55841,0.0229,0.041353
1989-11-15,1.0,1.0,0.0,3.92,5.4,13.37,1351.0,99.72887,10.5,6.2,0.8,2.9,7.283,9.0,0.12,0.1,-2.29,72.0,0.03794,0.0,2.1,-2.2,-0.1,-0.024,0.0,351.846065,101.280681,-0.248915,-0.072283
1989-12-20,1.0,1.0,0.0,3.94,5.4,13.55,1251.0,99.75903,10.5,6.2,0.8,2.9,7.283,8.5,0.0,0.1,-0.031,-159.0,0.05376,0.0,0.0,0.0,0.0,0.0,-0.5,337.15443,98.881178,0.203495,-0.050785
1990-02-08,1.0,1.0,0.0,3.85,5.4,16.308,1551.0,99.79324,10.0,18.5,4.4,4.4,7.356,8.25,-0.09,0.0,2.758,300.0,0.03421,-0.5,12.3,3.6,1.5,0.073,-0.25,355.827805,94.598438,-0.451468,-0.043729
1990-03-28,1.0,1.0,0.0,3.95,5.2,14.486,1289.0,99.82265,10.0,18.5,4.4,4.4,7.356,8.25,0.1,-0.2,-1.822,-262.0,0.02941,0.0,0.0,0.0,0.0,0.0,0.0,326.243845,91.701826,0.247458,0.043299
1990-05-16,1.0,1.0,0.0,3.8,5.4,14.022,1212.0,99.5741,10.0,5.1,1.5,4.6,7.391,8.25,-0.15,0.2,-0.464,-77.0,-0.24855,0.0,-13.4,-2.9,0.2,0.035,0.0,342.031139,94.301928,-0.129148,-0.032449
1990-07-05,1.0,1.0,0.0,3.94,5.2,14.149,1177.0,99.28502,10.0,5.1,1.5,4.6,7.391,8.25,0.14,-0.2,0.127,-35.0,-0.28908,0.0,0.0,0.0,0.0,0.0,0.0,349.570252,91.670526,0.261849,0.017318
1990-08-22,1.0,1.0,1.0,4.14,5.7,13.859,1115.0,98.40642,10.0,3.1,0.3,3.5,7.377,8.25,0.2,0.5,-0.29,-62.0,-0.8786,0.0,-2.0,-1.2,-1.1,-0.014,0.0,364.44334,91.969602,-0.381664,-0.096868
1990-10-03,1.0,1.0,1.0,4.13,5.9,14.298,1110.0,97.9221,10.0,3.1,0.3,3.5,7.377,8.0,-0.01,0.2,0.439,-5.0,-0.48432,0.0,0.0,0.0,0.0,0.0,-0.25,338.111373,86.951395,-0.535971,-0.025692


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(275, 30)

In [9]:
# Per-column dtypes; every column in the recorded output is float64.
df.dtypes

fed_party      float64
potus_party    float64
pce            float64
ue             float64
cars           float64
recess         float64
house          float64
cli            float64
loan           float64
exports        float64
rgdp           float64
gdpg           float64
veloc          float64
ffr            float64
d_pce          float64
d_ue           float64
d_cars         float64
d_recess       float64
d_house        float64
d_cli          float64
d_loan         float64
d_exports      float64
d_rgdp         float64
d_gdpg         float64
d_veloc        float64
d_ffr          float64
b0_spx         float64
b0_usd         float64
b1_spx         float64
b1_usd         float64
dtype: object

In [19]:
# Inspect a single column by position.
# NOTE(review): the recorded output names position 17 `d_recess`, but a
# positional lookup silently shifts if the CSV's column order changes —
# confirm against the current file, or select the column by name instead.
df.iloc[:, 17]

1989-08-23    0.0
1989-10-04    0.0
1989-11-15    0.0
1989-12-20    0.0
1990-02-08    0.0
             ... 
2023-05-03    0.0
2023-06-14    0.0
2023-07-26    0.0
2023-09-20    0.0
2023-11-01    0.0
Name: d_recess, Length: 275, dtype: float64

In [15]:
# Positions of the columns holding fewer than 4 distinct values
# (nunique() does not count NaN).
indices = [
    position
    for position, column in enumerate(df.columns)
    if df[column].nunique() < 4
]
    


In [16]:
# Show the column positions collected in the previous cell.
indices

[0, 1, 2]

In [45]:
def famd_df(df, n_components=4, cat_threshold=4, random_state=None):
    """Reduce a mixed numeric/categorical frame with FAMD.

    Columns with fewer than ``cat_threshold`` distinct non-null values are
    treated as categorical and cast to ``str``; every other column is treated
    as numeric and standardised with ``StandardScaler``. The two parts are
    concatenated (numeric columns first) and passed to ``prince.FAMD``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data containing both kinds of column.
    n_components : int, default 4
        Number of components requested from ``FAMD``.
    cat_threshold : int, default 4
        A column is categorical when its ``nunique()`` is strictly below
        this value.
    random_state : int or None, default None
        Forwarded to ``FAMD``. Pass an integer to make reruns repeatable;
        the default leaves the estimator unseeded, as before.

    Returns
    -------
    The result of ``FAMD.fit_transform``. In this notebook's recorded output
    that is a frame indexed like ``df`` with one column per component.

    Raises
    ------
    ValueError
        If the threshold split leaves no numeric columns or no categorical
        columns, since FAMD needs both kinds.
    """
    # One vectorised pass instead of a per-column loop. nunique() skips NaN,
    # so only observed values count towards the threshold. A positional
    # boolean mask keeps the split unambiguous even with repeated labels.
    is_categorical = (df.nunique() < cat_threshold).to_numpy()

    categorical_part = df.loc[:, is_categorical]
    numeric_part = df.loc[:, ~is_categorical]

    # Fail early with an actionable message rather than deep inside
    # sklearn/prince when one side of the split is empty.
    if numeric_part.shape[1] == 0:
        raise ValueError(
            "famd_df needs at least one numeric column; every column has "
            f"fewer than cat_threshold={cat_threshold} distinct values"
        )
    if categorical_part.shape[1] == 0:
        raise ValueError(
            "famd_df needs at least one categorical column; no column has "
            f"fewer than cat_threshold={cat_threshold} distinct values"
        )

    # Standardise the numeric block. fit_transform returns a bare array, so
    # restore both the index and the original column names; otherwise the
    # features end up labelled 0..k and mixed with string-named columns.
    scaled_values = StandardScaler().fit_transform(numeric_part)
    numeric_scaled = pd.DataFrame(
        scaled_values,
        index=df.index,
        columns=numeric_part.columns,
    )

    # The categorical columns arrive already encoded as numbers; casting to
    # str gives them object dtype so FAMD treats them as categories.
    categorical_str = categorical_part.astype(str)

    features = pd.concat([numeric_scaled, categorical_str], axis=1)

    famd = FAMD(n_components=n_components, random_state=random_state)
    return famd.fit_transform(features)
    

In [46]:
# Run FAMD on the full frame with the function's default arguments.
famd_df(df)

component,0,1,2,3
1989-08-23,0.137784,-3.401774,1.799982,0.034764
1989-10-04,0.488936,-3.421260,1.444929,0.382391
1989-11-15,0.433805,-3.508263,1.052209,1.058645
1989-12-20,0.872683,-3.237511,1.734121,0.780077
1990-02-08,-1.782782,-2.601548,3.410342,0.193162
...,...,...,...,...
2023-05-03,-0.579000,-2.195915,-3.027815,1.355374
2023-06-14,-0.793874,-1.562868,-1.634355,0.949730
2023-07-26,-2.117895,-1.462514,-0.131908,1.066756
2023-09-20,-0.989583,-1.999002,-2.238130,2.186384
