In [13]:
import os
import pandas as pd
import statsmodels.formula.api as smf

# Set working directory.
# expanduser() only expands a leading "~", so a home-relative path is used
# here instead of a user-specific absolute one (which expanduser() would
# pass through unchanged).
os.chdir(os.path.expanduser("~/Desktop"))

# Load the long-format table and parse the date column.
df = pd.read_csv("table_final.csv")
df['date'] = pd.to_datetime(df['date'])

# Collapse duplicate (date, seriesName) entries to their mean so the pivot
# below sees exactly one value per cell.
df_clean = df.groupby(['date', 'seriesName'], as_index=False)['data'].mean()

# Pivot long → wide: one row per date, one column per series.
df_wide = df_clean.pivot(index='date', columns='seriesName', values='data')
print("Available columns after pivot:\n", df_wide.columns.tolist())

# Series required for the modelling table; fail early if any is absent.
target_vars = ['UFMEN', 'UFEC', 'NRSI511N', 'CCIN', 'NRSV2N', 'CCOND', 'RECPROB', 'VSMINN', 'VSMIDN', 
               'Personal consumption expenditures (PCE)', 'Education']
for var in target_vars:
    if var not in df_wide.columns:
        raise ValueError(f"Missing required column: {var}")

# Keep only the target series, convert to numeric (non-numeric entries become
# NaN) and keep complete rows only.  apply() returns a new frame, so df_wide
# is left untouched.
df_model = df_wide[target_vars].apply(pd.to_numeric, errors='coerce').dropna()

# Period-over-period percentage change.  A zero in the previous row yields
# +/-inf, which dropna() alone would keep; treat those as missing as well so
# the modelling table contains finite values only.
_inf = float("inf")
df_model = (
    df_model.pct_change()
    .replace([_inf, -_inf], float("nan"))
    .dropna()
)

# Expose the date index as the first column, followed by the target series.
df_model.insert(0, 'date', df_model.index)

# Check row count
print("Remaining rows after pct_change + dropna:", len(df_model))
if df_model.empty:
    raise ValueError(
        "No complete rows remain after pct_change + dropna; cannot fit the regression."
    )

# Confirm types and nulls
for col in target_vars:
    print(f"{col}: {df_model[col].isna().sum()} NaNs, dtype: {df_model[col].dtype}")

# Run regression of UFMEN on UFEC, NRSV2N, NRSI511N and CCIN, all expressed as
# percentage changes (df_model), with the default intercept.
formula = "UFMEN ~ UFEC + NRSV2N + NRSI511N + CCIN"
model = smf.ols(formula, data=df_model).fit()
print(model.summary())


Available columns after pivot:
 ['Addenda:', 'CCIN', 'CCOND', 'CPIUANN', 'Clothing and footwear', 'Clothing, footwear, and related services', 'Communication', 'Durable goods', 'ERSI5N', 'Education', 'Energy goods and services5', 'Final consumption expenditures of nonprofit institutions serving households (NPISHs)1', 'Financial services and insurance', 'Food and beverages purchased for off-premises consumption', 'Food services and accommodations', 'Furnishings and durable household equipment', 'Furnishings, household equipment, and routine household maintenance', 'GDPA', 'Gasoline and other energy goods', 'Goods', 'Gross output of nonprofit institutions2', 'HLNCVP', 'Health', 'Health care', 'Household consumption expenditures (for services)', 'Household consumption expenditures7', 'Housing and utilities', 'Housing, utilities, and fuels', 'Less: Receipts from sales of goods and services by nonprofit institutions3', 'Market-based PCE excluding food and energy6', 'Market-based PCE6', 'Moto

In [4]:
# Specification 2: UFME regressed on UFEC, NRSV2N, NRSI511N and CCIN.
# Fitted on df_wide directly, i.e. on the pivoted values rather than the
# percentage changes held in df_model.
formula2 = "UFME ~ UFEC + NRSV2N + NRSI511N + CCIN"
model2 = smf.ols(
    formula=formula2,
    data=df_wide,
).fit()
print(model2.summary())

                            OLS Regression Results                            
Dep. Variable:                   UFME   R-squared:                       0.957
Model:                            OLS   Adj. R-squared:                  0.949
Method:                 Least Squares   F-statistic:                     127.6
Date:                Tue, 15 Jul 2025   Prob (F-statistic):           2.39e-15
Time:                        09:40:19   Log-Likelihood:                -111.91
No. Observations:                  28   AIC:                             233.8
Df Residuals:                      23   BIC:                             240.5
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    649.9786     85.396      7.611      0.0

In [None]:
# Specification 3: UFMEN regressed on UFEC, NRSI511N, CCIN and RECPROB.
# Fitted on df_wide directly, i.e. on the pivoted values rather than the
# percentage changes held in df_model.
formula3 = "UFMEN ~ UFEC + NRSI511N + CCIN + RECPROB"
model3 = smf.ols(
    formula=formula3,
    data=df_wide,
).fit()
print(model3.summary())

                            OLS Regression Results                            
Dep. Variable:                  UFMEN   R-squared:                       0.963
Model:                            OLS   Adj. R-squared:                  0.957
Method:                 Least Squares   F-statistic:                     149.6
Date:                Mon, 14 Jul 2025   Prob (F-statistic):           4.15e-16
Time:                        13:46:56   Log-Likelihood:                -112.79
No. Observations:                  28   AIC:                             235.6
Df Residuals:                      23   BIC:                             242.3
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    898.2764     88.599     10.139      0.0

In [None]:
# Specification 4: UFMEN regressed on eight series (UFEC, PMEA, PXEA,
# NRSI511N, CCIN, CCOND, NRSV2N, VSMINN), fitted on df_wide directly.
formula4 = "UFMEN ~ UFEC + PMEA + PXEA + NRSI511N + CCIN + CCOND + NRSV2N + VSMINN"
model4 = smf.ols(
    formula=formula4,
    data=df_wide,
).fit()
print(model4.summary())

                            OLS Regression Results                            
Dep. Variable:                  UFMEN   R-squared:                       0.971
Model:                            OLS   Adj. R-squared:                  0.959
Method:                 Least Squares   F-statistic:                     80.73
Date:                Mon, 14 Jul 2025   Prob (F-statistic):           5.00e-13
Time:                        13:48:38   Log-Likelihood:                -109.17
No. Observations:                  28   AIC:                             236.3
Df Residuals:                      19   BIC:                             248.3
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    829.2209    702.601      1.180      0.2

In [21]:
# Give the PCE series a short alias so it can be written as a plain term in
# the formula below (its full label contains spaces and parentheses).
# rename() ignores labels that are not present, so re-running this cell is
# harmless.
df_wide = df_wide.rename(
    columns={
        'Personal consumption expenditures (PCE)': 'PCE',
    }
)

# Specification 5: UFMEN regressed on ten series, fitted on df_wide directly.
formula5 = 'UFMEN ~ UFEC + PCE + VSMINN + CCOND + RECPROB + PXEA + PMEA + NRSI511N + VSMIDN + Education'
model5 = smf.ols(
    formula=formula5,
    data=df_wide,
).fit()
print(model5.summary())

                            OLS Regression Results                            
Dep. Variable:                  UFMEN   R-squared:                       0.986
Model:                            OLS   Adj. R-squared:                  0.979
Method:                 Least Squares   F-statistic:                     124.2
Date:                Tue, 15 Jul 2025   Prob (F-statistic):           7.31e-14
Time:                        10:46:05   Log-Likelihood:                -98.678
No. Observations:                  28   AIC:                             219.4
Df Residuals:                      17   BIC:                             234.0
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    304.2706    601.335      0.506      0.6

In [11]:
# Short aliases for the two long series labels used in the formula below.
# Both are applied in a single rename(); labels that are not present (for
# example because they were already renamed) are ignored by default.
df_wide = df_wide.rename(
    columns={
        'Personal consumption expenditures (PCE)': 'PCE',
        'Food and beverages purchased for off-premises consumption': 'FNBOPC',
    }
)

# Specification 6: UFMEN regressed on UFEC, FNBOPC, PCE and NRSI511N,
# fitted on df_wide directly.
formula6 = 'UFMEN ~ UFEC + FNBOPC + PCE + NRSI511N'
model6 = smf.ols(
    formula=formula6,
    data=df_wide,
).fit()
print(model6.summary())

                            OLS Regression Results                            
Dep. Variable:                  UFMEN   R-squared:                       0.950
Model:                            OLS   Adj. R-squared:                  0.941
Method:                 Least Squares   F-statistic:                     109.2
Date:                Tue, 15 Jul 2025   Prob (F-statistic):           1.31e-14
Time:                        10:37:49   Log-Likelihood:                -117.02
No. Observations:                  28   AIC:                             244.0
Df Residuals:                      23   BIC:                             250.7
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.9362    719.842      0.010      0.9