In [1]:
import numpy as np
import pandas as pd
from statsmodels.datasets import grunfeld
# Load the Grunfeld panel bundled with statsmodels and store `year` as int64.
data = grunfeld.load_pandas().data
data["year"] = data["year"].astype(np.int64)

# The package expects numeric entity IDs: number the firms 1..N.
# np.unique returns the names sorted, so IDs follow alphabetical firm order.
firm_names = np.unique(data["firm"])
N = len(firm_names)
ID = {name: code for name, code in zip(firm_names.tolist(), np.arange(1, N + 1))}
data["firm"] = data["firm"].map(ID)

# Panel groups are expressed through a (firm, year) MultiIndex;
# `invest` is the target and the remaining columns are the regressors.
data = data.set_index(["firm", "year"])
y = data["invest"]
X = data.drop(columns="invest")

# Fit a single-factor IPCA model without an intercept and pull out the estimates.
from ipca import InstrumentedPCA
regr = InstrumentedPCA(n_factors=1, intercept=False).fit(X=X, y=y)
Gamma, Factors = regr.get_factors(label_ind=True)



The panel dimensions are:
n_samples: 11 , L: 2 , T: 20





Step 1 - Aggregate Update: 991040.1067154529
Step 2 - Aggregate Update: 0.03116845703445048
Step 3 - Aggregate Update: 0.013774627857861516
Step 4 - Aggregate Update: 0.006045637463264697
Step 5 - Aggregate Update: 0.002646397095048436
Step 6 - Aggregate Update: 0.0011571703804043043
Step 7 - Aggregate Update: 0.0005057548444745708
Step 8 - Aggregate Update: 0.0002210022866247774
Step 9 - Aggregate Update: 9.656419373532676e-05
Step 10 - Aggregate Update: 4.219093867988133e-05
Step 11 - Aggregate Update: 1.843381188348925e-05
Step 12 - Aggregate Update: 8.053932978979716e-06
-- Convergence Reached --


In [2]:
# Display the current `data` frame; as the cell's last expression it is rendered as the output.
data

Unnamed: 0_level_0,Unnamed: 1_level_0,invest,value,capital
firm,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,1935,317.600,3078.500,2.800
6,1936,391.800,4661.700,52.600
6,1937,410.600,5387.100,156.900
6,1938,257.700,2792.200,209.200
6,1939,330.800,4313.200,203.400
...,...,...,...,...
1,1950,4.770,36.494,75.847
1,1951,6.532,46.082,77.367
1,1952,7.329,57.616,78.631
1,1953,9.020,57.441,80.215


In [3]:
# Reload the raw Grunfeld frame (string firm names, default integer index),
# casting `year` to int64, and display it.
data = grunfeld.load_pandas().data.astype({"year": np.int64})
data

Unnamed: 0,invest,value,capital,firm,year
0,317.600,3078.500,2.800,General Motors,1935
1,391.800,4661.700,52.600,General Motors,1936
2,410.600,5387.100,156.900,General Motors,1937
3,257.700,2792.200,209.200,General Motors,1938
4,330.800,4313.200,203.400,General Motors,1939
...,...,...,...,...,...
215,4.770,36.494,75.847,American Steel,1950
216,6.532,46.082,77.367,American Steel,1951
217,7.329,57.616,78.631,American Steel,1952
218,9.020,57.441,80.215,American Steel,1953


In [5]:
# Load the market data from a parquet file; the path is relative to the working directory.
md = pd.read_parquet("market_data.parquet")
# Preview only the first rows instead of rendering the whole frame.
md.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,price,mktcap,liquidity,sector
date,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999-05-06,1,461.0,1235952000.0,129080.0,
1999-05-07,1,455.0,1219866000.0,4550.0,
1999-05-10,1,455.0,1219866000.0,910.0,
1999-05-11,1,460.0,1233271000.0,4600.0,
1999-05-12,1,460.0,1233271000.0,460.0,


In [23]:
from config.experiment_config import ExperimentConfig

# Read the experiment frame from the CSV located at PATH_OUTPUT / DF_FILENAME.
# NOTE(review): the output below shows a stray "Unnamed: 0" column, which looks like
# an index that was saved into the file by an earlier write — confirm.
data_df = pd.read_csv(ExperimentConfig.PATH_OUTPUT / ExperimentConfig.DF_FILENAME)
# Preview the first rows.
data_df.head()

Unnamed: 0.1,Unnamed: 0,date,EDS,CBB,AAL,X,PCAR,SII,NUE,CINF,...,inflation_total_r,spread,tail,ts,low_risk,momentum,quality,size,value,^VIX
0,0,2004-03-22,-0.010262,,,-0.027732,-0.008479,,-0.026502,-0.005893,...,0.005008,1.227273,-0.009,-0.010733,0.01007,-0.00448,0.002255,-0.005556,0.00679,21.58
1,1,2004-03-23,-0.00311,,,0.002824,-0.01134,,0.023428,-0.008891,...,-0.00076,-0.734694,-0.001816,-0.002406,0.001321,0.001231,0.001307,0.00187,0.001374,20.67
2,2,2004-03-24,-0.00936,,,-0.029287,0.003385,,-0.036756,-0.000236,...,-0.001592,1.076923,-0.002224,0.009065,-0.002535,-0.001107,0.002863,-0.000396,-0.004048,19.809999
3,3,2004-03-25,0.002625,,,0.011024,0.025675,,0.02728,0.00425,...,-0.004365,1.111111,0.011348,0.006375,-0.014668,0.000857,0.001405,0.004555,-0.010705,17.879999
4,4,2004-03-26,0.005236,,,0.038738,-0.002558,,0.010101,-0.008229,...,-0.003839,-1.035088,-0.002204,-0.00384,-0.003715,0.004026,-0.002044,0.002799,-0.000894,17.33


In [17]:
# Parse the `date` column into datetimes and promote it to the index.
# Not re-runnable as-is: after this cell `date` is no longer a column.
data_df = data_df.assign(date=pd.to_datetime(data_df["date"])).set_index("date")

In [10]:
import yfinance as yfin

# Download ^VIX history spanning TRAIN_START_DATE .. TEST_END_DATE and display it.
# NOTE: this rebinds `data`, the name the earlier cells use for the Grunfeld frame.
data = yfin.download(
    "^VIX",
    start=ExperimentConfig.TRAIN_START_DATE,
    end=ExperimentConfig.TEST_END_DATE,
)
data

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,^VIX,^VIX,^VIX,^VIX,^VIX
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2000-02-08,21.250000,22.080000,21.170000,21.680000,0
2000-02-09,22.900000,22.900000,21.290001,21.540001,0
2000-02-10,23.070000,23.549999,22.750000,23.360001,0
2000-02-11,24.420000,24.799999,22.820000,22.940001,0
2000-02-14,24.379999,24.889999,24.270000,24.670000,0
...,...,...,...,...,...
2025-02-27,21.129999,21.469999,17.670000,18.250000,0
2025-02-28,19.629999,22.400000,19.049999,21.209999,0
2025-03-03,22.780001,24.309999,19.250000,19.830000,0
2025-03-04,23.510000,26.350000,21.709999,22.959999,0


In [18]:
# Attach data["Close"] to data_df by matching the two indexes; merge's default
# how="inner" keeps only index values present in both frames.
# `data` is expected to be the ^VIX download here — depends on cell execution order.
# The merged frame is only displayed; it is not assigned back to data_df.
data_df.merge(data["Close"], left_index=True, right_index=True)

Unnamed: 0,EDS,CBB,AAL,X,PCAR,SII,NUE,CINF,PG,CMG,...,inflation_total_r,spread,tail,ts,low_risk,momentum,quality,size,value,^VIX
2004-03-22,-0.010262,,,-0.027732,-0.008479,,-0.026502,-0.005893,-0.000577,,...,0.005008,1.227273,-0.009000,-0.010733,0.010070,-0.004480,0.002255,-0.005556,0.006790,21.580000
2004-03-23,-0.003110,,,0.002824,-0.011340,,0.023428,-0.008891,-0.005867,,...,-0.000760,-0.734694,-0.001816,-0.002406,0.001321,0.001231,0.001307,0.001870,0.001374,20.670000
2004-03-24,-0.009360,,,-0.029287,0.003385,,-0.036756,-0.000236,-0.003290,,...,-0.001592,1.076923,-0.002224,0.009065,-0.002535,-0.001107,0.002863,-0.000396,-0.004048,19.809999
2004-03-25,0.002625,,,0.011024,0.025675,,0.027280,0.004250,0.014755,,...,-0.004365,1.111111,0.011348,0.006375,-0.014668,0.000857,0.001405,0.004555,-0.010705,17.879999
2004-03-26,0.005236,,,0.038738,-0.002558,,0.010101,-0.008229,-0.005644,,...,-0.003839,-1.035088,-0.002204,-0.003840,-0.003715,0.004026,-0.002044,0.002799,-0.000894,17.330000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,,,,,0.004585,,0.007032,0.016585,0.004937,0.010718,...,0.000512,0.167912,0.009571,0.032535,-0.003275,0.001742,-0.000836,0.001441,-0.002167,14.270000
2024-12-26,,,,,0.002377,,0.010304,0.000275,0.007222,-0.012372,...,-0.000290,0.121800,-0.000379,0.000840,-0.003629,-0.000382,-0.002163,0.008920,-0.000932,14.730000
2024-12-27,,,,,-0.008157,,-0.011463,-0.008671,-0.003702,-0.011225,...,-0.000662,-0.273056,-0.010186,0.170322,0.007336,-0.004039,0.000551,-0.003385,0.005571,15.950000
2024-12-30,,,,,-0.009180,,-0.012108,-0.006317,-0.014393,-0.011352,...,-0.000241,0.171070,-0.009888,-0.013963,0.003767,0.000817,-0.000671,0.002048,0.005083,17.400000


In [19]:
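# Disabled: would write the merged frame (index included) to the same CSV path the notebook reads data_df from.
# data_df.merge(data["Close"], left_index=True, right_index=True).to_csv(ExperimentConfig.PATH_OUTPUT / ExperimentConfig.DF_FILENAME)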

In [21]:
# Relabel the column named "Unnamed: 0" as `date`, then display the frame.
data_df = data_df.rename({"Unnamed: 0": "date"}, axis="columns")
data_df

Unnamed: 0,date,EDS,CBB,AAL,X,PCAR,SII,NUE,CINF,PG,...,inflation_total_r,spread,tail,ts,low_risk,momentum,quality,size,value,^VIX
0,2004-03-22,-0.010262,,,-0.027732,-0.008479,,-0.026502,-0.005893,-0.000577,...,0.005008,1.227273,-0.009000,-0.010733,0.010070,-0.004480,0.002255,-0.005556,0.006790,21.580000
1,2004-03-23,-0.003110,,,0.002824,-0.011340,,0.023428,-0.008891,-0.005867,...,-0.000760,-0.734694,-0.001816,-0.002406,0.001321,0.001231,0.001307,0.001870,0.001374,20.670000
2,2004-03-24,-0.009360,,,-0.029287,0.003385,,-0.036756,-0.000236,-0.003290,...,-0.001592,1.076923,-0.002224,0.009065,-0.002535,-0.001107,0.002863,-0.000396,-0.004048,19.809999
3,2004-03-25,0.002625,,,0.011024,0.025675,,0.027280,0.004250,0.014755,...,-0.004365,1.111111,0.011348,0.006375,-0.014668,0.000857,0.001405,0.004555,-0.010705,17.879999
4,2004-03-26,0.005236,,,0.038738,-0.002558,,0.010101,-0.008229,-0.005644,...,-0.003839,-1.035088,-0.002204,-0.003840,-0.003715,0.004026,-0.002044,0.002799,-0.000894,17.330000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5226,2024-12-24,,,,,0.004585,,0.007032,0.016585,0.004937,...,0.000512,0.167912,0.009571,0.032535,-0.003275,0.001742,-0.000836,0.001441,-0.002167,14.270000
5227,2024-12-26,,,,,0.002377,,0.010304,0.000275,0.007222,...,-0.000290,0.121800,-0.000379,0.000840,-0.003629,-0.000382,-0.002163,0.008920,-0.000932,14.730000
5228,2024-12-27,,,,,-0.008157,,-0.011463,-0.008671,-0.003702,...,-0.000662,-0.273056,-0.010186,0.170322,0.007336,-0.004039,0.000551,-0.003385,0.005571,15.950000
5229,2024-12-30,,,,,-0.009180,,-0.012108,-0.006317,-0.014393,...,-0.000241,0.171070,-0.009888,-0.013963,0.003767,0.000817,-0.000671,0.002048,0.005083,17.400000


In [28]:
# Write data_df to PATH_OUTPUT / DF_FILENAME — the same path expression the read_csv
# cell loads from, so this overwrites that file. index=False leaves the index out.
# NOTE(review): if `date` is the index at this point (see the set_index cell), it
# would not be written to the file — confirm the frame's state before running.
data_df.to_csv(ExperimentConfig.PATH_OUTPUT / ExperimentConfig.DF_FILENAME, index=False)

In [24]:
# Add `spx_vol` as the element-wise square of the `spx` column.
data_df["spx_vol"] = data_df["spx"].pow(2)

In [26]:
# Remove the leftover "Unnamed: 0" column; raises KeyError if it is already gone.
data_df = data_df.drop(columns="Unnamed: 0")

In [27]:
# Display data_df in its current kernel state; as the cell's last expression it is rendered as the output.
data_df

Unnamed: 0,date,EDS,CBB,AAL,X,PCAR,SII,NUE,CINF,PG,...,spread,tail,ts,low_risk,momentum,quality,size,value,^VIX,spx_vol
0,2004-03-22,-0.010262,,,-0.027732,-0.008479,,-0.026502,-0.005893,-0.000577,...,1.227273,-0.009000,-0.010733,0.010070,-0.004480,0.002255,-0.005556,0.006790,21.580000,1.678974e-04
1,2004-03-23,-0.003110,,,0.002824,-0.011340,,0.023428,-0.008891,-0.005867,...,-0.734694,-0.001816,-0.002406,0.001321,0.001231,0.001307,0.001870,0.001374,20.670000,1.752228e-06
2,2004-03-24,-0.009360,,,-0.029287,0.003385,,-0.036756,-0.000236,-0.003290,...,1.076923,-0.002224,0.009065,-0.002535,-0.001107,0.002863,-0.000396,-0.004048,19.809999,5.735980e-06
3,2004-03-25,0.002625,,,0.011024,0.025675,,0.027280,0.004250,0.014755,...,1.111111,0.011348,0.006375,-0.014668,0.000857,0.001405,0.004555,-0.010705,17.879999,2.678248e-04
4,2004-03-26,0.005236,,,0.038738,-0.002558,,0.010101,-0.008229,-0.005644,...,-1.035088,-0.002204,-0.003840,-0.003715,0.004026,-0.002044,0.002799,-0.000894,17.330000,1.037875e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5226,2024-12-24,,,,,0.004585,,0.007032,0.016585,0.004937,...,0.167912,0.009571,0.032535,-0.003275,0.001742,-0.000836,0.001441,-0.002167,14.270000,1.219417e-04
5227,2024-12-26,,,,,0.002377,,0.010304,0.000275,0.007222,...,0.121800,-0.000379,0.000840,-0.003629,-0.000382,-0.002163,0.008920,-0.000932,14.730000,1.645328e-07
5228,2024-12-27,,,,,-0.008157,,-0.011463,-0.008671,-0.003702,...,-0.273056,-0.010186,0.170322,0.007336,-0.004039,0.000551,-0.003385,0.005571,15.950000,1.222293e-04
5229,2024-12-30,,,,,-0.009180,,-0.012108,-0.006317,-0.014393,...,0.171070,-0.009888,-0.013963,0.003767,0.000817,-0.000671,0.002048,0.005083,17.400000,1.145331e-04
