In [24]:
import pickle
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [43]:
# Load the pickled dictionary consumed by the next cell, which iterates it as
# {year: {cik: motifs}}.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files that this project generated itself.
with open('../Intermediate Data/belongings_by_year.pkl', 'rb') as f:
    belongings_by_year = pickle.load(f)

# Load the financial data. Only four columns are used downstream, so restrict
# parsing to them with `usecols` instead of reading the whole CSV and discarding
# the rest afterwards. `fyear` is renamed to `year` so it can be used as a merge
# key alongside `cik`; the final .loc fixes the column order, because `usecols`
# returns columns in file order rather than in the order listed.
financial_data = (
    pd.read_csv(
        "../Financial Data/CompustatCRSP_Annual.csv",
        usecols=['fyear', 'bkvlps', 'epspx', 'cik'],
    )
    .rename(columns={"fyear": "year"})
    .loc[:, ['year', 'bkvlps', 'epspx', 'cik']]
)


In [55]:
# Flatten the nested {year: {cik: motifs}} mapping into one record per
# (year, cik) pair.
triadic_data = []
for year, companies in belongings_by_year.items():
    for cik, motifs in companies.items():
        # Build a fresh record rather than writing 'year'/'cik' into `motifs`
        # itself, so the objects held by belongings_by_year stay unmodified.
        # Assumes `motifs` is a dict-like mapping of column -> value — TODO confirm.
        record = dict(motifs)
        # Presumably cast to float so the key lines up with the financial
        # data's `year` column in the merge below — verify against the CSV.
        record['year'] = float(year)
        record['cik'] = cik
        triadic_data.append(record)

triadic_df = pd.DataFrame(triadic_data)

# Merge the financial data with the triadic data. pd.merge defaults to an inner
# join, so only (cik, year) pairs present in both tables are kept.
merged_df = pd.merge(financial_data, triadic_df, on=['cik', 'year'])


# Create float dummy variables for the year only (the motif columns are left
# as they are). drop_first=True omits the first year level, which avoids
# perfect collinearity with the constant added in the regression cell.
merged_df = pd.get_dummies(merged_df, columns=["year"], dtype=float, drop_first=True)

# Persist the merged table (the DataFrame index is written as the first column).
merged_df.to_csv("../Intermediate Data/financial_triad_merged.csv")

In [58]:

# Regressors: every column of merged_df except the two outcome variables and
# the firm identifier, plus an intercept column added by statsmodels.
non_regressor_cols = ['bkvlps', 'epspx', 'cik']
X = sm.add_constant(merged_df.drop(columns=non_regressor_cols))

# Outcome series, one per regression.
y_bkvlps = merged_df['bkvlps']
y_epspx = merged_df['epspx']

# OLS of bkvlps on the regressors; missing='drop' tells statsmodels to discard
# observations containing NaN before fitting.
model_bkvlps = sm.OLS(y_bkvlps, X, missing='drop').fit()
print(model_bkvlps.summary())

# Same specification with epspx as the dependent variable.
model_epspx = sm.OLS(y_epspx, X, missing='drop').fit()
print(model_epspx.summary())

                            OLS Regression Results                            
Dep. Variable:                 bkvlps   R-squared:                       0.028
Model:                            OLS   Adj. R-squared:                  0.024
Method:                 Least Squares   F-statistic:                     6.779
Date:                Sun, 26 May 2024   Prob (F-statistic):           1.54e-20
Time:                        21:13:46   Log-Likelihood:                -22202.
No. Observations:                5158   AIC:                         4.445e+04
Df Residuals:                    5135   BIC:                         4.460e+04
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          12.1867      1.304      9.344      