In [65]:
import pickle
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [66]:
# Load the dictionary from the file.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that this project generated itself.
with open('../Intermediate Data/belongings_by_year.pkl', 'rb') as f:
    belongings_by_year = pickle.load(f)

# Load in Financial Data.
# Only the four columns used downstream are parsed (usecols) instead of
# reading the whole file and discarding the rest afterwards; `fyear` is then
# exposed as `year`, and the final selection fixes the column order.
financial_data = (
    pd.read_csv(
        "../Financial Data/CompustatCRSP_Annual.csv",
        usecols=['fyear', 'bkvlps', 'epspx', 'cik'],
    )
    .rename(columns={"fyear": "year"})
    .loc[:, ['year', 'bkvlps', 'epspx', 'cik']]
)


In [67]:
# Flatten the nested {year: {cik: motifs}} mapping into one record per
# (year, cik) pair. Each record is a fresh dict, so the `motifs` mappings held
# inside belongings_by_year are not modified by adding the 'year'/'cik' keys.
# The year is cast to float -- presumably to match the dtype of the financial
# data's year column used as a merge key; TODO confirm.
triadic_data = [
    {**motifs, 'year': float(year), 'cik': cik}
    for year, companies in belongings_by_year.items()
    for cik, motifs in companies.items()
]

triadic_df = pd.DataFrame(triadic_data)

# Join the financial rows with the triadic rows that share the same
# (cik, year) key (pandas' default inner join).
merged_df = financial_data.merge(triadic_df, on=['cik', 'year'])

# Prefix every triad-census column with "motif_".
TRIAD_NAMES = ["003", "012", "102", "021D", "021U", "021C", "111D", "111U", "030T", "030C", "201", "120D", "120U", "120C", "210", "300"]
rename_dict = {triad: "motif_" + triad for triad in TRIAD_NAMES}
merged_df = merged_df.rename(columns=rename_dict)

# Snapshot of the merged table before any dummy encoding.
merged_df.to_csv("../Intermediate Data/no_dummies_financial_triad_merged.csv")

# Replace the year column with float-valued dummy columns; the first year
# level is dropped.
merged_df = pd.get_dummies(
    merged_df,
    columns=["year"],
    drop_first=True,
    dtype=float,
)

# Snapshot of the table including the year dummies.
merged_df.to_csv("../Intermediate Data/financial_triad_merged.csv")

In [68]:

# Design matrix: every column of merged_df except the two outcome variables
# and the firm identifier, with a constant term added.
X = sm.add_constant(merged_df.drop(columns=['bkvlps', 'epspx', 'cik']))

# Dependent variables, one per regression.
y_bkvlps = merged_df['bkvlps']
y_epspx = merged_df['epspx']

# Fit one OLS model per dependent variable on the shared design matrix;
# missing='drop' tells statsmodels to discard observations with missing values.
model_bkvlps, model_epspx = (
    sm.OLS(outcome, X, missing='drop').fit()
    for outcome in (y_bkvlps, y_epspx)
)

In [69]:
from docx import Document
from docx.shared import Pt

def regression_to_word(models, model_names, file_name):
    """Write the text summary of each fitted model to a Word document.

    For every (model, name) pair a level-1 heading
    ``Regression Results for <name>`` is added, followed by the output of
    ``model.summary().as_text()``.

    Parameters
    ----------
    models : iterable
        Fitted result objects exposing ``summary().as_text()``.
    model_names : iterable of str
        Labels used in the headings, in the same order as ``models``.
    file_name : str or path-like
        Destination passed to ``Document.save``.

    Raises
    ------
    ValueError
        If ``models`` and ``model_names`` differ in length. ``zip`` would
        otherwise silently leave the surplus entries out of the report.
    """
    models = list(models)
    model_names = list(model_names)
    if len(models) != len(model_names):
        raise ValueError(
            f"models and model_names must have the same length "
            f"(got {len(models)} and {len(model_names)})"
        )

    doc = Document()

    for model, name in zip(models, model_names):
        doc.add_heading(f'Regression Results for {name}', level=1)
        # The text summary aligns its columns with spaces, so it only lines up
        # in a fixed-width font; a small size keeps the rows from wrapping.
        summary_run = doc.add_paragraph().add_run(model.summary().as_text())
        summary_run.font.name = 'Courier New'
        summary_run.font.size = Pt(8)

    doc.save(file_name)

# Pair each dependent-variable label with its fitted regression, then split
# the pairs into the two parallel lists that regression_to_word expects.
fitted_by_name = {'bkvlps': model_bkvlps, 'epspx': model_epspx}
model_names = list(fitted_by_name.keys())
models = list(fitted_by_name.values())

# Write both regression summaries to a single Word document.
regression_to_word(
    models,
    model_names,
    '../Outputs/regression_results.docx',
)