In [96]:
# Pathlib: To build output paths and create output folders
from pathlib import Path

# Pandas: To manipulate DataFrames
import pandas as pd
# Seaborn & Matplotlib: To plot scatter plots
import seaborn as sns
import matplotlib.pyplot as plt
# Statsmodels: To estimate our regression model
import statsmodels.api as sm
# Numpy: For mathematical computations
import numpy as np

In [97]:
# Workbook sheets to load. The first one ends up as the dependent variable of
# the regression, the remaining ones as regressors.
sheet = ["Y variable", "x1-market", "x2-value", "x3-small"]

# Read every sheet in a single pass over the workbook: passing a list to
# `sheet_name` returns a dict {sheet name: DataFrame}. The first 6 rows of each
# sheet are skipped (presumably export metadata above the header — confirm
# against the file).
prices_by_sheet = pd.read_excel("TP2-Dataset.xlsx", skiprows=range(6), sheet_name=sheet)

# One single-column DataFrame of log returns per sheet, keyed by sheet name.
data_dict = {}
for title in sheet:
    prices = prices_by_sheet[title]["PX_LAST"]
    # Log return ln(P_t / P_{t-1}); the first row has no predecessor and is NaN.
    log_returns = np.log(prices / prices.shift(1)).rename(title)
    data_dict[title] = log_returns.to_frame().dropna()

sets = list(data_dict.values())

# NOTE(review): the series are aligned on row position, not on a date column —
# confirm that all sheets cover the same dates in the same order.
# The final dropna() keeps only rows where every series has a value, so a
# shorter or gappy sheet cannot leak NaN into the regression sample.
df = pd.concat(sets, axis=1).dropna()


In [98]:
# Display names for the columns, listed in the same order as `sheet`.
name = ['Tech', 'Market', 'Value', 'Size']

# Pair each sheet name with its display name. zip() avoids reusing `name` as
# both the list and the loop variable, which the previous comprehension did.
assert len(name) == len(sheet), "each sheet needs exactly one display name"
ren = dict(zip(sheet, name))
df = df.rename(columns=ren)

In [99]:
# Summary statistics of every column, computed once and reused for both the
# LaTeX export and the displayed output.
desc_stats = df.describe()

# Create the output folder if it is missing so the cell also runs on a fresh
# checkout (open() does not create parent directories).
Path('latex').mkdir(parents=True, exist_ok=True)

with open('latex/descriptive_stats.txt', 'w') as txt_file:
    txt_file.write(desc_stats.to_latex())

desc_stats


Unnamed: 0,Tech,Market,Value,Size
count,517.0,517.0,517.0,517.0
mean,-0.003133,-0.000783,-0.000799,-0.000167
std,0.03186,0.019804,0.022181,0.02759
min,-0.151192,-0.09093,-0.095603,-0.134211
25%,-0.021413,-0.011619,-0.012017,-0.014185
50%,-0.004975,-0.002268,-0.001781,-0.00237
75%,0.01359,0.007087,0.007886,0.01289
max,0.140315,0.165685,0.211631,0.264327


In [100]:
# Define dependent (y) and independent (x) variables:
# the first column of `name` is regressed on all remaining ones.
y = df[name[0]]
x = df[name[1:]]

# Add a constant to the predictor variables (intercept)
x = sm.add_constant(x)

# Fit the OLS model to the sample data
model_ols = sm.OLS(y, x).fit()

# Build the summary once and reuse it for the export and the displayed output.
model_summary = model_ols.summary()

# Create the output folder if it is missing so the cell also runs on a fresh
# checkout (open() does not create parent directories).
Path('latex').mkdir(parents=True, exist_ok=True)

with open('latex/model_summary.txt', 'w') as txt_file:
    # Separate the tables with a newline so one table's closing environment is
    # not glued to the next table's opening line.
    latex_tables = [table.as_latex_tabular() for table in model_summary.tables]
    txt_file.write('\n'.join(latex_tables) + '\n')

# View model summary (intercept, coefficient, r-squared)
model_summary

0,1,2,3
Dep. Variable:,Tech,R-squared:,0.499
Model:,OLS,Adj. R-squared:,0.496
Method:,Least Squares,F-statistic:,170.1
Date:,"Thu, 06 Oct 2022",Prob (F-statistic):,1.56e-76
Time:,14:53:06,Log-Likelihood:,1227.2
No. Observations:,517,AIC:,-2446.0
Df Residuals:,513,BIC:,-2429.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0022,0.001,-2.247,0.025,-0.004,-0.000
Market,2.8998,0.166,17.423,0.000,2.573,3.227
Value,-1.6893,0.131,-12.932,0.000,-1.946,-1.433
Size,-0.1698,0.072,-2.352,0.019,-0.312,-0.028

0,1,2,3
Omnibus:,12.445,Durbin-Watson:,2.194
Prob(Omnibus):,0.002,Jarque-Bera (JB):,23.633
Skew:,-0.007,Prob(JB):,7.38e-06
Kurtosis:,4.047,Cond. No.,204.0
