# OLS Regression

In [21]:
import pandas as pd
import statsmodels.api as sm

# --- Load -------------------------------------------------------------------
# The whole analysis runs off a single CSV; the columns used below are
# 'car' (dependent variable) and 'ss', 'length' (regressors).
df = pd.read_csv('/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/ols_data.csv')

# --- Descriptive statistics -------------------------------------------------
# describe() yields statistics as rows; flip it so each variable is a row and
# keep only the five statistics that are reported, in this order.
summary_table = df.describe().T[['mean', 'std', 'min', 'max', 'count']]
print('Summary Statistics', summary_table, '', sep='\n')

# --- Pairwise correlations --------------------------------------------------
corr_matrix = df[['car', 'ss', 'length']].corr()
print('Correlation Matrix', corr_matrix, '', sep='\n')

# --- OLS fit ----------------------------------------------------------------
# sm.OLS does not add an intercept on its own, so a constant column is added
# to the regressors before fitting.
y = df['car']
X = sm.add_constant(df[['ss', 'length']])
model = sm.OLS(y, X).fit()
print('Regression Results', model.summary(), '', sep='\n')

# --- Regression equation table ----------------------------------------------
# One row per estimated term (constant included), one column per statistic.
coef_table = pd.DataFrame(
    {
        'Coefficient': model.params,
        'Std. Error': model.bse,
        't-value': model.tvalues,
        'p-value': model.pvalues,
    }
).rename_axis('Variable')

# Append the model-level statistics (R-squared, F-statistic and its p-value)
# as extra rows. Only the 'Coefficient' column carries a value for these rows;
# the remaining cells hold the placeholder '-', which turns those columns
# into mixed (object) columns.
fit_statistics = (
    ('R-squared', model.rsquared),
    ('F-statistic', model.fvalue),
    ('Prob (F-statistic)', model.f_pvalue),
)
for row_label, statistic in fit_statistics:
    coef_table.loc[row_label] = [statistic, '-', '-', '-']

print('Estimation of the Regression Equation')
print(coef_table)


Summary Statistics
              mean         std        min         max  count
car      -0.002063    0.035430  -0.106627    0.119565  134.0
ss        0.162771    0.301825  -0.587302    0.923077  134.0
length  157.835821  117.758271  20.000000  608.000000  134.0

Correlation Matrix
             car        ss    length
car     1.000000 -0.051804  0.047225
ss     -0.051804  1.000000 -0.494936
length  0.047225 -0.494936  1.000000

Regression Results
                            OLS Regression Results                            
Dep. Variable:                    car   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.012
Method:                 Least Squares   F-statistic:                    0.2169
Date:                Mon, 15 May 2023   Prob (F-statistic):              0.805
Time:                        14:57:31   Log-Likelihood:                 258.17
No. Observations:                 134   AIC:                            -51

In [22]:
import pandas as pd
import statsmodels.api as sm
import openpyxl

# Export cell: recomputes the summary statistics, correlation matrix and OLS
# fit from the CSV and writes each result to its own sheet of one workbook.
# The cell is self-contained, so it does not depend on variables left behind
# by an earlier cell.

# Read CSV file into a Pandas DataFrame
df = pd.read_csv('/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/ols_data.csv')

# Summary statistics table (one row per variable)
summary_table = df.describe().transpose()[['mean', 'std', 'min', 'max', 'count']]

# Calculate correlation matrix
corr_matrix = df[['car', 'ss', 'length']].corr()

# Define dependent and independent variables
y = df['car']
X = df[['ss', 'length']]

# Add constant to independent variables (sm.OLS fits no intercept otherwise)
X = sm.add_constant(X)

# Fit OLS regression model
model = sm.OLS(y, X).fit()

# Estimation of the regression equation table
coef_table = pd.DataFrame(model.params, columns=['Coefficient'])
coef_table['Std. Error'] = model.bse
coef_table['t-value'] = model.tvalues
coef_table['p-value'] = model.pvalues
coef_table.index.name = 'Variable'

# Add R-squared, F-statistic and its p-value as extra rows; only the
# 'Coefficient' column is meaningful for them, the rest is filled with '-'.
coef_table.loc['R-squared'] = [model.rsquared, '-', '-', '-']
coef_table.loc['F-statistic'] = [model.fvalue, '-', '-', '-']
coef_table.loc['Prob (F-statistic)'] = [model.f_pvalue, '-', '-', '-']

# Build the summary once and take its second table (index 1); the first
# entry of .data supplies the column labels, the remaining entries the rows.
coefficient_rows = model.summary().tables[1].data
regression_results_df = pd.DataFrame(coefficient_rows[1:], columns=coefficient_rows[0])

# Specify the file path and name
output_file = "/Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/olsresults.xlsx"

# Use the writer as a context manager: the workbook is saved and the file
# handle closed when the block exits, even if one of the writes raises.
# (ExcelWriter.save() is deprecated and no longer exists in pandas >= 2.0.)
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    summary_table.to_excel(writer, sheet_name='Summary Statistics')
    corr_matrix.to_excel(writer, sheet_name='Correlation Matrix')
    # NOTE(review): header=False drops the column labels assigned above, so
    # this sheet contains only the data rows - confirm that is intended.
    regression_results_df.to_excel(writer, sheet_name='Regression Results', index=False, header=False)
    coef_table.to_excel(writer, sheet_name='Estimation of the Regression Equation')

print("Results saved to:", output_file)


Results saved to: /Users/jomarjordas/Documents/MSFIN299/MSFIN299-Research/_data/olsresults.xlsx


  writer.save()
