In [12]:
import csv
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm


def import_csv(name, delimiter):
    """Load a numeric CSV file whose first column is the regression target.

    The first line of the file is treated as the header and printed to the
    screen. Every following non-empty row is converted to floats. The first
    column becomes the target vector and the remaining columns become the
    input matrix.

    Parameters
    ----------
    name : str
        Path of the CSV file to read.
    delimiter : str
        Field separator passed to ``csv.reader``.

    Returns
    -------
    header : list of str
        Column names from the first line (target name first).
    inputs : numpy.ndarray
        2-D array holding every column except the first.
    targets : numpy.ndarray
        1-D array holding the first column.

    Raises
    ------
    StopIteration
        If the file has no header line.
    ValueError
        If a field cannot be converted to ``float``.
    """
    # 'utf-8-sig' drops a leading byte-order mark (written by e.g. Excel) so
    # it does not end up glued to the first column name; files without a BOM
    # are read as ordinary UTF-8.  newline='' lets the csv module do its own
    # line-ending handling, as its documentation recommends.
    with open(name, 'r', encoding='utf-8-sig', newline='') as file:
        data_reader = csv.reader(file, delimiter=delimiter)

        # importing the header line separately
        # and printing it to screen
        header = next(data_reader)
        print("\n\nImporting data with target:\n" + header[0])
        print("and inputs:\n" + str(header[1:]))

        # converting each element (from string) to float type;
        # csv.reader yields [] for blank lines, which are skipped so they
        # neither count as entries nor make the array ragged
        data = [list(map(float, row)) for row in data_reader if row]

    print("There are %d entries." % len(data))

    # converting the data (list object) into a numpy array
    data_as_array = np.array(data)

    # the first column is the target; all remaining columns are the inputs
    inputs = np.delete(data_as_array, 0, 1)
    targets = data_as_array[:, 0]

    return header, inputs, targets

    
class color:
    """ANSI escape sequences used to style text printed to the screen."""

    BOLD = '\033[1m'       # switch bold on
    UNDERLINE = '\033[4m'  # switch underline on
    END = '\033[0m'        # reset all text attributes


def fit_and_report(csv_name, heading, delimiter=','):
    """Fit an OLS model to one CSV data set and print its report.

    The data set is loaded with ``import_csv``, an intercept column is added
    to the inputs, and an ordinary-least-squares model is fitted with
    statsmodels. The bold heading, the statsmodels summary table and the
    R-squared value are printed.

    Parameters
    ----------
    csv_name : str
        Path of the CSV file to load.
    heading : str
        Text printed in bold above the regression summary.
    delimiter : str, optional
        Field separator of the CSV file (default ``','``).

    Returns
    -------
    tuple
        ``(header, inputs, targets, X, results)`` where ``X`` is the design
        matrix after ``sm.add_constant`` and ``results`` is the fitted
        statsmodels results object.
    """
    # importing using csv reader and storing as numpy array
    header, inputs, targets = import_csv(csv_name, delimiter)

    print(color.BOLD + heading + color.END)

    # fitting regression model (add_constant supplies the intercept column)
    X = sm.add_constant(inputs)
    results = sm.OLS(targets, X).fit()

    print(results.summary())
    print("\n")
    print("R squared: " + str(results.rsquared) + "\n")

    return header, inputs, targets, X, results


# (data file, heading) for each model, in the order they are reported
REGRESSION_RUNS = [
    ('h2_data_no_interaction_without_log.csv',
     "\nLinear regression without interaction term:\n"),
    ('h2_data_with_interaction_without_log.csv',
     "\nLinear regression with interaction term:\n"),
]

for csv_name, heading in REGRESSION_RUNS:
    header, inputs, targets, X, results = fit_and_report(csv_name, heading)
    # keep the module-level names the original script defined, so code that
    # runs after this still finds them (they hold the last model fitted)
    y = targets
    R = results.rsquared

print("\n")





Importing data with target:
﻿RETit
and inputs:
['After', 'Eit', 'Logarithm', 'ROA', 'Growth']
There are 90 entries.
[1m
Linear regression without interaction term:
[0m
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.118
Model:                            OLS   Adj. R-squared:                  0.065
Method:                 Least Squares   F-statistic:                     2.241
Date:                Sat, 02 Jun 2018   Prob (F-statistic):             0.0576
Time:                        11:09:56   Log-Likelihood:                -111.42
No. Observations:                  90   AIC:                             234.8
Df Residuals:                      84   BIC:                             249.8
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err      