In [1]:
#Create Dataset
import numpy as np

#Generate mean and covariance for variables
mean = [4.5, 6.9]  #means for iv and dv
cov = [[1, -0.321], [-0.321, 1]] #covariance matrix.

#Create size of dataset
nsize = 60

##Simulate data (Independent Variable and Dependent variable variable)
x, y = np.random.multivariate_normal(mean, cov, size = nsize).T

In [3]:
class Regression():
    """ A useful tool to run regression analyses on data 
        will provide the slope and intercept of a line along with 
        a list of residuals and the r squared of the model """
    
    def __init__(self, x, y):
        self.nsize = len(x)
        
        #Compute Variables for b0 and b1
        self.x = x
        self.y = y
        self.sumx = sum(x)
        self.sumy = sum(y)
        self.x2 = self.x**2
        self.sumx_sqr = sum(self.x2)
        self.xy = x*y
        self.sumxy = sum(self.xy)
        
        #For R Squared 
        self.xsqr = self.x**2
        self.sumx_sqr = sum(self.xsqr)
        self.ysqr = self.y**2
        self.sumy_sqr = sum(self.ysqr)
        self.xy = self.x*self.y
        self.sum_xy = sum(self.xy)

    def intercept(self):
        """ Return the slope of the regression line. """
        self.b_0 = ((self.sumy*(self.sumx_sqr))-(self.sumx*self.sumxy))/((self.nsize*(self.sumx_sqr))-(self.sumx)**2)
        return self.b_0

    def slope(self):
        self.b_1 = ((self.nsize*self.sumxy)-(self.sumx*self.sumy))/((self.nsize*self.sumx_sqr)-((self.sumx)**2))
        return self.b_1

    def residuals(self):
        """ Outputs a list of residuals from the OLS regression model"""
        self.predicted_y = [self.b_0 + (self.b_1*i) for i in self.x]  #regression model
        self.residuals = [(self.y[i] - self.predicted_y[i]) for i in range(min(len(self.y), len(self.predicted_y)))]
        #self.residuals = list(self.residuals) #Change residuals to list - Unnecessary 
        return self.residuals
    
    def rsquared(self):
        self.numerator = (self.nsize*(self.sum_xy) - (self.sumx*self.sumy))
        self.denominator_squared = ((self.nsize*self.sumx_sqr - (self.sumx**2)) * (self.nsize*self.sumy_sqr - (self.sumy**2)))
        self.denominator = ((self.denominator_squared)**(1/2))
        self.r = (self.numerator/self.denominator)
        self.rsquared = (self.r)**2
        return self.rsquared
        

In [39]:
reg = Regression(x,y)
intercept = reg.intercept()
slope = reg.slope()
residuals = reg.residuals()
rsquared = reg.rsquared()
#print(slope)
#print(intercept)
#print(residuals)
#print(rsquared)

-0.2847659131439026
8.15546341540138


In [18]:
#Correlation class with coef method
class Correlation(Regression):
    pass #bring in all methods from regression
     
    def coeff(self): #create coeff from r_squared method from Regression
        self.r = (rsquared)**(1/2)
        return self.r


In [30]:
coeff_r = Correlation2(x,y)
coeff_r.coeff()

0.2415554366983289

In [33]:
#Check of Correlation Using numpy
import numpy as np
np.corrcoef(x,y)

array([[ 1.        , -0.24155544],
       [-0.24155544,  1.        ]])

In [34]:
#Check of regression using stats models
import statsmodels.api as sm

ols_model = sm.OLS(y, x).fit()
predictions = ols_model.predict(x)
residuals = [(y[i] - predictions[i]) for i in range(min(len(y), len(predictions)))]
print(residuals)

[1.1389256294958336, -0.6890795180632914, -2.9457126295226574, 0.5146902211649351, -0.23653773259128386, 0.6470091990840388, 2.7470095615183325, 4.490518130886199, 1.0714180558307973, 0.7009141441075979, -0.5441733977661158, -2.9209088740848, 0.6304981911283347, -1.3401855910081943, 3.957043027010746, 1.9897184233781005, 0.2893085623625593, -2.910567282040791, -3.124947861506861, -2.008619810222056, 1.0479828488952325, 2.711784224319792, -0.31897470887356594, 1.2138510994382692, 1.608154707649299, 0.7048355674139435, 1.6944216122932225, 0.8397385906877934, 1.7063327909128443, 1.3317830980179437, 0.8644581955913297, 0.5210128000848231, -0.19881859700705107, 1.37232240957694, -0.49228455443992747, 0.1644525970352806, 1.1453495336641595, 0.24984996692067352, 0.7949585358728726, -1.3815849247664929, 1.113469262612787, -1.665366009578058, -2.43230124348856, 0.7085177896017827, 2.795835620984315, 0.2855073879800951, -1.838251763724407, 0.8879058702421254, 3.3554149823943673, 0.82138610325614

In [28]:
print_model = ols_model.summary()
print(print_model)

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.927
Model:                            OLS   Adj. R-squared (uncentered):              0.926
Method:                 Least Squares   F-statistic:                              747.9
Date:                Tue, 01 Oct 2019   Prob (F-statistic):                    3.31e-35
Time:                        20:04:55   Log-Likelihood:                         -123.27
No. Observations:                  60   AIC:                                      248.5
Df Residuals:                      59   BIC:                                      250.6
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------