In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [4]:
# Predictor: six observations arranged as a single-feature column, shape (6, 1).
x = np.array([5, 15, 25, 35, 45, 55])[:, np.newaxis]
# Response: one target value per observation, shape (6,).
y = np.asarray([5, 20, 14, 32, 22, 38])

In [5]:
# Show the predictor: a 2-D column with one value per row.
print(x)

[[ 5]
 [15]
 [25]
 [35]
 [45]
 [55]]


In [6]:
# Show the 1-D response vector.
print(y)

[ 5 20 14 32 22 38]


In [11]:
# Create an instance of the LinearRegression class with default settings.
# This object represents the regression model; it is not fitted yet.
model = LinearRegression()

In [14]:
# Fit the estimator created in the previous cell to our data.
# .fit() returns the estimator itself, so rebinding `model` keeps the name on
# the fitted instance instead of constructing (and discarding) a second one.
model = model.fit(x, y)

In [15]:
# Check how well the model fits the data it was trained on.
# .score() returns the coefficient of determination (R²): the share of the
# variation in y that is explained by its dependence on x under this model.
# A larger R² indicates a better fit.
r_sq = model.score(x, y)
print(f'coefficient of determination: {r_sq}')

coefficient of determination: 0.7158756137479542


In [16]:
# Fitted parameters are exposed as attributes: .intercept_ is b0 and
# .coef_ is an array holding the slope b1.
print(f'intercept: {model.intercept_}')
print(f'slope: {model.coef_}')

intercept: 5.633333333333329
slope: [0.54]


In [22]:
# The fitted model can now predict; .predict() returns a 1-D array.
y_pred = model.predict(x)
print(f'predicted response:\n{y_pred}')
print('\n')

# The same numbers by hand: multiply x by the model coefficient and add the
# intercept. x is a column, so this result is two-dimensional.
y_pred1 = model.coef_ * x + model.intercept_
print(f'predicted response:\n{y_pred1}')
print('\n')

# Flattening x first (x.reshape(-1), x.flatten() or x.ravel()) gives a 1-D
# result with the same layout as the output of .predict().
y_pred2 = model.coef_ * x.ravel() + model.intercept_
print(f'predicted response:\n{y_pred2}')



predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


predicted response:
[[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [21]:
#####Multiple Regression######

In [23]:
# Inputs with two features per observation, shape (8, 2).
a = np.array([
    [0, 1],
    [5, 1],
    [15, 2],
    [25, 5],
    [35, 11],
    [45, 15],
    [55, 34],
    [60, 35],
])
# Response vector with one value per row of `a`, shape (8,).
b = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [24]:
# Show the input matrix followed by the response vector, each on its own line(s).
print(a, b, sep='\n')

[[ 0  1]
 [ 5  1]
 [15  2]
 [25  5]
 [35 11]
 [45 15]
 [55 34]
 [60 35]]
[ 4  5 20 14 32 22 38 43]


In [25]:
# Multiple regression: fit a linear model on the two columns of `a`.
model_mul = LinearRegression()
model_mul = model_mul.fit(a, b)

In [27]:
# Coefficient of determination (R²) of the two-feature model on its training data.
r_square = model_mul.score(a, b)
print(f'coefficient of determination: {r_square}')

coefficient of determination: 0.8615939258756776


In [28]:
# .intercept_ is b0; .coef_ holds one coefficient per input column (b1, b2).
print(f'intercept: {model_mul.intercept_}')
print(f'coefficient: {model_mul.coef_}')

intercept: 5.52257927519819
coefficient: [0.44706965 0.25502548]


In [32]:
# Predicted response for every row of the inputs used to fit the model.
y_prediction = model_mul.predict(a)
print(f'predicted response:\n{y_prediction}')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [35]:
# Reproduce .predict() by hand: scale each column by its coefficient,
# sum across the columns of each row (axis 1), then add the intercept.
y_prediction_ = (model_mul.coef_ * a).sum(axis=1) + model_mul.intercept_
print(f'predicted response:\n{y_prediction_}')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [None]:
#### Linear Regression with statsmodels ####

In [37]:
import statsmodels.api as sm

In [38]:
# We reuse the same inputs a and b here.
# statsmodels does not include the intercept b0 by default, so a column of
# ones has to be added to the inputs (prepended as the first column).
# NOTE: this rebinds `a`; any earlier cell re-run after this point will see
# the matrix with the extra constant column.
a = sm.add_constant(a, prepend=True)

In [39]:
# Inspect the inputs after sm.add_constant(): a column of ones plus the original features.
print(a)

[[ 1.  0.  1.]
 [ 1.  5.  1.]
 [ 1. 15.  2.]
 [ 1. 25.  5.]
 [ 1. 35. 11.]
 [ 1. 45. 15.]
 [ 1. 55. 34.]
 [ 1. 60. 35.]]


In [40]:
# A regression model based on ordinary least squares is an instance of
# statsmodels.regression.linear_model.OLS; this is how to create one.
# Note the argument order: the output (endog) comes first, followed by the
# inputs (exog).
model_OLS = sm.OLS(endog=b, exog=a)

In [42]:
# Calling .fit() gives the variable `results`, which is an instance of the
# class statsmodels.regression.linear_model.RegressionResultsWrapper.
# This object holds a lot of information about the regression model.

results = model_OLS.fit()


In [43]:
# Call .summary() to get the table with the results of the linear regression,
# and print it so it is shown as plain text.
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Sun, 03 May 2020   Prob (F-statistic):            0.00713
Time:                        11:20:33   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2



In [44]:
# Individual statistics can also be read directly from the results object.
print(f'coefficient of determination: {results.rsquared}')
print(f'adjusted coefficient of determination: {results.rsquared_adj}')
# .params holds the values of b0, b1, b2.
print(f'regression coefficients: {results.params}')

coefficient of determination: 0.8615939258756776
adjusted coefficient of determination: 0.8062314962259487
regression coefficients: [5.52257928 0.44706965 0.25502548]


In [47]:
# The predicted response on the inputs used to create the model is available
# either through .fittedvalues or by calling .predict() with those inputs.
print(f'predicted response:\n{results.fittedvalues}')
print(f'predicted response:\n{results.predict(a)}')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
