# Multiple Linear Regression

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import f_regression
import seaborn as sns
# Apply seaborn's default theme to matplotlib; no figures are drawn in the cells shown here.
sns.set()

## Load the data

In [4]:
# 'h2.csv' is a relative path, so the file must sit in the notebook's working directory.
df = pd.read_csv('h2.csv')
# Preview the first five rows to confirm the expected columns were parsed.
df.head()

Unnamed: 0,SquareFeet,YearBuilt,Price
0,2126,1969,215355.2836
1,2459,1980,195014.2216
2,2044,1957,184992.3213
3,2123,1956,190773.1486
4,1130,1962,143050.2018


## Create the multiple linear regression

In [5]:
# Predictors (two columns) and the regression target.
feature_columns = ['SquareFeet', 'YearBuilt']
x = df[feature_columns]
y = df['Price']

In [6]:
# Linear regression estimator constructed with its default arguments.
reg = LinearRegression()

In [7]:
# Fit on the full dataset; no train/test split is made in this notebook,
# so every score computed from x and y below is an in-sample figure.
reg.fit(x, y)

## Coefficients

In [8]:
# One coefficient per column of x, in column order (SquareFeet, then YearBuilt).
reg.coef_

array([ 99.21859355, -13.84082308])

## Intercept

In [9]:
# Constant term of the fitted model.
reg.intercept_

53726.72253961227

## Calculating the R-Squared

In [10]:
# R-squared evaluated on the same x and y the model was fitted on (in-sample).
reg.score(x, y)

0.5640954974416654

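## Adjusted R-Squared

$$R^2_{adj} = 1 - (1 - R^2)\,\frac{n - 1}{n - p - 1}$$

where $n$ is the number of observations and $p$ is the number of features.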

In [11]:
# Adjusted R-squared penalises R-squared for the number of predictors:
#   1 - (1 - R^2) * (n - 1) / (n - p - 1)
n, p = x.shape  # n = rows (observations), p = columns (features)
r2 = reg.score(x, y)
adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
adjusted_r2

0.5640692981086591

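# Feature Selection

`f_regression` tests each feature on its own (a univariate linear regression test against the target) and returns an F-statistic and a p-value per feature.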

In [13]:
# Returns a pair of arrays (F-statistics, p-values), one entry per column of x.
f_regression(x, y)

(array([4.30606229e+04, 1.60456722e+00]), array([0.        , 0.20526603]))

In [14]:
# f_regression returns (F-statistics, p-values); keep the p-values,
# which hold one entry per column of x.
f_statistics, p_values = f_regression(x, y)
p_values

array([0.        , 0.20526603])

In [15]:
# Three decimals for display only; the result is not assigned back to p_values.
p_values.round(3)

array([0.   , 0.205])

## Creating a summary table

In [16]:
# One row per feature: its coefficient from the fitted multiple regression and
# its p-value from f_regression. Note the p-values come from testing each
# feature individually, not from the joint model that produced the coefficients.
reg_summary = pd.DataFrame({
    'Features': x.columns.values,
    'Coefficients': reg.coef_,
    'P-Values': p_values.round(3),
})
reg_summary

Unnamed: 0,Features,Coefficients,P-Values
0,SquareFeet,99.218594,0.0
1,YearBuilt,-13.840823,0.205
