# Multiple Linear Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the training data. Every column except the last is treated as a
# feature; the last column is the regression target.
df = pd.read_csv(r"train_energy_data.csv")
X, y = df.iloc[:, :-1].to_numpy(), df.iloc[:, -1].to_numpy()

In [3]:
# Inspect the raw feature matrix; the first and last feature columns still
# hold string categories at this point.
print(X)

[['Residential' 7063 76 10 29.84 'Weekday']
 ['Commercial' 44372 66 45 16.72 'Weekday']
 ['Industrial' 19255 37 17 14.3 'Weekend']
 ...
 ['Commercial' 39562 88 20 32.18 'Weekday']
 ['Residential' 8348 67 37 16.48 'Weekend']
 ['Commercial' 15813 57 11 31.4 'Weekend']]


In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the two categorical feature columns (positions 0 and 5).
# The remaining numeric columns are passed through unchanged and are placed
# after the encoded columns in the result.
# sparse_threshold=0 forces a dense result: calling np.array() on a SciPy
# sparse matrix would produce a 0-d object array instead of a 2-D matrix.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0, 5])],
    remainder='passthrough',
    sparse_threshold=0,
)
# Encode from the untouched DataFrame rather than from X itself, so that
# re-running this cell does not try to encode an already-encoded matrix.
X = np.array(ct.fit_transform(df.iloc[:, :-1].values))

In [6]:
# Inspect the feature matrix after encoding: the one-hot columns come first,
# followed by the passthrough numeric columns.
print(X)

[[0.0 0.0 1.0 ... 76 10 29.84]
 [1.0 0.0 0.0 ... 66 45 16.72]
 [0.0 1.0 0.0 ... 37 17 14.3]
 ...
 [1.0 0.0 0.0 ... 88 20 32.18]
 [0.0 0.0 1.0 ... 67 37 16.48]
 [1.0 0.0 0.0 ... 57 11 31.4]]


## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [8]:
# Inspect the training portion of the feature matrix produced by the split.
print(X_train)

[[0.0 0.0 1.0 ... 14 23 26.61]
 [1.0 0.0 0.0 ... 99 11 30.78]
 [0.0 0.0 1.0 ... 50 23 23.8]
 ...
 [0.0 0.0 1.0 ... 66 8 10.16]
 [0.0 0.0 1.0 ... 98 36 29.52]
 [0.0 0.0 1.0 ... 12 44 10.73]]


## Training the Multiple Linear Regression model on the Training set

In [9]:
from sklearn.linear_model import LinearRegression
# Create the linear regression estimator and fit it on the training split
# only; the test split is kept aside for evaluation.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

## Predicting the Test set results

In [10]:
# Predict on the held-out rows and print the predictions next to the true
# targets (left column: predicted, right column: actual), shown with two
# decimal places.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[2883.65 2883.63]
 [5147.1  5147.11]
 [5332.1  5332.1 ]
 [3027.8  3027.78]
 [4254.6  4254.59]
 [4983.   4983.  ]
 [5754.95 5754.96]
 [4693.8  4693.8 ]
 [5101.15 5101.13]
 [5268.15 5268.16]
 [3776.2  3776.21]
 [3599.   3598.99]
 [2921.   2921.01]
 [3859.6  3859.59]
 [5744.85 5744.86]
 [5505.95 5505.97]
 [3409.   3408.98]
 [3639.75 3639.76]
 [5031.15 5031.14]
 [4745.45 4745.47]
 [4700.4  4700.42]
 [4650.7  4650.7 ]
 [3631.1  3631.08]
 [4969.8  4969.79]
 [5088.2  5088.21]
 [3314.9  3314.91]
 [4430.5  4430.49]
 [2656.25 2656.26]
 [5877.2  5877.19]
 [5745.   5744.99]
 [5390.75 5390.76]
 [4417.35 4417.34]
 [4498.05 4498.07]
 [3818.7  3818.71]
 [2619.35 2619.36]
 [3377.4  3377.38]
 [2815.35 2815.34]
 [5961.5  5961.49]
 [3525.65 3525.66]
 [3093.7  3093.71]
 [4034.65 4034.64]
 [3546.35 3546.34]
 [3937.6  3937.59]
 [3952.6  3952.58]
 [2755.75 2755.75]
 [3630.25 3630.24]
 [3653.25 3653.25]
 [3435.2  3435.18]
 [3109.3  3109.29]
 [5290.05 5290.06]
 [5590.7  5590.7 ]
 [5080.4  5080.4 ]
 [3750.35 37

## Evaluating the Model Performance

In [11]:
from sklearn.metrics import r2_score
# Coefficient of determination (R^2) of the predictions on the test split.
# NOTE(review): a score this close to 1.0 suggests the target may be an
# (almost) exact linear function of the features - confirm the dataset is not
# synthetic and that no target-derived column leaks into the features.
r2_score(y_true=y_test, y_pred=y_pred)

0.9999999998119723