In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
energy_factors = pd.read_csv('../Data.csv')

In [3]:
energy_factors.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [4]:
# Split data into feature matrix (X) and target vector (y)
X = energy_factors.iloc[:, :-1].values
y = energy_factors.iloc[:, -1].values

In [5]:
X

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [6]:
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

In [7]:
# Split into Training and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [8]:
# create a scaler
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_trainS = sc_X.fit_transform(X_train)

sc_y = StandardScaler()
y_trainS = sc_y.fit_transform(y_train.reshape(-1,1))

In [9]:
y_trainS2 = y_trainS.reshape(-1,)

In [10]:
# Multiple linear regression
from sklearn.linear_model import LinearRegression
mlRegressor = LinearRegression()
mlRegressor.fit(X_train, y_train)

In [11]:
# Polynominal Linear regression
# Transform to polynomials
from sklearn.preprocessing import PolynomialFeatures
polynomial_converter = PolynomialFeatures(degree = 4)
# Deploy the Linear regression
plRegressor = LinearRegression()
plRegressor.fit(X_train, y_train)

In [12]:
# SVR
from sklearn.svm import SVR
svrRegressor = SVR()
svrRegressor.fit(X_train, y_train)

In [13]:
# Support vector regression
from sklearn.svm import SVR
svrRegressorS = SVR()
svrRegressorS.fit(X_trainS, y_trainS2)

In [14]:
# Decision tree
from sklearn.tree import DecisionTreeRegressor
dtRegressor = DecisionTreeRegressor()
dtRegressor.fit(X_train, y_train)

In [15]:
# Random tree
from sklearn.ensemble import RandomForestRegressor
rfRegressor = RandomForestRegressor(n_estimators=10)
rfRegressor.fit(X_train, y_train)

In [16]:
mlRegressor.predict(X_test)

array([431.42761597, 458.56124622, 462.75264705, ..., 469.51835895,
       442.41759454, 461.88279939])

In [17]:
plRegressor.predict(X_test)

array([431.42761597, 458.56124622, 462.75264705, ..., 469.51835895,
       442.41759454, 461.88279939])

In [18]:
svrRegressor.predict(X_test)

array([445.29929921, 455.02475403, 457.85022498, ..., 457.13242754,
       450.71643694, 455.2789628 ])

In [19]:
sc_y.inverse_transform(svrRegressorS.predict(X_test).reshape(-1,1))

array([[457.54079861],
       [457.54079861],
       [457.54079861],
       ...,
       [457.54079861],
       [457.54079861],
       [457.54079861]])

In [20]:
dtRegressor.predict(X_test)

array([431.55, 459.59, 460.06, ..., 471.46, 437.09, 462.74])

In [21]:
rfRegressor.predict(X_test)

array([434.49 , 457.712, 462.572, ..., 472.194, 438.884, 460.987])

In [22]:
X.shape

(9568, 4)

In [23]:
y.shape

(9568,)

In [24]:
# performance metrics
from sklearn.metrics import r2_score
print("R^2 for multiple linear regression is", r2_score(y_test, mlRegressor.predict(X_test)))
print("R^2 for polynomial linear regression is", r2_score(y_test, plRegressor.predict(X_test)))
print("R^2 for support vector regression is", r2_score(y_test, svrRegressor.predict(X_test)))
print("R^2 for Scaled support vector regression is", r2_score(y_test, sc_y.inverse_transform(svrRegressorS.predict(sc_X.transform(X_test)).reshape(-1,1))))
print("R^2 for decision tree regression is", r2_score(y_test, dtRegressor.predict(X_test)))
print("R^2 for random forest regression is", r2_score(y_test, rfRegressor.predict(X_test)))

R^2 for multiple linear regression is 0.9325315554761303
R^2 for polynomial linear regression is 0.9325315554761303
R^2 for support vector regression is 0.38182110451293594
R^2 for Scaled support vector regression is 0.948078404998626
R^2 for decision tree regression is 0.9217437799253096
R^2 for random forest regression is 0.9616195614180062


In [None]:
# Display decison tree
from sklearn.tree import plot_tree
plt.figure(figsize=(150,150))
plot_tree(dtRegressor, feature_names=['AT', 'V', 'AP', 'RH'], filled=True)
plt.show()