# **Training several regression models on a dataset and evaluating each of them**

In [90]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the CSV file into a DataFrame and print its first five rows as a quick check.
dataset = pd.read_csv(filepath_or_buffer='/content/Data.csv')
print(dataset.head(5))

      AT      V       AP     RH      PE
0  14.96  41.76  1024.07  73.17  463.26
1  25.18  62.96  1020.04  59.08  444.37
2   5.11  39.40  1012.16  92.14  488.56
3  20.86  57.32  1010.24  76.64  446.48
4  10.82  37.50  1009.23  96.62  473.90


In [66]:
# Report how many missing values each column contains.
print(dataset.isna().sum(axis=0))

AT    0
V     0
AP    0
RH    0
PE    0
dtype: int64


In [67]:
# Every column except the last is a feature; the last column is the target.
X = dataset.iloc[:, :-1].to_numpy()
Y = dataset.iloc[:, -1].to_numpy()

## **Multiple Linear Regression**

In [68]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [69]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression model on the training split.
linear_regressor = LinearRegression()
linear_regressor.fit(X=X_train, y=Y_train)

In [70]:
# Predict on the held-out rows and print predictions (left) next to true values (right).
Y_predict = linear_regressor.predict(X=X_test)
np.set_printoptions(precision=2)
print(np.column_stack((Y_predict, Y_test)))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


### **Evaluating the Model Performance**

In [71]:
from sklearn.metrics import r2_score

# R^2 of the linear model's predictions against the held-out targets.
r2_score(y_true=Y_test, y_pred=Y_predict)

0.9325315554761303

## **Polynomial Regression**

In [72]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Expand the training features into polynomial terms up to degree 4 ...
poly = PolynomialFeatures(degree=4)
X_poly = poly.fit_transform(X_train)

# ... then fit a plain linear model on the expanded features.
poly_regressor = LinearRegression()
poly_regressor.fit(X=X_poly, y=Y_train)

In [73]:
# Apply the already-fitted polynomial expansion to the test features, predict,
# and print predictions (left) next to true values (right).
Y_predict = poly_regressor.predict(X=poly.transform(X_test))
np.set_printoptions(precision=2)
print(np.column_stack((Y_predict, Y_test)))

[[434.16 431.23]
 [458.26 460.01]
 [460.72 461.14]
 ...
 [469.49 473.26]
 [438.53 438.  ]
 [461.62 463.28]]


### **Evaluating the Model Performance**

In [74]:
from sklearn.metrics import r2_score

# R^2 of the polynomial model's predictions against the held-out targets.
r2_score(y_true=Y_test, y_pred=Y_predict)

0.9455261542316076

## **SVR**

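Here we have to apply feature scaling: SVR does not scale its inputs itself, so both the features and the target are standardized before fitting.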

In [75]:
# Re-extract features (all but the last column) and target (last column) from the DataFrame.
X = dataset.iloc[:, :-1].to_numpy()
Y = dataset.iloc[:, -1].to_numpy()

In [76]:
# Turn the target into a single column (n_samples, 1) so it can be passed to the scaler below.
Y = Y.reshape(-1, 1)

In [77]:
from sklearn.model_selection import train_test_split

# Same 80/20 split with the same seed as before, now with the column-shaped target.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [78]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target, each fitted on the training split only.
sc_x = StandardScaler()
sc_x.fit(X_train)
X_train = sc_x.transform(X_train)

sc_y = StandardScaler()
sc_y.fit(Y_train)
Y_train = sc_y.transform(Y_train)

In [79]:
from sklearn.svm import SVR

# Fit an RBF-kernel SVR on the scaled training data.
# Y_train is a single column (n_samples, 1) because it went through the scaler;
# SVR.fit expects a 1-D target, so flatten it to avoid sklearn's
# DataConversionWarning ("y = column_or_1d(y, warn=True)").
svr_regressor = SVR(kernel='rbf')
svr_regressor.fit(X_train, Y_train.ravel())

  y = column_or_1d(y, warn=True)


In [80]:
# Scale the test features with the training scaler, predict in scaled units,
# then map the predictions back to the original target units.
Y_predict = sc_y.inverse_transform(
    svr_regressor.predict(sc_x.transform(X_test)).reshape(-1, 1)
)
np.set_printoptions(precision=2)
# Predictions (left) next to true values (right).
print(np.column_stack((Y_predict, Y_test)))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


### **Evaluating the Model Performance**

In [81]:
from sklearn.metrics import r2_score

# R^2 of the SVR predictions (back in original units) against the held-out targets.
r2_score(y_true=Y_test, y_pred=Y_predict)

0.9480784049986258

## **Decision Tree**




In [82]:
# Re-extract the unscaled features and the 1-D target from the DataFrame.
X = dataset.iloc[:, :-1].to_numpy()
Y = dataset.iloc[:, -1].to_numpy()

In [83]:
from sklearn.model_selection import train_test_split

# Same 80/20 split with the same seed, on the unscaled data.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [84]:
from sklearn.tree import DecisionTreeRegressor

# Fit a decision tree on the training split; the seed is fixed at 0.
decision_tree_regressor = DecisionTreeRegressor(random_state=0)
decision_tree_regressor.fit(X=X_train, y=Y_train)

In [85]:
# Predict on the held-out rows and print predictions (left) next to true values (right).
Y_predict = decision_tree_regressor.predict(X=X_test)
np.set_printoptions(precision=2)
print(np.column_stack((Y_predict, Y_test)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


### **Evaluating the Model Performance**

In [86]:
from sklearn.metrics import r2_score

# R^2 of the decision tree's predictions against the held-out targets.
r2_score(y_true=Y_test, y_pred=Y_predict)

0.922905874177941

## **Random Forest (winner 🌟)**

In [87]:
from sklearn.ensemble import RandomForestRegressor

# Fit a forest of 10 trees on the training split; the seed is fixed at 0.
random_forest_regressor = RandomForestRegressor(n_estimators=10, random_state=0)
random_forest_regressor.fit(X=X_train, y=Y_train)

In [88]:
# Predict on the held-out rows and print predictions (left) next to true values (right).
Y_predict = random_forest_regressor.predict(X=X_test)
np.set_printoptions(precision=2)
print(np.column_stack((Y_predict, Y_test)))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


### **Evaluating the Model Performance**

In [89]:
from sklearn.metrics import r2_score

# R^2 of the random forest's predictions against the held-out targets.
r2_score(y_true=Y_test, y_pred=Y_predict)

0.9615908334363876