In [50]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [51]:
class DataLoader:
    """Load a CSV data set and split it into train/test arrays for regression.

    The last CSV column is used as the target ``y``; every preceding column is
    used as a feature in ``X``.

    Attributes set by :meth:`load_data`: ``data`` (the raw DataFrame), ``X``, ``y``.
    Attributes set by :meth:`prepare_data`: ``X_train``, ``X_test``, ``y_train``,
    ``y_test`` and, when scaling is requested, the fitted scalers ``sc_X`` / ``sc_y``.
    """

    def __init__(self):
        # The scalers are only fitted by prepare_data(apply_scaling=True); until
        # then they are None so callers can test for them explicitly.
        self.sc_X = None
        self.sc_y = None

    def load_data(self, file_name):
        """Read ``file_name`` with ``pandas.read_csv`` and separate features from target.

        Args:
            file_name: Anything ``pandas.read_csv`` accepts (path or file-like object).

        Returns:
            ``self``, so calls can be chained.
        """
        self.data = pd.read_csv(file_name)
        self.X = self.data.iloc[:, :-1].values
        self.y = self.data.iloc[:, -1].values
        return self

    def prepare_data(self, apply_scaling=False, test_size=0.2, random_state=0):
        """Split the loaded data into train/test parts and optionally standardise it.

        Args:
            apply_scaling: When true, fit a ``StandardScaler`` on ``X_train`` and
                another on ``y_train``. ``X_train``, ``X_test`` and ``y_train`` are
                replaced by their scaled versions; ``y_test`` is deliberately left
                in original units so predictions can be compared against it after
                being mapped back through ``sc_y``.
            test_size: Forwarded to ``train_test_split`` (default ``0.2``).
            random_state: Forwarded to ``train_test_split`` (default ``0``).

        Returns:
            ``self``, so calls can be chained.

        Raises:
            AttributeError: If :meth:`load_data` has not been called yet.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=test_size, random_state=random_state
        )

        # Forget scalers from any earlier call: they would describe a different
        # split and must not be picked up when this call does not scale.
        self.sc_X = None
        self.sc_y = None

        if apply_scaling:
            self.sc_X = StandardScaler()
            self.sc_y = StandardScaler()
            # Fit on the training part only, then reuse the same statistics for
            # the test features.
            self.X_train = self.sc_X.fit_transform(self.X_train)
            self.X_test = self.sc_X.transform(self.X_test)
            # The scaler is fed a single column, so y goes in as (n, 1) and is
            # flattened back to 1-D afterwards.
            self.y_train = self.sc_y.fit_transform(self.y_train.reshape(-1, 1)).ravel()
        return self
            

In [52]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
class ModelFitting:
    """Fit several scikit-learn regressors on a prepared ``DataLoader`` and score them.

    Every ``train_*`` method fits a fresh estimator on ``data_loader.X_train`` /
    ``data_loader.y_train``, predicts ``data_loader.X_test``, prints the
    predictions next to the true targets, stores both the fitted estimator and
    its R^2 score on the instance, and returns the score.
    """

    def __init__(self):
        # NOTE: this changes NumPy's print precision for the whole process, not
        # only for the tables printed by this class.
        np.set_printoptions(precision=2)

    @staticmethod
    def _report(title, y_pred, y_test):
        """Print a two-column predicted/actual table and return the R^2 score."""
        y_pred = np.asarray(y_pred)
        y_test = np.asarray(y_test)
        print(title)
        print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), 1))
        return r2_score(y_test, y_pred)

    @staticmethod
    def _to_original_scale(data_loader, y_pred):
        """Map predictions back through ``data_loader.sc_y`` when such a scaler exists.

        The scaler was fitted on a single column, so predictions are passed as an
        ``(n, 1)`` array and flattened again afterwards. If the loader has no
        target scaler (attribute missing or ``None``) the predictions are
        returned unchanged.
        """
        target_scaler = getattr(data_loader, 'sc_y', None)
        if target_scaler is None:
            return y_pred
        column = np.asarray(y_pred).reshape(-1, 1)
        return target_scaler.inverse_transform(column).ravel()

    def train_with_multi_linear_regression(self, data_loader):
        """Fit ordinary least squares on all features; return R^2 on the test split."""
        self.multi_regress = LinearRegression()
        self.multi_regress.fit(data_loader.X_train, data_loader.y_train)
        y_pred = self.multi_regress.predict(data_loader.X_test)
        self.multi_regress_score = self._report(
            'Multi Regression: predict/test', y_pred, data_loader.y_test
        )
        return self.multi_regress_score

    def train_with_polynomial_regression(self, data_loader, degree=4):
        """Fit linear regression on polynomial features; return R^2 on the test split.

        Args:
            data_loader: Prepared loader providing the train/test arrays.
            degree: Degree passed to ``PolynomialFeatures`` (default ``4``).

        The fitted feature expander is kept as ``self.poly_feat`` because new
        inputs must go through it before ``self.poly_regress`` can predict.
        """
        self.poly_feat = PolynomialFeatures(degree=degree)
        X_poly = self.poly_feat.fit_transform(data_loader.X_train)
        self.poly_regress = LinearRegression()
        self.poly_regress.fit(X_poly, data_loader.y_train)
        y_pred = self.poly_regress.predict(self.poly_feat.transform(data_loader.X_test))
        self.poly_regress_score = self._report(
            'Polynomial Regression: predict/test', y_pred, data_loader.y_test
        )
        return self.poly_regress_score

    def train_with_support_vector_regression(self, data_loader):
        """Fit an RBF-kernel SVR; return R^2 on the test split.

        When ``data_loader`` carries a fitted target scaler ``sc_y`` the
        predictions are converted back to original units before being compared
        with ``data_loader.y_test``. Without one the raw predictions are used.
        """
        self.svr = SVR(kernel='rbf')
        self.svr.fit(data_loader.X_train, data_loader.y_train)
        y_pred = self._to_original_scale(data_loader, self.svr.predict(data_loader.X_test))
        self.svr_score = self._report('SVR: predict/test', y_pred, data_loader.y_test)
        return self.svr_score

    def train_with_decision_tree(self, data_loader, random_state=None):
        """Fit a decision-tree regressor; return R^2 on the test split.

        Args:
            data_loader: Prepared loader providing the train/test arrays.
            random_state: Forwarded to ``DecisionTreeRegressor``; pass an int for
                repeatable fits (default ``None``).
        """
        self.decision_tree_regress = DecisionTreeRegressor(random_state=random_state)
        self.decision_tree_regress.fit(data_loader.X_train, data_loader.y_train)
        y_pred = self.decision_tree_regress.predict(data_loader.X_test)
        self.decision_tree_score = self._report(
            'Decision Tree Regression: predict/test', y_pred, data_loader.y_test
        )
        return self.decision_tree_score

    def train_with_random_forest(self, data_loader, num_of_trees=100, random_state=None):
        """Fit a random-forest regressor; return R^2 on the test split.

        Args:
            data_loader: Prepared loader providing the train/test arrays.
            num_of_trees: Passed as ``n_estimators`` (default ``100``).
            random_state: Forwarded to ``RandomForestRegressor``; pass an int for
                repeatable fits (default ``None``).
        """
        self.random_forest_regress = RandomForestRegressor(
            n_estimators=num_of_trees, random_state=random_state
        )
        self.random_forest_regress.fit(data_loader.X_train, data_loader.y_train)
        y_pred = self.random_forest_regress.predict(data_loader.X_test)
        self.random_forest_score = self._report(
            'Random Forest Regression: predict/test', y_pred, data_loader.y_test
        )
        return self.random_forest_score

    def train_wih_random_forest(self, data_loader, num_of_trees = 100, random_state=None):
        """Original (misspelled) name, kept so existing callers keep working.

        Delegates to :meth:`train_with_random_forest`.
        """
        return self.train_with_random_forest(data_loader, num_of_trees, random_state)

In [53]:
# Loader with the features and target left in their original units.
data_loader = DataLoader()
data_loader.load_data('Data.csv')
data_loader.prepare_data()

# Second, independent loader over the same file with standardisation switched on.
scaled_data_loader = DataLoader()
scaled_data_loader.load_data('Data.csv')
scaled_data_loader.prepare_data(apply_scaling=True)

<__main__.DataLoader at 0x1e861422d90>

In [54]:
# Train each regressor in turn; every call prints a predicted/actual table and
# returns the R^2 score on the held-out split.
model_fitting = ModelFitting()

# Multiple linear regression on the unscaled split
multi_regress_score = model_fitting.train_with_multi_linear_regression(data_loader)

# Polynomial regression on the unscaled split
poly_regress_score = model_fitting.train_with_polynomial_regression(data_loader)

# Support vector regression is the only model given the standardised loader
svr_score = model_fitting.train_with_support_vector_regression(scaled_data_loader)

# Decision tree on the unscaled split
decision_tree_score = model_fitting.train_with_decision_tree(data_loader)

# Random forest on the unscaled split
random_forest_score = model_fitting.train_wih_random_forest(data_loader)

# Scores in the same order the models were trained
all_scores = (
    multi_regress_score,
    poly_regress_score,
    svr_score,
    decision_tree_score,
    random_forest_score,
)
print(*all_scores)

Multi Regression: predict/test
[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]
Polynomial Regression: predict/test
[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.67 463.28]]
SVR: predict/test
[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]
Decision Tree Regression: predict/test
[[429.06 431.23]
 [462.81 460.01]
 [460.19 461.14]
 ...
 [470.31 473.26]
 [437.76 438.  ]
 [462.55 463.28]]
Random Forest Regression: predict/test
[[433.39 431.23]
 [457.95 460.01]
 [463.53 461.14]
 ...
 [469.44 473.26]
 [438.99 438.  ]
 [461.59 463.28]]
0.9325315554761303 0.9458193300146379 0.948078404998626 0.9241243858560355 0.9652231108206529


In [57]:
# Ask the fitted random forest for a prediction on one held-out row (index 3);
# the estimator expects a 2-D input, so the row is reshaped to (1, n_features).
single_sample = data_loader.X_test[3].reshape(1, -1)
model_fitting.random_forest_regress.predict(single_sample)

array([446.71])