In [17]:
import pandas as pd
import numpy as np
import warnings
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

In [18]:
warnings.filterwarnings('ignore')

In [19]:
df = pd.read_csv('./Data.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [20]:
models = {
    'XGBoost': XGBRegressor(),
    'Polynomial Regression': LinearRegression(),
    'Linear Regression': LinearRegression(),
    'K-NN': KNeighborsRegressor(),
    'SVR': SVR(),
    'CatBoost': CatBoostRegressor(verbose = 0),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
} 

In [21]:
dimensionalityReduction = {
    'Full': '',
    'PCA': PCA(),
}

In [22]:
results = pd.DataFrame(columns = list(dimensionalityReduction), index = list(models))

In [23]:
for technique in dimensionalityReduction.items():

    for modelName in models.items():
        
        estimator = Pipeline([('sc', StandardScaler()), ('technique', technique[1]), ('model', modelName[1])])
        param_grid = dict(technique__n_components = np.arange(1, df.columns.size))
        
        if technique[0] == 'Full':
            estimator.steps.pop(1)
            param_grid = dict()

        if modelName == 'K-NN':
            param_grid['model__n_neighbors'] = np.arange(1, 50)
            Score = GridSearchCV(estimator, param_grid, cv = 10, n_jobs = -1).fit(X, y).best_score_
            
        elif modelName == 'Polynomial Regression':
            estimator.steps.insert(1, ['poly', PolynomialFeatures()])
            param_grid['poly__degree'] = np.arange(2, 10)
            Score = GridSearchCV(estimator, param_grid, cv = 10, n_jobs = -1).fit(X, y).best_score_
            
        elif modelName == 'XGBoost':
            Score = GridSearchCV(estimator, param_grid, cv = 10, error_score='raise').fit(X, y).best_score_
            
        elif modelName == 'SVC':
            param_grid['model__kernel'] = ['rbf', 'poly', 'sigmoid', 'linear']
            Score = GridSearchCV(estimator, param_grid, cv = 10, n_jobs = -1).fit(X, y).best_score_
            
        elif modelName == 'Decision Tree':
            param_grid['model__criterion'] = ['poisson', 'friedman_mse', 'absolute_error', 'squared_error']
            param_grid['model__splitter'] = ['best', 'random']
            Score = GridSearchCV(estimator, param_grid, cv = 10, n_jobs = -1).fit(X, y).best_score_
            
        elif modelName == 'Random Forest':
            param_grid['model__criterion'] = ['poisson', 'friedman_mse', 'absolute_error', 'squared_error']
            param_grid['model__max_features'] = ['sqrt', 'log2']
            Score = GridSearchCV(estimator, param_grid, cv = 10, n_jobs = -1).fit(X, y).best_score_
            
        else:
            Score = GridSearchCV(estimator, param_grid, cv = 10, n_jobs = -1).fit(X, y).best_score_
        
        results.loc[modelName[0], technique[0]] = '{:.2f}'.format(Score * 100) + '%'

In [24]:
results.sort_values('Full', ascending = False)

Unnamed: 0,Full,PCA
XGBoost,96.63%,94.51%
CatBoost,96.50%,94.99%
Random Forest,96.30%,95.27%
K-NN,95.16%,95.16%
SVR,94.05%,94.05%
Decision Tree,92.97%,90.73%
Polynomial Regression,92.85%,92.85%
Linear Regression,92.85%,92.85%
