In [49]:
from __future__ import print_function, division

In [50]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import scale
from sklearn.decomposition import KernelPCA

from sklearn.linear_model import Ridge, ElasticNet, BayesianRidge
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from mlens.ensemble import SuperLearner
from sklearn.svm import SVR

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import r2_score

In [51]:
# update your data path
DATA_PATH = "/home/aunagar/Personal/Study/Sem1/Advanced ML/projects/task1/Task1/"

In [52]:
# import data
train_X = pd.read_csv(DATA_PATH + "X_train.csv")
train_Y = pd.read_csv(DATA_PATH + "y_train.csv")
test_X = pd.read_csv(DATA_PATH + "X_test.csv")
sample_submission = pd.read_csv(DATA_PATH + "sample.csv")

In [53]:
# split data
train_ids = train_X.iloc[:, 0]
train_features = train_X.iloc[:, 1:]
test_ids = test_X.iloc[:, 0]
test_features = test_X.iloc[:, 1:]

In [54]:
def missing_value_imputation(train, test):
    train = train.fillna(train.mean())
    
    test = test.fillna(train.mean())
    
    return train, test

def outlier_treatment(train, test):
    
    train_mean, train_std = train.mean(), train.std()
    
    train[train > train_mean + 2.*train_std] = np.nan
    train[train < train_mean - 2.*train_std] = np.nan
    
    test[test > train_mean + 2.*train_std] = np.nan
    test[test < train_mean - 2.*train_std] = np.nan
    
    train = train.fillna(train_mean)
    test = test.fillna(train_mean)
    
    return train, test

def normalize(train, test):
    train_mean, train_std = train.mean(), train.std()

    train = (train - train_mean)/train_std
    # test 
    test = (test - train_mean)/train_std
    
    return train, test

def remove_corr_features(train, test, threshold = 0.8):
    
    # Create correlation matrix
    corr_matrix = train.corr().abs()
    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
    # Find index of feature columns with correlation greater than 0.7
    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
    # train
    train = train.drop(columns = to_drop)
    # test
    test = test.drop(columns = to_drop)
    
    return train, test
        
def preprocess(train, test):
    
    # impute missing values
    train, test = missing_value_imputation(train, test)
    
    # remove columns with 0 variance
    train_mean, train_std = train.mean(), train.std()
    train = train.iloc[:, np.where(train_std > 0.0)[0]]
    test = test.iloc[:, np.where(train_std > 0.0)[0]]
    
    # remove outliers
    train, test = outlier_treatment(train, test)
    
    # normalize
    train, test = normalize(train, test)
    
    # remove correlated features
    train, test = remove_corr_features(train, test)
    
    return train, test

In [55]:
train_features, test_features = preprocess(train_features, test_features)

In [56]:
###### Correlated feature removal #########
# Create correlation matrix
corr_matrix = train_features.corr().abs()
# Select upper triangle of correlation matrix
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
# Find index of feature columns with correlation greater than 0.7
to_drop = [column for column in upper.columns if any(upper[column] > 0.7)]

# train
train_features = train_features.drop(columns = to_drop)
# test
test_features = test_features.drop(columns = to_drop)

In [57]:
###### linear model
lr = ElasticNet(alpha = 0.5, l1_ratio=0.5)
validation_score = cross_val_score(lr, train_features, train_Y.iloc[:, 1:], cv = 5, scoring = 'r2')
print(validation_score.mean())

# train model on whole train data
lr.fit(X = train_features, y = train_Y.iloc[:, 1])

0.4817015978694041


ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [47]:
# finding to which features to remove
non_zero_weights = np.where(lr.coef_ != 0.)[0]

# removing these features from training data
train_features = train_features.iloc[:, non_zero_weights]
test_features = test_features.iloc[:, non_zero_weights]

In [48]:
#### training more complex model on this cleared date

###### linear model
alpha = np.arange(0.1, 2, 0.2)
l1_ratio = np.arange(0.1, 1, 0.2)
results = pd.DataFrame(columns=['alpha', 'l1_ratio', 'cv_score'])
for a in alpha:
    for l1 in l1_ratio:
        lr = ElasticNet(alpha = a, l1_ratio=l1)
        validation_score = cross_val_score(lr, train_features, train_Y.iloc[:, 1:], cv = 5, scoring = 'r2')
        results = results.append({'alpha':a, 'l1_ratio':l1, 'cv_score':validation_score.mean()}, ignore_index = True)

best_parameters = results.iloc[np.argmax(results.cv_score.values),:]
print(best_parameters['cv_score'])

# linear Elastic NEt
lr = ElasticNet(alpha = best_parameters['alpha'], l1_ratio = best_parameters['l1_ratio'])

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(loss='huber', learning_rate=0.05, n_estimators=1000, subsample=0.9,
                                criterion='friedman_mse', min_samples_split=10, min_samples_leaf=10,
                                min_weight_fraction_leaf=0.0, max_depth=7, min_impurity_decrease=0.0,
                                min_impurity_split=None, init=None, random_state=42, max_features=None,
                                alpha=0.9, verbose=2, max_leaf_nodes=None, warm_start=False, presort='auto',
                                validation_fraction=0.33, n_iter_no_change=None, tol=0.0001)

gbr_cross_val = cross_val_score(gbr, train_features, train_Y.iloc[:, 1], cv = 5, scoring = 'r2')
print(gbr_cross_val.mean())

# support vector regression
C = np.arange(1, 10, 1)
svr_params = pd.DataFrame(columns = ['C', 'cv_score'])
for c in C:
    svr = SVR(kernel = 'rbf', degree = 10, gamma = 'scale', C = c)
    cv_score = cross_val_score(svr, train_features, train_Y.iloc[:, 1], cv= 5, scoring = 'r2').mean()
    svr_params = svr_params.append({'C':c, 'cv_score':cv_score}, ignore_index = True)

best_params = svr_params.iloc[np.argmax(svr_params.cv_score.values), :]
print(best_params['cv_score'].mean())

svr = SVR(kernel= 'rbf', gamma = 'scale', C = best_params['C'])

0.582770370350672
      Iter       Train Loss      OOB Improve   Remaining Time 
         1          44.7100           1.2775            3.72m
         2          40.3126           2.4686            3.22m
         3          37.9108           1.9871            3.11m
         4          35.7126           1.4301            3.03m
         5          32.4681           1.4775            3.10m
         6          31.2209           0.9250            3.12m
         7          28.8169           1.5739            3.27m
         8          26.5536           1.3571            3.21m
         9          24.4394           1.5660            3.26m
        10          24.0506           0.8909            3.19m
        11          21.9853           1.2191            3.16m
        12          21.4879           0.7219            3.17m
        13          19.6538           0.8292            3.15m
        14          18.6114           0.6904            3.11m
        15          17.1176           0.8135       

       132           0.4906          -0.0014            2.49m
       133           0.4951          -0.0009            2.48m
       134           0.4528          -0.0039            2.48m
       135           0.4757           0.0023            2.48m
       136           0.4360           0.0101            2.48m
       137           0.4270           0.0023            2.48m
       138           0.4130           0.0060            2.47m
       139           0.4529          -0.0029            2.46m
       140           0.4241           0.0038            2.46m
       141           0.4198           0.0033            2.45m
       142           0.3775           0.0067            2.45m
       143           0.3964          -0.0024            2.44m
       144           0.3828           0.0001            2.44m
       145           0.3873           0.0137            2.43m
       146           0.4018          -0.0007            2.42m
       147           0.3925           0.0013            2.42m
       1

       266           0.0913          -0.0001            1.96m
       267           0.0936           0.0001            1.96m
       268           0.0827           0.0002            1.95m
       269           0.0880           0.0005            1.95m
       270           0.0876           0.0001            1.95m
       271           0.0834           0.0010            1.94m
       272           0.0781          -0.0037            1.94m
       273           0.0866           0.0005            1.94m
       274           0.0811           0.0002            1.93m
       275           0.0852           0.0006            1.93m
       276           0.0850          -0.0002            1.93m
       277           0.0759           0.0012            1.92m
       278           0.0803           0.0003            1.92m
       279           0.0691          -0.0009            1.91m
       280           0.0768          -0.0000            1.91m
       281           0.0883          -0.0002            1.90m
       2

       400           0.0304           0.0001            1.53m
       401           0.0265           0.0000            1.53m
       402           0.0289           0.0004            1.53m
       403           0.0291           0.0001            1.52m
       404           0.0254           0.0010            1.52m
       405           0.0256           0.0003            1.52m
       406           0.0228           0.0001            1.51m
       407           0.0273           0.0001            1.51m
       408           0.0259           0.0000            1.51m
       409           0.0264           0.0003            1.51m
       410           0.0263           0.0006            1.50m
       411           0.0248           0.0004            1.50m
       412           0.0263          -0.0001            1.50m
       413           0.0242           0.0015            1.49m
       414           0.0242          -0.0000            1.49m
       415           0.0261           0.0000            1.48m
       4

       533           0.0180           0.0003            1.10m
       534           0.0178          -0.0000            1.10m
       535           0.0156           0.0009            1.10m
       536           0.0151           0.0013            1.10m
       537           0.0163          -0.0000            1.09m
       538           0.0162           0.0001            1.09m
       539           0.0156           0.0000            1.09m
       540           0.0133           0.0003            1.08m
       541           0.0161           0.0000            1.08m
       542           0.0149           0.0003            1.08m
       543           0.0146          -0.0012            1.08m
       544           0.0138           0.0000            1.07m
       545           0.0156           0.0002            1.07m
       546           0.0149           0.0001            1.07m
       547           0.0155           0.0000            1.06m
       548           0.0261          -0.0066            1.06m
       5

       667           0.0171           0.0002           44.95s
       668           0.0188           0.0000           44.80s
       669           0.0182           0.0000           44.65s
       670           0.0177          -0.0000           44.50s
       671           0.0161           0.0002           44.36s
       672           0.0186          -0.0001           44.21s
       673           0.0153           0.0006           44.06s
       674           0.0190          -0.0011           43.91s
       675           0.0173           0.0002           43.76s
       676           0.0195           0.0001           43.62s
       677           0.0184           0.0002           43.46s
       678           0.0186           0.0000           43.32s
       679           0.0158          -0.0001           43.18s
       680           0.0167           0.0002           43.04s
       681           0.0176          -0.0000           42.89s
       682           0.0179           0.0001           42.75s
       6

       801           0.0099           0.0002           25.95s
       802           0.0094           0.0006           25.81s
       803           0.0097          -0.0000           25.67s
       804           0.0098           0.0001           25.54s
       805           0.0096           0.0001           25.41s
       806           0.0096           0.0000           25.27s
       807           0.0100           0.0001           25.13s
       808           0.0083           0.0002           25.00s
       809           0.0095           0.0000           24.86s
       810           0.0095          -0.0000           24.72s
       811           0.0088          -0.0000           24.59s
       812           0.0094          -0.0006           24.45s
       813           0.0089          -0.0001           24.31s
       814           0.0092          -0.0000           24.17s
       815           0.0096          -0.0000           24.03s
       816           0.0101          -0.0016           23.89s
       8

       935           0.0062           0.0002            8.25s
       936           0.0059           0.0002            8.12s
       937           0.0062           0.0000            7.99s
       938           0.0064           0.0000            7.86s
       939           0.0066          -0.0004            7.74s
       940           0.0066          -0.0000            7.61s
       941           0.0065          -0.0002            7.48s
       942           0.0066          -0.0000            7.35s
       943           0.0063           0.0003            7.22s
       944           0.0052           0.0010            7.09s
       945           0.0055           0.0002            6.97s
       946           0.0059           0.0000            6.84s
       947           0.0048           0.0002            6.71s
       948           0.0052          -0.0000            6.58s
       949           0.0055           0.0001            6.45s
       950           0.0048           0.0000            6.33s
       9

        68           1.9497           0.0193            1.98m
        69           1.8831           0.0294            1.98m
        70           1.8986           0.0159            1.98m
        71           1.7495           0.0148            1.97m
        72           1.7656           0.0010            1.97m
        73           1.7341           0.0117            1.96m
        74           1.7439           0.0107            1.96m
        75           1.6848           0.0077            1.96m
        76           1.5012           0.0290            1.95m
        77           1.4955           0.0507            1.95m
        78           1.5331           0.0197            1.95m
        79           1.4842           0.0135            1.94m
        80           1.4857           0.0095            1.94m
        81           1.4334           0.0050            1.94m
        82           1.4067           0.0095            1.93m
        83           1.3671           0.0032            1.93m
        

       202           0.1427           0.0009            1.64m
       203           0.1556           0.0031            1.64m
       204           0.1493          -0.0005            1.64m
       205           0.1378           0.0041            1.63m
       206           0.1573          -0.0001            1.63m
       207           0.1631          -0.0001            1.63m
       208           0.1619           0.0002            1.62m
       209           0.1507          -0.0001            1.62m
       210           0.1564           0.0001            1.62m
       211           0.1448           0.0014            1.62m
       212           0.1353           0.0031            1.62m
       213           0.1365          -0.0006            1.62m
       214           0.1449           0.0012            1.61m
       215           0.1318           0.0006            1.61m
       216           0.1252           0.0011            1.61m
       217           0.1418           0.0007            1.61m
       2

       336           0.0308          -0.0001            1.33m
       337           0.0337           0.0001            1.32m
       338           0.0312           0.0005            1.32m
       339           0.0326          -0.0001            1.32m
       340           0.0319           0.0003            1.32m
       341           0.0299          -0.0000            1.32m
       342           0.0296           0.0001            1.31m
       343           0.0296           0.0003            1.31m
       344           0.0383          -0.0012            1.31m
       345           0.0377           0.0001            1.31m
       346           0.0322           0.0009            1.30m
       347           0.0352           0.0002            1.30m
       348           0.0329           0.0001            1.30m
       349           0.0352           0.0002            1.30m
       350           0.0290           0.0023            1.30m
       351           0.0314           0.0003            1.29m
       3

       469           0.0186           0.0002            1.03m
       470           0.0218           0.0003            1.03m
       471           0.0202          -0.0000            1.03m
       472           0.0203          -0.0005            1.03m
       473           0.0214           0.0002            1.02m
       474           0.0186           0.0004            1.02m
       475           0.0176           0.0008            1.02m
       476           0.0177           0.0013            1.02m
       477           0.0181           0.0001            1.01m
       478           0.0162           0.0008            1.01m
       479           0.0180          -0.0000            1.01m
       480           0.0167          -0.0000            1.01m
       481           0.0172          -0.0001            1.01m
       482           0.0198          -0.0000            1.00m
       483           0.0178           0.0000            1.00m
       484           0.0175           0.0000            1.00m
       4

       604           0.0112          -0.0000           45.30s
       605           0.0125           0.0000           45.18s
       606           0.0113          -0.0000           45.05s
       607           0.0126          -0.0010           44.94s
       608           0.0123           0.0001           44.82s
       609           0.0129          -0.0002           44.70s
       610           0.0126           0.0004           44.58s
       611           0.0111           0.0008           44.46s
       612           0.0134          -0.0014           44.35s
       613           0.0129          -0.0000           44.24s
       614           0.0131           0.0003           44.11s
       615           0.0131           0.0002           43.98s
       616           0.0121          -0.0003           43.87s
       617           0.0161          -0.0038           43.74s
       618           0.0143           0.0009           43.61s
       619           0.0167           0.0000           43.50s
       6

       738           0.0107           0.0000           29.56s
       739           0.0104           0.0000           29.44s
       740           0.0106          -0.0000           29.32s
       741           0.0099           0.0000           29.22s
       742           0.0105           0.0001           29.10s
       743           0.0090           0.0000           28.99s
       744           0.0094          -0.0000           28.87s
       745           0.0094          -0.0008           28.75s
       746           0.0106           0.0000           28.64s
       747           0.0090          -0.0000           28.52s
       748           0.0100           0.0004           28.40s
       749           0.0102          -0.0000           28.29s
       750           0.0100           0.0001           28.18s
       751           0.0095           0.0001           28.07s
       752           0.0080           0.0001           27.95s
       753           0.0099          -0.0000           27.84s
       7

       871           0.0044           0.0001           14.47s
       872           0.0047           0.0001           14.35s
       873           0.0040          -0.0009           14.24s
       874           0.0048           0.0001           14.13s
       875           0.0049          -0.0000           14.01s
       876           0.0058          -0.0004           13.90s
       877           0.0055           0.0001           13.78s
       878           0.0056           0.0000           13.67s
       879           0.0054           0.0000           13.56s
       880           0.0049           0.0002           13.44s
       881           0.0053           0.0001           13.33s
       882           0.0049           0.0000           13.21s
       883           0.0045           0.0003           13.10s
       884           0.0049           0.0000           12.99s
       885           0.0046           0.0001           12.88s
       886           0.0041           0.0001           12.76s
       8

         4          37.8412           2.1584            2.11m
         5          36.2092           1.3169            2.10m
         6          34.3629           1.0443            2.12m
         7          31.9557           1.5050            2.12m
         8          28.6723           1.2611            2.12m
         9          27.3536           0.8254            2.13m
        10          26.0545           1.1004            2.14m
        11          24.6651           0.6933            2.13m
        12          23.1275           0.6732            2.14m
        13          21.7214           0.6244            2.14m
        14          20.4094           0.8030            2.14m
        15          19.6424           0.4406            2.14m
        16          17.4116           0.7706            2.15m
        17          16.6585           0.2888            2.14m
        18          16.2698           0.5038            2.14m
        19          14.8407           0.6046            2.13m
        

       138           0.4019           0.0007            1.78m
       139           0.4225           0.0008            1.78m
       140           0.3707           0.0017            1.78m
       141           0.3926           0.0049            1.77m
       142           0.3987          -0.0003            1.77m
       143           0.3612           0.0090            1.77m
       144           0.3821          -0.0008            1.76m
       145           0.3674           0.0002            1.76m
       146           0.3751           0.0028            1.76m
       147           0.3353           0.0155            1.75m
       148           0.3258          -0.0006            1.75m
       149           0.3341           0.0072            1.75m
       150           0.3012          -0.0014            1.75m
       151           0.3319           0.0026            1.74m
       152           0.3305           0.0027            1.74m
       153           0.3323           0.0020            1.73m
       1

       271           0.0668           0.0013            1.44m
       272           0.0724          -0.0001            1.44m
       273           0.0696          -0.0003            1.44m
       274           0.0739           0.0003            1.44m
       275           0.0672          -0.0002            1.43m
       276           0.0627           0.0021            1.43m
       277           0.0704           0.0002            1.43m
       278           0.0694           0.0005            1.43m
       279           0.0565           0.0015            1.43m
       280           0.0665           0.0008            1.42m
       281           0.0651          -0.0002            1.42m
       282           0.0646           0.0010            1.42m
       283           0.0610           0.0010            1.42m
       284           0.0624           0.0002            1.42m
       285           0.0611          -0.0000            1.42m
       286           0.0622           0.0005            1.41m
       2

       406           0.0185           0.0000            1.15m
       407           0.0186           0.0002            1.15m
       408           0.0189           0.0000            1.15m
       409           0.0172          -0.0007            1.15m
       410           0.0183           0.0004            1.14m
       411           0.0189           0.0000            1.14m
       412           0.0160          -0.0000            1.14m
       413           0.0182           0.0000            1.14m
       414           0.0177          -0.0001            1.14m
       415           0.0156           0.0004            1.13m
       416           0.0178           0.0001            1.13m
       417           0.0160           0.0002            1.13m
       418           0.0166           0.0000            1.13m
       419           0.0148           0.0004            1.13m
       420           0.0167           0.0001            1.12m
       421           0.0157           0.0004            1.12m
       4

       540           0.0075           0.0000           52.50s
       541           0.0077           0.0001           52.37s
       542           0.0068           0.0001           52.24s
       543           0.0065           0.0003           52.14s
       544           0.0067           0.0001           52.02s
       545           0.0067           0.0002           51.90s
       546           0.0074           0.0000           51.78s
       547           0.0074           0.0001           51.66s
       548           0.0071           0.0001           51.53s
       549           0.0064           0.0002           51.40s
       550           0.0071           0.0000           51.29s
       551           0.0059           0.0000           51.17s
       552           0.0091          -0.0020           51.05s
       553           0.0086           0.0001           50.92s
       554           0.0088           0.0000           50.80s
       555           0.0089           0.0000           50.67s
       5

       674           0.0106           0.0000           37.88s
       675           0.0103           0.0000           37.76s
       676           0.0081           0.0005           37.67s
       677           0.0098           0.0002           37.57s
       678           0.0097           0.0001           37.45s
       679           0.0082           0.0001           37.32s
       680           0.0080           0.0003           37.22s
       681           0.0092           0.0000           37.12s
       682           0.0096          -0.0042           37.01s
       683           0.0100          -0.0000           36.89s
       684           0.0105           0.0000           36.78s
       685           0.0088           0.0010           36.68s
       686           0.0100           0.0005           36.58s
       687           0.0095           0.0003           36.45s
       688           0.0097           0.0001           36.33s
       689           0.0089           0.0000           36.23s
       6

       807           0.0087           0.0002           22.55s
       808           0.0082          -0.0007           22.43s
       809           0.0088           0.0001           22.31s
       810           0.0082           0.0002           22.19s
       811           0.0078           0.0000           22.07s
       812           0.0079           0.0002           21.96s
       813           0.0072           0.0000           21.84s
       814           0.0077           0.0001           21.72s
       815           0.0074           0.0002           21.60s
       816           0.0075           0.0001           21.49s
       817           0.0068           0.0002           21.37s
       818           0.0074           0.0000           21.25s
       819           0.0067          -0.0000           21.13s
       820           0.0070           0.0000           21.02s
       821           0.0073           0.0001           20.91s
       822           0.0074           0.0000           20.79s
       8

       940           0.0054           0.0001            6.95s
       941           0.0053          -0.0003            6.83s
       942           0.0055           0.0000            6.72s
       943           0.0051          -0.0001            6.60s
       944           0.0058          -0.0013            6.48s
       945           0.0054           0.0000            6.37s
       946           0.0063           0.0001            6.25s
       947           0.0063          -0.0000            6.13s
       948           0.0063           0.0000            6.02s
       949           0.0064          -0.0000            5.90s
       950           0.0063          -0.0000            5.78s
       951           0.0063           0.0001            5.67s
       952           0.0060           0.0001            5.55s
       953           0.0063          -0.0000            5.44s
       954           0.0064          -0.0002            5.32s
       955           0.0044           0.0001            5.20s
       9

        73           1.8066           0.0009            2.16m
        74           1.7121          -0.0037            2.15m
        75           1.5639           0.0390            2.15m
        76           1.4494           0.0275            2.14m
        77           1.6113           0.0077            2.14m
        78           1.4270           0.0218            2.13m
        79           1.4707           0.0032            2.13m
        80           1.4151           0.0134            2.12m
        81           1.3407           0.0233            2.12m
        82           1.2308           0.0188            2.11m
        83           1.3330           0.0018            2.11m
        84           1.2325           0.0137            2.10m
        85           1.2652           0.0039            2.10m
        86           1.2365           0.0015            2.09m
        87           1.2485          -0.0089            2.09m
        88           1.1306           0.0175            2.08m
        

       206           0.1468           0.0001            1.75m
       207           0.1302           0.0061            1.75m
       208           0.1340          -0.0013            1.75m
       209           0.1447           0.0001            1.74m
       210           0.1274           0.0032            1.74m
       211           0.1271           0.0022            1.74m
       212           0.1346           0.0002            1.73m
       213           0.1307           0.0006            1.73m
       214           0.1066           0.0047            1.73m
       215           0.1212           0.0006            1.72m
       216           0.1178           0.0020            1.72m
       217           0.1184           0.0002            1.72m
       218           0.1191           0.0015            1.71m
       219           0.1150           0.0015            1.71m
       220           0.1105           0.0012            1.71m
       221           0.1129          -0.0012            1.71m
       2

       341           0.0395           0.0043            1.38m
       342           0.0361           0.0018            1.37m
       343           0.0443           0.0002            1.37m
       344           0.0420           0.0007            1.37m
       345           0.0428          -0.0009            1.37m
       346           0.0445           0.0001            1.36m
       347           0.0387           0.0033            1.36m
       348           0.0387          -0.0002            1.36m
       349           0.0419           0.0000            1.36m
       350           0.0357          -0.0001            1.35m
       351           0.0380          -0.0002            1.35m
       352           0.0383           0.0006            1.35m
       353           0.0378          -0.0000            1.35m
       354           0.0349           0.0015            1.35m
       355           0.0358           0.0002            1.34m
       356           0.0316           0.0046            1.34m
       3

       474           0.0220           0.0005            1.08m
       475           0.0233           0.0001            1.08m
       476           0.0225           0.0002            1.08m
       477           0.0276          -0.0050            1.07m
       478           0.0262           0.0000            1.07m
       479           0.0285          -0.0002            1.07m
       480           0.0257           0.0008            1.07m
       481           0.0286           0.0001            1.07m
       482           0.0268           0.0000            1.06m
       483           0.0247           0.0014            1.06m
       484           0.0251           0.0002            1.06m
       485           0.0250           0.0002            1.06m
       486           0.0235          -0.0001            1.06m
       487           0.0222           0.0001            1.05m
       488           0.0244           0.0000            1.05m
       489           0.0250           0.0001            1.05m
       4

       607           0.0149          -0.0000           49.59s
       608           0.0149          -0.0003           49.48s
       609           0.0163           0.0001           49.34s
       610           0.0147           0.0003           49.20s
       611           0.0145           0.0006           49.06s
       612           0.0120           0.0000           48.92s
       613           0.0156          -0.0002           48.79s
       614           0.0128           0.0003           48.65s
       615           0.0141           0.0003           48.50s
       616           0.0149           0.0000           48.35s
       617           0.0144           0.0002           48.22s
       618           0.0132           0.0004           48.08s
       619           0.0130          -0.0000           47.94s
       620           0.0121           0.0002           47.80s
       621           0.0130           0.0002           47.66s
       622           0.0132          -0.0004           47.54s
       6

       740           0.0071           0.0001           32.51s
       741           0.0066           0.0003           32.39s
       742           0.0057           0.0004           32.26s
       743           0.0066          -0.0000           32.13s
       744           0.0065           0.0000           32.00s
       745           0.0062           0.0002           31.87s
       746           0.0064           0.0000           31.74s
       747           0.0064          -0.0001           31.62s
       748           0.0060          -0.0001           31.50s
       749           0.0054           0.0003           31.36s
       750           0.0061           0.0000           31.24s
       751           0.0066          -0.0000           31.11s
       752           0.0076          -0.0013           30.97s
       753           0.0080           0.0000           30.84s
       754           0.0075           0.0000           30.71s
       755           0.0079           0.0000           30.58s
       7

       873           0.0114          -0.0001           15.75s
       874           0.0096           0.0007           15.62s
       875           0.0097           0.0000           15.49s
       876           0.0107          -0.0024           15.37s
       877           0.0114           0.0002           15.24s
       878           0.0116          -0.0015           15.12s
       879           0.0129           0.0000           14.99s
       880           0.0121           0.0002           14.87s
       881           0.0112           0.0003           14.74s
       882           0.0116           0.0001           14.62s
       883           0.0113           0.0003           14.49s
       884           0.0112           0.0001           14.37s
       885           0.0116           0.0000           14.25s
       886           0.0110           0.0000           14.12s
       887           0.0111           0.0001           14.00s
       888           0.0105           0.0002           13.87s
       8

         5          35.5492           1.7291            2.42m
         6          33.9472           1.4843            2.42m
         7          31.2536           1.2777            2.48m
         8          29.2416           0.9217            2.49m
         9          27.8362           0.7143            2.46m
        10          25.4871           1.0683            2.48m
        11          24.0765           1.2525            2.49m
        12          22.8447           1.0996            2.47m
        13          22.3175           0.6380            2.44m
        14          20.8623           0.6848            2.41m
        15          19.7208           0.3947            2.40m
        16          18.2167           0.5063            2.40m
        17          17.1054           0.5742            2.38m
        18          16.3558           0.4994            2.36m
        19          15.3095           0.4215            2.34m
        20          14.7598           0.1983            2.34m
        

       139           0.5584          -0.0006            1.88m
       140           0.4726           0.0124            1.89m
       141           0.5303           0.0004            1.89m
       142           0.5045           0.0058            1.89m
       143           0.4712          -0.0015            1.89m
       144           0.5031          -0.0001            1.88m
       145           0.4919           0.0005            1.88m
       146           0.4758           0.0059            1.88m
       147           0.4197           0.0053            1.87m
       148           0.3996           0.0022            1.87m
       149           0.4691           0.0027            1.87m
       150           0.4078           0.0042            1.86m
       151           0.4445           0.0053            1.86m
       152           0.4183           0.0047            1.86m
       153           0.4298           0.0034            1.85m
       154           0.4338          -0.0008            1.85m
       1

       273           0.0842           0.0005            1.54m
       274           0.0930           0.0008            1.54m
       275           0.0926           0.0001            1.54m
       276           0.0910           0.0003            1.53m
       277           0.0884          -0.0001            1.53m
       278           0.0904           0.0007            1.53m
       279           0.0759          -0.0001            1.53m
       280           0.0842           0.0012            1.53m
       281           0.0714           0.0058            1.53m
       282           0.0804           0.0009            1.53m
       283           0.0807           0.0001            1.53m
       284           0.0721          -0.0000            1.53m
       285           0.0782          -0.0000            1.53m
       286           0.0763           0.0001            1.52m
       287           0.0766           0.0001            1.52m
       288           0.0618           0.0002            1.52m
       2

       406           0.0269           0.0000            1.26m
       407           0.0272          -0.0000            1.26m
       408           0.0282           0.0002            1.26m
       409           0.0270          -0.0000            1.26m
       410           0.0264           0.0005            1.25m
       411           0.0266           0.0001            1.25m
       412           0.0221           0.0006            1.25m
       413           0.0251           0.0000            1.25m
       414           0.0250           0.0000            1.25m
       415           0.0216           0.0008            1.24m
       416           0.0235          -0.0002            1.24m
       417           0.0255          -0.0000            1.24m
       418           0.0235           0.0004            1.24m
       419           0.0224           0.0000            1.23m
       420           0.0239          -0.0000            1.23m
       421           0.0234           0.0001            1.23m
       4

       540           0.0100           0.0004           58.17s
       541           0.0109           0.0001           58.03s
       542           0.0090           0.0003           57.93s
       543           0.0099           0.0001           57.85s
       544           0.0098           0.0002           57.72s
       545           0.0102           0.0000           57.60s
       546           0.0099           0.0002           57.47s
       547           0.0109          -0.0005           57.34s
       548           0.0114           0.0000           57.20s
       549           0.0113          -0.0000           57.06s
       550           0.0107           0.0002           56.94s
       551           0.0093           0.0006           56.82s
       552           0.0129          -0.0014           56.66s
       553           0.0114          -0.0000           56.53s
       554           0.0125          -0.0000           56.39s
       555           0.0165          -0.0034           56.25s
       5

       674           0.0106           0.0000           40.51s
       675           0.0094          -0.0000           40.37s
       676           0.0097           0.0000           40.25s
       677           0.0099           0.0001           40.12s
       678           0.0092           0.0003           39.99s
       679           0.0087          -0.0000           39.87s
       680           0.0081           0.0007           39.75s
       681           0.0097           0.0000           39.62s
       682           0.0091           0.0000           39.48s
       683           0.0088          -0.0000           39.36s
       684           0.0088          -0.0001           39.23s
       685           0.0084           0.0001           39.10s
       686           0.0091           0.0000           38.97s
       687           0.0088           0.0001           38.84s
       688           0.0094           0.0000           38.72s
       689           0.0086           0.0001           38.59s
       6

       807           0.0062          -0.0000           23.68s
       808           0.0064           0.0000           23.55s
       809           0.0065           0.0000           23.42s
       810           0.0062           0.0000           23.30s
       811           0.0053           0.0001           23.18s
       812           0.0060           0.0001           23.06s
       813           0.0058           0.0001           22.95s
       814           0.0061           0.0000           22.83s
       815           0.0059           0.0000           22.70s
       816           0.0062          -0.0002           22.58s
       817           0.0058          -0.0000           22.46s
       818           0.0063           0.0000           22.33s
       819           0.0056           0.0002           22.21s
       820           0.0056          -0.0000           22.09s
       821           0.0061          -0.0000           21.96s
       822           0.0058          -0.0000           21.84s
       8

       940           0.0063           0.0003            7.32s
       941           0.0071          -0.0000            7.20s
       942           0.0065           0.0002            7.08s
       943           0.0067           0.0000            6.95s
       944           0.0062           0.0001            6.83s
       945           0.0054           0.0000            6.71s
       946           0.0056           0.0000            6.58s
       947           0.0063          -0.0000            6.46s
       948           0.0069          -0.0003            6.34s
       949           0.0068           0.0000            6.22s
       950           0.0072          -0.0000            6.09s
       951           0.0069           0.0000            5.97s
       952           0.0067           0.0000            5.85s
       953           0.0064           0.0000            5.72s
       954           0.0066           0.0000            5.60s
       955           0.0048           0.0004            5.48s
       9

In [29]:
class predictor:
    
    def __init__(self, predictor1, predictor2):
        self.pr1 = predictor1
        self.pr2 = predictor2
        
    def fit(self, X, Y):
        self.pr1.fit(X, Y)
        
        error = self.pr1.predict(X) - Y
        
        self.pr2.fit(X, error)
    
    def get_params(self):
        pass
    
    def predict(X):
        
        pr_1 = self.pr1.predict(X)
        pr_2 = self.pr2.predict(X)
        
        return pr1 - pr2

In [137]:
train_x, test_x, train_y, test_y = train_test_split(train_features, train_Y.iloc[:, 1], test_size = 0.33,
                                                    random_state = 42)

In [138]:
lr.fit(train_x, train_y)
svr.fit(train_x, train_y)
gbr.fit(train_x, train_y)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                          learning_rate=0.05, loss='huber', max_depth=7,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=10, min_samples_split=10,
                          min_weight_fraction_leaf=0.0, n_estimators=1000,
                          n_iter_no_change=None, presort='auto',
                          random_state=42, subsample=0.9, tol=0.0001,
                          validation_fraction=0.33, verbose=0,
                          warm_start=False)

In [139]:
lr_pred = lr.predict(test_x)
svr_pred = svr.predict(test_x)
gbr_pred = gbr.predict(test_x)

In [140]:
lr_scorelr.score(test_x, test_y)
svr_score = svr.score(test_x, test_y))
print(gbr.score(test_x, test_y))

0.571087397627106
0.5596892204391134
0.510028565950948


In [141]:
overall_pred = (lr_pred + svr_pred + gbr_pred)/3.

In [142]:
print(r2_score(test_y, overall_pred))

0.5710096591903691


### Predictions

In [97]:
best_model = None

In [99]:
lr_test_pred = lr.predict(test_features)
svr_test_pred = svr.predict(test_features)
gbr_test_pred = gbr.predict(test_features)

In [100]:
predicted = (lr_test_pred + svr_test_pred + gbr_test_pred)/3.

In [101]:
#predicted = best_model.predict(test_features)
sample_submission['y'] = predicted

In [102]:
sample_submission.to_csv("submissions/Ajay_13th_sub.csv", index = False)