In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, ShuffleSplit, StratifiedShuffleSplit
from sklearn.metrics import matthews_corrcoef, accuracy_score, make_scorer
from sklearn.utils import resample
from sklearn.decomposition import PCA, KernelPCA
from sklearn.feature_selection import VarianceThreshold, SelectPercentile, SelectKBest, chi2

from xgboost import XGBClassifier

import matplotlib.pyplot as plt

In [2]:
VALIDATION = False

train1_df = pd.read_csv(f'PRML_Datacontest_MKN_JUL_2021/Dataset_1_Training.csv',index_col=0).T
train2_df = pd.read_csv(f'PRML_Datacontest_MKN_JUL_2021/Dataset_2_Training.csv',index_col=0).T

test1_df = pd.read_csv(f'PRML_Datacontest_MKN_JUL_2021/Dataset_1_Testing.csv',index_col=0).T
test2_df = pd.read_csv(f'PRML_Datacontest_MKN_JUL_2021/Dataset_2_Testing.csv',index_col=0).T

train1 = train1_df
train2 = train2_df
test1 = test1_df
test2 = test2_df

if(VALIDATION):
    #Shuffling the dataset
    valSize = int(train1.shape[0]*0.8)
    train1 = train1.sample(frac=1).reset_index(drop=True)
    test1 = train1[valSize:]
    train1 = train1[:valSize]
    
    valSize = int(train2.shape[0]*0.8)
    train2 = train2.sample(frac=1).reset_index(drop=True)
    test2 = train2[valSize:]
    train2 = train2[:valSize]

In [3]:
print(train1.shape)
print(test1.shape)
print(train2.shape)
print(test2.shape)

(130, 22285)
(100, 22283)
(340, 54679)
(214, 54675)


In [4]:
test_cols1 = ['CO: 1','CO: 2']
test_cols2 = ['CO: 3','CO: 4','CO: 5','CO: 6']
test_cols = ['CO: 1','CO: 2','CO: 3','CO: 4','CO: 5','CO: 6']

In [5]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 100, stop = 2000, num = 5)]
# Number of features to consider at every split
max_features = ['auto', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(5, 100, num = 5)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2]
# Method of selecting samples for training each tree
bootstrap = [False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
#                'min_samples_split': min_samples_split,
#                'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [6]:
random_grid

{'n_estimators': [100, 575, 1050, 1525, 2000],
 'max_features': ['auto', 'log2'],
 'max_depth': [5, 28, 52, 76, 100, None],
 'bootstrap': [False]}

In [7]:
param = random_grid
def pred(train,test,col,model=RandomForestClassifier(),params=param):
    X = train.drop(test_cols, axis=1, errors='ignore')
    y = np.array(train[[col]])
    y = y.reshape(y.shape[0],)
    Xtest = test.drop(test_cols, axis=1, errors='ignore')

    if(VALIDATION): 
        ytest = test[[col]]
        ytrue.extend(list(ytest[col]))

    scaler = preprocessing.MinMaxScaler()
    scaledX = scaler.fit_transform(X)
#     selector = SelectPercentile(chi2, percentile=10)
#     scaledX = selector.fit_transform(scaledX,y)
    
    scaledXtest = scaler.transform(Xtest)
#     scaledXtest = selector.transform(scaledXtest)
    
    #model = XGBClassifier()
    model = GridSearchCV(model,params,verbose=1,n_jobs=4,cv=StratifiedShuffleSplit(n_splits=3,test_size=0.25),scoring=make_scorer(matthews_corrcoef))
    model.fit(scaledX,y)
    best_params.append(model.best_params_)
    print(model.best_params_)
      
    ypred.extend(model.predict(scaledXtest))   

    if(VALIDATION): print(col,model.score(scaledXtest,ytest),accuracy_score(ytest,model.predict(scaledXtest)))
    else: print(col)
        
    return pd.DataFrame(model.cv_results_).sort_values(by=['rank_test_score']).head(10)

In [8]:
ypred = []
ytrue = []
best_params = []
pred(train1,test1,'CO: 1',params=random_grid)

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   43.4s
[Parallel(n_jobs=4)]: Done 180 out of 180 | elapsed:  2.8min finished


{'bootstrap': False, 'max_depth': 28, 'max_features': 'auto', 'n_estimators': 100}
CO: 1


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
10,0.551856,0.014345,0.009973,1.123916e-07,False,28.0,auto,100,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.449013,0.223221,0.747018,0.473084,0.214516,1
44,11.829791,0.211911,0.134482,0.006519244,False,100.0,auto,2000,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.449013,0.15538,0.656532,0.420308,0.205599,2
43,8.603096,0.052088,0.120523,0.01769654,False,100.0,auto,1525,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.449013,0.15538,0.656532,0.420308,0.205599,2
31,3.33148,0.086176,0.039894,0.001410402,False,76.0,auto,575,"{'bootstrap': False, 'max_depth': 76, 'max_fea...",0.449013,0.15538,0.656532,0.420308,0.205599,2
14,11.169203,0.198695,0.150597,0.02893978,False,28.0,auto,2000,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.449013,0.15538,0.656532,0.420308,0.205599,2
13,8.150936,0.160775,0.105717,0.008020123,False,28.0,auto,1525,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.449013,0.15538,0.656532,0.420308,0.205599,2
12,5.563356,0.137621,0.070811,0.004534823,False,28.0,auto,1050,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.449013,0.15538,0.656532,0.420308,0.205599,2
23,8.60328,0.230834,0.122181,0.01309147,False,52.0,auto,1525,"{'bootstrap': False, 'max_depth': 52, 'max_fea...",0.449013,0.15538,0.656532,0.420308,0.205599,2
22,5.876863,0.078368,0.080119,0.003390294,False,52.0,auto,1050,"{'bootstrap': False, 'max_depth': 52, 'max_fea...",0.449013,0.15538,0.656532,0.420308,0.205599,2
53,9.114341,0.462969,0.107048,0.008198511,False,,auto,1525,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.449013,0.15538,0.656532,0.420308,0.205599,2


In [9]:
pred(train1,test1,'CO: 2',params=random_grid)

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   44.6s
[Parallel(n_jobs=4)]: Done 180 out of 180 | elapsed:  2.7min finished


{'bootstrap': False, 'max_depth': 52, 'max_features': 'auto', 'n_estimators': 1525}
CO: 2


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
23,8.149316,0.075296,0.099401,0.001694,False,52.0,auto,1525,"{'bootstrap': False, 'max_depth': 52, 'max_fea...",0.482382,0.421927,0.423911,0.44274,0.028043,1
10,0.550376,0.007132,0.009309,0.00047,False,28.0,auto,100,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.550848,0.350823,0.412217,0.437963,0.083665,2
0,0.55037,0.014229,0.011304,0.00205,False,5.0,auto,100,"{'bootstrap': False, 'max_depth': 5, 'max_feat...",0.547967,0.492308,0.267503,0.435926,0.121241,3
20,0.567169,0.008788,0.009641,0.001244,False,52.0,auto,100,"{'bootstrap': False, 'max_depth': 52, 'max_fea...",0.41302,0.421927,0.423911,0.419619,0.004736,4
54,10.869194,0.088436,0.131647,0.003257,False,,auto,2000,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.41302,0.350823,0.423911,0.395918,0.032195,5
22,5.585589,0.046978,0.073803,0.005085,False,52.0,auto,1050,"{'bootstrap': False, 'max_depth': 52, 'max_fea...",0.41302,0.350823,0.423911,0.395918,0.032195,5
51,3.117196,0.029662,0.037899,0.000815,False,,auto,575,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.482382,0.350823,0.351175,0.394794,0.061935,7
11,3.133306,0.070356,0.042886,0.001628,False,28.0,auto,575,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.278073,0.492308,0.351175,0.373852,0.088919,8
13,8.227727,0.062239,0.101394,0.003292,False,28.0,auto,1525,"{'bootstrap': False, 'max_depth': 28, 'max_fea...",0.41302,0.350823,0.351175,0.371673,0.029237,9
34,10.678625,0.157771,0.138962,0.002618,False,76.0,auto,2000,"{'bootstrap': False, 'max_depth': 76, 'max_fea...",0.41302,0.350823,0.351175,0.371673,0.029237,9


In [10]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 500, stop = 2000, num = 4)]
# Number of features to consider at every split
max_features = ['auto', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(50, 100, num = 3)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2]
# Method of selecting samples for training each tree
bootstrap = [False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
#                'min_samples_split': min_samples_split,
#                'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [11]:
random_grid

{'n_estimators': [500, 1000, 1500, 2000],
 'max_features': ['auto', 'log2'],
 'max_depth': [50, 75, 100, None],
 'bootstrap': [False]}

In [12]:
pred(train2,test2,'CO: 3',params=random_grid)

Fitting 3 folds for each of 32 candidates, totalling 96 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  5.6min
[Parallel(n_jobs=4)]: Done  96 out of  96 | elapsed: 12.3min finished


{'bootstrap': False, 'max_depth': 75, 'max_features': 'auto', 'n_estimators': 1000}
CO: 3


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
18,65.400951,1.02545,0.151262,0.012987,False,100.0,auto,1500,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.130325,0.268117,0.387943,0.262128,0.105257,1
27,85.652646,3.00752,0.172549,0.026346,False,,auto,2000,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.130325,0.268117,0.387943,0.262128,0.105257,1
9,42.970469,0.28664,0.110039,0.007389,False,75.0,auto,1000,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.130325,0.268117,0.387943,0.262128,0.105257,1
19,86.881147,1.204756,0.172203,0.005418,False,100.0,auto,2000,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.130325,0.259134,0.387943,0.259134,0.105172,4
3,90.045841,1.366819,0.181515,0.010587,False,50.0,auto,2000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.038261,0.268117,0.387943,0.23144,0.145094,5
11,86.889452,1.134245,0.182184,0.009844,False,75.0,auto,2000,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.038261,0.268117,0.387943,0.23144,0.145094,5
17,44.750379,0.497824,0.102229,0.011462,False,100.0,auto,1000,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.038261,0.320508,0.333914,0.230894,0.136322,7
0,21.927651,0.161027,0.056848,0.000814,False,50.0,auto,500,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.130325,0.186087,0.333914,0.216775,0.085901,8
1,44.09288,0.397184,0.104719,0.002936,False,50.0,auto,1000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.038261,0.204579,0.387943,0.210261,0.142814,9
25,43.2951,0.420107,0.091195,0.002739,False,,auto,1000,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.038261,0.186087,0.387943,0.204097,0.143324,10


In [13]:
pred(train2,test2,'CO: 4',params=random_grid)

Fitting 3 folds for each of 32 candidates, totalling 96 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  7.0min
[Parallel(n_jobs=4)]: Done  96 out of  96 | elapsed: 15.3min finished


{'bootstrap': False, 'max_depth': 50, 'max_features': 'auto', 'n_estimators': 1000}
CO: 4


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
1,56.344647,2.909687,0.097243,0.004941,False,50.0,auto,1000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.365318,0.0,0.0,0.121773,0.172212,1
3,110.945016,3.980346,0.177858,0.004179,False,50.0,auto,2000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.365318,0.0,0.0,0.121773,0.172212,1
16,27.695567,0.847186,0.059175,0.004485,False,100.0,auto,500,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.365318,0.0,0.0,0.121773,0.172212,1
24,27.590906,1.371614,0.058178,0.00683,False,,auto,500,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.365318,0.0,0.0,0.121773,0.172212,1
26,81.728626,2.52678,0.147938,0.014252,False,,auto,1500,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.365318,0.0,0.0,0.121773,0.172212,1
25,54.84546,1.768936,0.098722,0.008512,False,,auto,1000,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.365318,0.0,0.0,0.121773,0.172212,1
9,55.376845,1.779989,0.099068,0.00339,False,75.0,auto,1000,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.365318,0.0,0.0,0.121773,0.172212,1
19,110.298403,4.09465,0.202961,0.026879,False,100.0,auto,2000,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.365318,0.0,0.0,0.121773,0.172212,1
0,27.33437,1.32257,0.05984,0.004309,False,50.0,auto,500,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.256776,0.0,0.0,0.085592,0.121046,9
18,82.064589,3.195877,0.149933,0.015329,False,100.0,auto,1500,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.256776,0.0,0.0,0.085592,0.121046,9


In [14]:
pred(train2,test2,'CO: 5',params=random_grid)

Fitting 3 folds for each of 32 candidates, totalling 96 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done  96 out of  96 | elapsed:  8.9min finished


{'bootstrap': False, 'max_depth': 50, 'max_features': 'auto', 'n_estimators': 1500}
CO: 5


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
2,46.190253,0.476925,0.165557,0.016265,False,50.0,auto,1500,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.759106,0.855442,0.741068,0.785205,0.050208,1
3,61.561639,0.665571,0.182851,0.006091,False,50.0,auto,2000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.759106,0.830968,0.741068,0.777048,0.038832,2
18,46.313205,0.776337,0.13331,0.001695,False,100.0,auto,1500,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.782833,0.806776,0.737791,0.7758,0.028599,3
9,30.598493,0.262732,0.102725,0.000814,False,75.0,auto,1000,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.759106,0.806776,0.745978,0.77062,0.026122,4
25,31.289043,0.245331,0.098737,0.002821,False,,auto,1000,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.759106,0.855377,0.693528,0.769337,0.06647,5
11,62.819538,1.863457,0.186846,0.011994,False,75.0,auto,2000,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.735563,0.832208,0.737791,0.768521,0.045043,6
26,46.693427,0.237897,0.13629,0.002635,False,,auto,1500,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.735563,0.855377,0.712838,0.767926,0.062529,7
10,45.5787,0.163454,0.156418,0.002454,False,75.0,auto,1500,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.735563,0.807256,0.737791,0.760203,0.033284,8
17,30.646886,0.168053,0.102381,0.00401,False,100.0,auto,1000,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.759106,0.78259,0.737791,0.759829,0.018296,9
16,15.525431,0.057338,0.054853,0.000814,False,100.0,auto,500,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.759106,0.734212,0.785517,0.759612,0.020948,10


In [15]:
pred(train2,test2,'CO: 6',params=random_grid)

Fitting 3 folds for each of 32 candidates, totalling 96 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.4min
[Parallel(n_jobs=4)]: Done  96 out of  96 | elapsed:  9.9min finished


{'bootstrap': False, 'max_depth': 50, 'max_features': 'log2', 'n_estimators': 1000}
CO: 6


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_bootstrap,param_max_depth,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
5,3.644123,0.311755,0.106382,0.005777,False,50.0,log2,1000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.033514,0.100542,-0.005976,0.042693,0.043967,1
2,52.669879,1.023609,0.154919,0.017509,False,50.0,auto,1500,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.033514,0.119523,-0.043919,0.036373,0.066755,2
4,1.752534,0.040186,0.059344,0.002479,False,50.0,log2,500,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.057789,0.175803,-0.137247,0.032115,0.129085,3
1,36.352734,0.317327,0.105052,0.004178,False,50.0,auto,1000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.02285,0.025181,0.039841,0.029291,0.007521,4
7,6.729797,0.212728,0.198778,0.008692,False,50.0,log2,2000,"{'bootstrap': False, 'max_depth': 50, 'max_fea...",0.02285,0.104732,-0.073042,0.01818,0.072651,5
28,1.816705,0.053483,0.062859,0.005715,False,,log2,500,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.02285,0.104732,-0.073042,0.01818,0.072651,5
18,52.162994,0.527385,0.158624,0.020286,False,100.0,auto,1500,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",0.033514,0.114345,-0.107571,0.01343,0.091703,7
11,67.574107,0.600053,0.178206,0.006908,False,75.0,auto,2000,"{'bootstrap': False, 'max_depth': 75, 'max_fea...",0.033514,0.114345,-0.107571,0.01343,0.091703,7
21,3.339805,0.054215,0.107035,0.004647,False,100.0,log2,1000,"{'bootstrap': False, 'max_depth': 100, 'max_fe...",-0.037703,0.104732,-0.030482,0.012182,0.065509,9
24,16.870529,0.088949,0.05552,0.001726,False,,auto,500,"{'bootstrap': False, 'max_depth': None, 'max_f...",0.05164,0.0,-0.030482,0.007053,0.033895,10


In [16]:
# pred(train1,test1,'CO: 1')
# pred(train1,test1,'CO: 2')

# pred(train2,test2,'CO: 3')
# pred(train2,test2,'CO: 4')
# pred(train2,test2,'CO: 5')
# pred(train2,test2,'CO: 6')

In [17]:
print(best_params)

[{'bootstrap': False, 'max_depth': 28, 'max_features': 'auto', 'n_estimators': 100}, {'bootstrap': False, 'max_depth': 52, 'max_features': 'auto', 'n_estimators': 1525}, {'bootstrap': False, 'max_depth': 75, 'max_features': 'auto', 'n_estimators': 1000}, {'bootstrap': False, 'max_depth': 50, 'max_features': 'auto', 'n_estimators': 1000}, {'bootstrap': False, 'max_depth': 50, 'max_features': 'auto', 'n_estimators': 1500}, {'bootstrap': False, 'max_depth': 50, 'max_features': 'log2', 'n_estimators': 1000}]


In [18]:
if(VALIDATION):
    print(matthews_corrcoef(ytrue,ypred))
else:
#     submission = pd.DataFrame(ypred,columns=['Predicted'])
#     submission.index.name = 'Id'
    submission = pd.read_csv('dummy_submission.csv')
    submission.Predicted = np.array(ypred,dtype=int)
    submission.to_csv('RFGridCV.csv',index=False)
    print(submission.shape)

(1056, 2)
