In [72]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

In [73]:
# Load Data Train csv
train = pickle.load(open('Train_Test/train_bams.p','rb'))
test = pickle.load(open('Train_Test/test_bams.p','rb'))

In [74]:
# Load Pickle POLY
label_25 = pickle.load(open("desc/desc25.pkl", "rb"))
label_25

['SHCsatu',
 'minHdsCH',
 'mindsN',
 'maxHdsCH',
 'maxssCH2',
 'RDF30i',
 'RDF30i',
 'SdsN',
 'GATS4i',
 'AATSC8i',
 'GATS2s',
 'GATS2s',
 'VCH-7',
 'AATSC6s',
 'GATS2e',
 'WTPT-4',
 'MATS3p',
 'GATS4p',
 'SRW5',
 'ATSC2s',
 'n3Ring',
 'VCH-3',
 'VCH-3',
 'AATS8v',
 'AATS8v']

In [75]:
x_train = train.loc[:,label_25]
x_test = test.loc[:,label_25]
y_train = train.iloc[:,[-1]]
y_test = test.iloc[:,[-1]]

In [76]:
x_train.shape, x_test.shape, y_train, y_test

((107, 25),
 (27, 25),
      pIC50
 125  5.958
 74   5.854
 1    6.292
 110  7.921
 99   5.854
 ..     ...
 68   6.180
 95   5.585
 32   8.699
 124  7.229
 131  7.259
 
 [107 rows x 1 columns],
      pIC50
 80   5.252
 104  5.367
 94   5.319
 71   6.666
 51   8.569
 3    6.310
 28   7.824
 84   6.114
 126  6.958
 103  6.292
 85   5.886
 25   8.398
 48   7.921
 10   6.393
 0    5.842
 52   5.009
 35   8.000
 98   5.444
 108  6.244
 114  8.108
 13   6.224
 40   8.432
 112  6.409
 109  7.081
 26   8.495
 43   8.456
 93   5.721)

In [77]:
mmscaler = MinMaxScaler()
mmscaler.fit(x_train)
sc_x_train = mmscaler.transform(x_train)
sc_x_test = mmscaler.transform(x_test)

In [78]:
paramgrid = {
    'C' : [0.1,1,10,100,1000],
    'degree' : [2,3,4,5],
    'epsilon' : [0.1,1,10,100,1000]
}

In [79]:
grid = GridSearchCV(
        estimator = SVR(kernel = 'poly'),
        param_grid = paramgrid,
        cv = 5,
        scoring = 'neg_mean_squared_error',
        n_jobs = -1
)
grid.fit(sc_x_train, y_train)

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='poly',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100, 1000], 'degree': [2, 3, 4, 5],
                         'epsilon': [0.1, 1, 10, 100, 1000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [80]:
grid.best_params_

{'C': 10, 'degree': 2, 'epsilon': 0.1}

In [81]:
pickle.dump(grid.best_params_, open( "data_Poly/params_poly_25.p", "wb" ))

In [82]:
# model
model = SVR(kernel = 'poly', C = grid.best_params_['C'],
           degree = grid.best_params_['degree'],
           epsilon = grid.best_params_['epsilon'])
model.fit(sc_x_train, y_train)

  y = column_or_1d(y, warn=True)


SVR(C=10, cache_size=200, coef0=0.0, degree=2, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [83]:
y_train_pred = model.predict(sc_x_train)
y_test_pred = model.predict(sc_x_test)

In [84]:
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

In [85]:
train_r2

0.9210939466475632

In [86]:
pickle.dump(train_r2, open( "data_Poly/train_poly_25.p", "wb" ))

In [87]:
test_r2

0.6914944912627656

In [88]:
pickle.dump(test_r2, open( "data_Poly/test_poly_25.p", "wb" ))

In [89]:
# # Load Pickle 
params5 = pickle.load(open("data_Poly/params_poly_5.p", "rb"))
train_poly5 = pickle.load(open("data_Poly/train_poly_5.p", "rb"))
test_poly5 = pickle.load(open("data_Poly/test_poly_5.p", "rb"))

params10 = pickle.load(open("data_Poly/params_poly_10.p", "rb"))
train_poly10 = pickle.load(open("data_Poly/train_poly_10.p", "rb"))
test_poly10 = pickle.load(open("data_Poly/test_poly_10.p", "rb"))

params15 = pickle.load(open("data_Poly/params_poly_15.p", "rb"))
train_poly15 = pickle.load(open("data_Poly/train_poly_15.p", "rb"))
test_poly15 = pickle.load(open("data_Poly/test_poly_15.p", "rb"))

params20 = pickle.load(open("data_Poly/params_poly_20.p", "rb"))
train_poly20 = pickle.load(open("data_Poly/train_poly_20.p", "rb"))
test_poly20 = pickle.load(open("data_Poly/test_poly_20.p", "rb"))

params25 = pickle.load(open("data_Poly/params_poly_25.p", "rb"))
train_poly25 = pickle.load(open("data_Poly/train_poly_25.p", "rb"))
test_poly25 = pickle.load(open("data_Poly/test_poly_25.p", "rb"))

In [90]:
params5, train_poly5, test_poly5

({'C': 1, 'degree': 2, 'epsilon': 1}, 0.4509564370484662, 0.46520422813501183)

In [91]:
params10, train_poly10, test_poly10

({'C': 1, 'degree': 2, 'epsilon': 0.1}, 0.6989259307595646, 0.6953636393790635)

In [92]:
params15, train_poly15, test_poly15

({'C': 1, 'degree': 3, 'epsilon': 0.1}, 0.8579029475621934, 0.5316570113810242)

In [93]:
params20, train_poly20, test_poly20

({'C': 1, 'degree': 2, 'epsilon': 0.1}, 0.856738957902528, 0.6518792140590215)

In [94]:
params25, train_poly25, test_poly25

({'C': 10, 'degree': 2, 'epsilon': 0.1},
 0.9210939466475632,
 0.6914944912627656)