In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

In [2]:
# Load the pickled train and test data sets (pickle files, not CSV).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# `with` guarantees each file handle is closed once the object has been read.
with open('data/train_bams.p', 'rb') as train_file:
    train = pickle.load(train_file)
with open('data/test_bams.p', 'rb') as test_file:
    test = pickle.load(test_file)

In [3]:
# Load the pickled list of descriptor (column) names used as model features.
# The file is opened in a `with` block so the handle is closed after reading.
with open("data/desc25.pkl", "rb") as label_file:
    label_25 = pickle.load(label_file)
label_25

['VCH-5',
 'VCH-6',
 'MATS6e',
 'maxdsN',
 'nHCsats',
 'maxdsCH',
 'SssssC',
 'MDEN-23',
 'nssssC',
 'nHBAcc_Lipinski',
 'AATSC8i',
 'RDF30e',
 'RDF30e',
 'RDF30e',
 'maxHCsats',
 'GATS6m',
 'ATSC2i',
 'ATSC6s',
 'TDB9v',
 'SCH-5',
 'maxHBint4',
 'n3Ring',
 'SRW3',
 'maxHBint8',
 'SHCsats']

In [4]:
# Features: the columns named in label_25, taken from each split.
# NOTE(review): the recorded label_25 output lists 'RDF30e' three times, so that
# column is selected three times here — confirm this is intended upstream.
x_train, x_test = (split.loc[:, label_25] for split in (train, test))

# Target: the last column of each split, kept as a single-column DataFrame
# (the list index [-1] preserves the 2-D shape).
y_train, y_test = (split.iloc[:, [-1]] for split in (train, test))

In [5]:
# Sanity check: feature-matrix shapes followed by the train and test target frames.
# NOTE(review): this displays both target frames in full; .shape / .head() would be lighter.
x_train.shape, x_test.shape, y_train, y_test

((107, 25),
 (27, 25),
      pIC50
 125  5.958
 74   5.854
 1    6.292
 110  7.921
 99   5.854
 ..     ...
 68   6.180
 95   5.585
 32   8.699
 124  7.229
 131  7.259
 
 [107 rows x 1 columns],
      pIC50
 80   5.252
 104  5.367
 94   5.319
 71   6.666
 51   8.569
 3    6.310
 28   7.824
 84   6.114
 126  6.958
 103  6.292
 85   5.886
 25   8.398
 48   7.921
 10   6.393
 0    5.842
 52   5.009
 35   8.000
 98   5.444
 108  6.244
 114  8.108
 13   6.224
 40   8.432
 112  6.409
 109  7.081
 26   8.495
 43   8.456
 93   5.721)

In [6]:
# Learn the min/max scaling from the training features only, then apply the
# same fitted transform to both splits.
mmscaler = MinMaxScaler().fit(x_train)
sc_x_train, sc_x_test = mmscaler.transform(x_train), mmscaler.transform(x_test)

In [7]:
# Hyperparameter grid explored by the grid search for the polynomial-kernel SVR.
paramgrid = dict(
    C=[0.1, 1, 10, 100, 1000],        # regularisation strength
    degree=[2, 3, 4, 5],              # polynomial kernel degree
    epsilon=[0.1, 1, 10, 100, 1000],  # width of the epsilon-insensitive tube
)

In [8]:
# Exhaustive 5-fold cross-validated search over `paramgrid` for a
# polynomial-kernel SVR, scored by negative mean squared error and run on
# all available cores.
grid = GridSearchCV(
        estimator = SVR(kernel = 'poly'),
        param_grid = paramgrid,
        cv = 5,
        scoring = 'neg_mean_squared_error',
        n_jobs = -1
)
# y_train is a single-column DataFrame; flatten it to a 1-D array so
# scikit-learn does not emit the column-vector DataConversionWarning.
grid.fit(sc_x_train, y_train.values.ravel())

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='poly',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100, 1000], 'degree': [2, 3, 4, 5],
                         'epsilon': [0.1, 1, 10, 100, 1000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [9]:
# Parameter combination selected by the grid search (best mean CV score).
grid.best_params_

{'C': 1, 'degree': 2, 'epsilon': 0.1}

In [10]:
# pickle.dump(grid.best_params_, open( "p_data/params_poly_25.p", "wb" ))

In [11]:
# Final model: a polynomial-kernel SVR built from the hyperparameters chosen
# by the grid search. Unpacking best_params_ keeps this cell in sync with
# whatever keys the parameter grid defines.
model = SVR(kernel = 'poly', **grid.best_params_)
# Flatten the single-column target frame to 1-D to avoid scikit-learn's
# column-vector DataConversionWarning.
model.fit(sc_x_train, y_train.values.ravel())

  y = column_or_1d(y, warn=True)


SVR(C=1, cache_size=200, coef0=0.0, degree=2, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [12]:
# Predict the target for the scaled training and test features with the fitted model.
y_train_pred, y_test_pred = map(model.predict, (sc_x_train, sc_x_test))

In [13]:
# Coefficient of determination (R²) of the predictions on each split.
train_r2 = r2_score(y_true=y_train, y_pred=y_train_pred)
test_r2 = r2_score(y_true=y_test, y_pred=y_test_pred)

In [14]:
# R² of the model on the training set.
train_r2

0.8664205970491113

In [15]:
# pickle.dump(train_r2, open( "p_data/train_poly_25.p", "wb" ))

In [16]:
# R² of the model on the test set.
test_r2

0.7860846081005068

In [17]:
# pickle.dump(test_r2, open( "p_data/test_poly_25.p", "wb" ))

In [25]:
# Load the saved results (best parameters, train score, test score) for each
# descriptor-set size from p_data/.
# NOTE: the *_poly_25.p files correspond to the dump cells that are commented
# out earlier in this notebook; those dumps must have been run at least once
# for this cell to succeed.
# NOTE: unpickling can execute arbitrary code; only load trusted files.


def _load_pickle(path):
    """Return the object stored in the pickle file at `path`, closing the file afterwards."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _load_poly_results(n_desc, directory="p_data"):
    """Return the `(params, train, test)` pickles saved for `n_desc` descriptors.

    Reads `<directory>/params_poly_<n_desc>.p`, `<directory>/train_poly_<n_desc>.p`
    and `<directory>/test_poly_<n_desc>.p`, in that order.
    """
    return tuple(
        _load_pickle("{}/{}_poly_{}.p".format(directory, kind, n_desc))
        for kind in ("params", "train", "test")
    )


params5, train_poly5, test_poly5 = _load_poly_results(5)
params10, train_poly10, test_poly10 = _load_poly_results(10)
params15, train_poly15, test_poly15 = _load_poly_results(15)
params20, train_poly20, test_poly20 = _load_poly_results(20)
params25, train_poly25, test_poly25 = _load_poly_results(25)

In [26]:
# Values loaded from the *_poly_5.p pickles: (params, train value, test value).
params5, train_poly5, test_poly5

({'C': 1, 'degree': 2, 'epsilon': 0.1}, 0.7310928407366462, 0.4620020432823594)

In [27]:
# Values loaded from the *_poly_10.p pickles: (params, train value, test value).
params10, train_poly10, test_poly10

({'C': 1, 'degree': 2, 'epsilon': 1}, 0.5302481342651284, 0.49747918272137515)

In [28]:
# Values loaded from the *_poly_15.p pickles: (params, train value, test value).
params15, train_poly15, test_poly15

({'C': 1, 'degree': 2, 'epsilon': 0.1}, 0.8238757645478595, 0.7814013436677931)

In [29]:
# Values loaded from the *_poly_20.p pickles: (params, train value, test value).
params20, train_poly20, test_poly20

({'C': 1, 'degree': 2, 'epsilon': 1}, 0.6013006142066266, 0.5374689334977505)

In [30]:
# Values loaded from the *_poly_25.p pickles: (params, train value, test value).
params25, train_poly25, test_poly25

({'C': 1, 'degree': 2, 'epsilon': 0.1}, 0.8664205970491113, 0.7860846081005068)