In [69]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from joblib import dump, load

In [70]:
# Load the pickled train and test splits.
# Context managers make sure each file handle is closed as soon as the object
# has been read (the bare `pickle.load(open(...))` form leaves it open).
# NOTE: pickle can execute arbitrary code on load; only use trusted files here.
with open('Train_Test/train_bams.p', 'rb') as train_file:
    train = pickle.load(train_file)
with open('Train_Test/test_bams.p', 'rb') as test_file:
    test = pickle.load(test_file)

In [71]:
# Load the pickled list of descriptor (column) names used as model features.
# The context manager closes the file handle once the list has been read.
with open("desc/desc25.pkl", "rb") as desc_file:
    label_25 = pickle.load(desc_file)
# NOTE(review): the displayed list contains repeated names (e.g. 'RDF30i',
# 'GATS2s', 'VCH-3', 'AATS8v'), so selecting columns with it yields duplicated
# feature columns -- confirm whether this is intended.
label_25

['SHCsatu',
 'minHdsCH',
 'mindsN',
 'maxHdsCH',
 'maxssCH2',
 'RDF30i',
 'RDF30i',
 'SdsN',
 'GATS4i',
 'AATSC8i',
 'GATS2s',
 'GATS2s',
 'VCH-7',
 'AATSC6s',
 'GATS2e',
 'WTPT-4',
 'MATS3p',
 'GATS4p',
 'SRW5',
 'ATSC2s',
 'n3Ring',
 'VCH-3',
 'VCH-3',
 'AATS8v',
 'AATS8v']

In [72]:
# Features: the descriptor columns named in label_25.
x_train, x_test = (split.loc[:, label_25] for split in (train, test))
# Target: the last column of each split, kept as a one-column DataFrame.
y_train, y_test = (split.iloc[:, [-1]] for split in (train, test))

In [73]:
# Sanity check: feature-matrix shapes followed by the full train/test target frames.
x_train.shape, x_test.shape, y_train, y_test

((107, 25),
 (27, 25),
      pIC50
 125  5.958
 74   5.854
 1    6.292
 110  7.921
 99   5.854
 ..     ...
 68   6.180
 95   5.585
 32   8.699
 124  7.229
 131  7.259
 
 [107 rows x 1 columns],
      pIC50
 80   5.252
 104  5.367
 94   5.319
 71   6.666
 51   8.569
 3    6.310
 28   7.824
 84   6.114
 126  6.958
 103  6.292
 85   5.886
 25   8.398
 48   7.921
 10   6.393
 0    5.842
 52   5.009
 35   8.000
 98   5.444
 108  6.244
 114  8.108
 13   6.224
 40   8.432
 112  6.409
 109  7.081
 26   8.495
 43   8.456
 93   5.721)

In [74]:
# Fit the min-max scaler on the training descriptors only, then apply the
# same fitted transform to both splits so the test set never influences it.
mmscaler = MinMaxScaler().fit(x_train)
sc_x_train, sc_x_test = (mmscaler.transform(part) for part in (x_train, x_test))

In [75]:
# Hyper-parameter grid explored by the grid search (decade steps for C and epsilon).
# NOTE(review): per the scikit-learn SVR docs `degree` only affects the 'poly'
# kernel, so it is presumably inert for the linear kernel used below -- confirm.
# NOTE(review): the displayed targets span roughly 5-9, so epsilon values of 10
# and above look wider than the whole target range -- confirm they are useful.
paramgrid = dict(
    C=[0.1, 1, 10, 100, 1000],
    degree=[1],
    epsilon=[0.1, 1, 10, 100, 1000],
)

In [76]:
# Exhaustive 5-fold cross-validated search over `paramgrid` for a linear-kernel
# SVR, scored by negative mean squared error and run on all available cores.
grid = GridSearchCV(
        estimator = SVR(kernel = 'linear'),
        param_grid = paramgrid,
        cv = 5,
        scoring = 'neg_mean_squared_error',
        n_jobs = -1
)
# y_train is a one-column DataFrame; flatten it to 1-D so scikit-learn does not
# emit a DataConversionWarning ("column-vector y was passed") on every fit.
grid.fit(sc_x_train, np.ravel(y_train))

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='linear',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100, 1000], 'degree': [1],
                         'epsilon': [0.1, 1, 10, 100, 1000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [77]:
# Show the parameter combination with the best cross-validated score.
grid.best_params_

{'C': 1, 'degree': 1, 'epsilon': 0.1}

In [78]:
# Persist the selected hyper-parameters. The context manager guarantees the
# file is flushed and closed before a later cell reads it back.
with open("data_Linear/params_linear_25.p", "wb") as params_file:
    pickle.dump(grid.best_params_, params_file)

In [79]:
# Final model: a linear-kernel SVR rebuilt from the hyper-parameters chosen by
# the grid search, then fitted on the full scaled training set.
model = SVR(kernel = 'linear', C = grid.best_params_['C'],
           degree = grid.best_params_['degree'],
           epsilon = grid.best_params_['epsilon'])
# y_train is a one-column DataFrame; flatten it to 1-D so scikit-learn does not
# emit a DataConversionWarning ("column-vector y was passed").
model.fit(sc_x_train, np.ravel(y_train))

  y = column_or_1d(y, warn=True)


SVR(C=1, cache_size=200, coef0=0.0, degree=1, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [80]:
# Predict the target for the scaled training and test descriptors.
y_train_pred, y_test_pred = map(model.predict, (sc_x_train, sc_x_test))

In [81]:
# Coefficient of determination (R^2) of the predictions on each split.
train_r2, test_r2 = (
    r2_score(observed, predicted)
    for observed, predicted in ((y_train, y_train_pred), (y_test, y_test_pred))
)

In [82]:
# Display the R^2 score obtained on the training split.
train_r2

0.7620530500771677

In [83]:
# Persist the training R^2. The context manager guarantees the file is flushed
# and closed before a later cell reads it back.
with open("data_Linear/train_linear_25.p", "wb") as train_score_file:
    pickle.dump(train_r2, train_score_file)

In [84]:
# Display the R^2 score obtained on the held-out test split.
test_r2

0.7475349096694942

In [85]:
# Persist the test R^2. The context manager guarantees the file is flushed and
# closed before a later cell reads it back.
with open("data_Linear/test_linear_25.p", "wb") as test_score_file:
    pickle.dump(test_r2, test_score_file)

In [86]:
# Load the saved best parameters and train/test scores of the linear-kernel
# runs for each descriptor count (5, 10, 15, 20, 25).
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`, closing the file afterwards."""
    # NOTE: pickle can execute arbitrary code on load; only use trusted files here.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _load_linear_results(n_desc):
    """Return `(params, train_score, test_score)` saved for the `n_desc`-descriptor run."""
    template = "data_Linear/{kind}_linear_{n}.p"
    return tuple(
        _read_pickle(template.format(kind=kind, n=n_desc))
        for kind in ("params", "train", "test")
    )


# Keep the original per-run variable names so the display cells below still work.
params5, train_linear5, test_linear5 = _load_linear_results(5)
params10, train_linear10, test_linear10 = _load_linear_results(10)
params15, train_linear15, test_linear15 = _load_linear_results(15)
params20, train_linear20, test_linear20 = _load_linear_results(20)
params25, train_linear25, test_linear25 = _load_linear_results(25)

In [87]:
# Saved best parameters and train/test scores for the 5-descriptor linear run.
params5, train_linear5, test_linear5

({'C': 100, 'degree': 1, 'epsilon': 1},
 0.3851801385807617,
 0.48651668516336166)

In [88]:
# Saved best parameters and train/test scores for the 10-descriptor linear run.
params10, train_linear10, test_linear10

({'C': 1, 'degree': 1, 'epsilon': 0.1}, 0.6749101987359583, 0.7405215786165336)

In [89]:
# Saved best parameters and train/test scores for the 15-descriptor linear run.
params15, train_linear15, test_linear15

({'C': 10, 'degree': 1, 'epsilon': 1}, 0.6230245035031439, 0.5823987848236698)

In [90]:
# Saved best parameters and train/test scores for the 20-descriptor linear run.
params20, train_linear20, test_linear20

({'C': 1, 'degree': 1, 'epsilon': 0.1}, 0.7609616472480392, 0.6895075494156417)

In [91]:
# Saved best parameters and train/test scores for the 25-descriptor linear run.
params25, train_linear25, test_linear25

({'C': 1, 'degree': 1, 'epsilon': 0.1}, 0.7620530500771677, 0.7475349096694942)