In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

In [2]:
# Read the pre-split training and test tables from their CSV files.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (r'data/new_train.csv', r'data/new_test.csv')
)

In [3]:
# Load the pickled list of descriptor (column) names used as model features.
# The context manager closes the file handle as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project.
with open("data/desc_10.pkl", "rb") as desc_file:
    label_10 = pickle.load(desc_file)

In [4]:
# Display the descriptor names that were just loaded.
label_10

['MATS8i',
 'GATS7i',
 'VCH-6',
 'nssAsH',
 'SdssC',
 'SddsAs',
 'minssssBem',
 'maxsPH2',
 'nAtomLAC',
 'TDB6e']

In [5]:
# Features: only the selected descriptor columns from each split.
x_train, x_test = train[label_10], test[label_10]
# Target: the last column of each table, kept as a one-column DataFrame.
y_train, y_test = train.iloc[:, [-1]], test.iloc[:, [-1]]

In [6]:
# Sanity check: shapes of both feature matrices, followed by the target frames.
# NOTE(review): y_train / y_test are displayed in full here; .shape or .head()
# would keep the cell output short.
x_train.shape, x_test.shape, y_train, y_test

((107, 10),
 (27, 10),
      pIC50
 0    6.374
 1    5.678
 2    8.000
 3    6.824
 4    8.108
 ..     ...
 102  7.387
 103  6.821
 104  6.652
 105  5.958
 106  5.509
 
 [107 rows x 1 columns],
     pIC50
 0   6.228
 1   7.569
 2   8.201
 3   7.081
 4   7.796
 5   5.602
 6   6.167
 7   7.026
 8   6.358
 9   8.319
 10  5.469
 11  7.921
 12  5.051
 13  6.066
 14  8.658
 15  6.292
 16  5.886
 17  7.301
 18  6.393
 19  8.167
 20  6.114
 21  5.745
 22  6.215
 23  7.221
 24  8.495
 25  8.456
 26  6.541)

In [7]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaling to both the training and the test features.
mmscaler = MinMaxScaler().fit(x_train)
sc_x_train, sc_x_test = (
    mmscaler.transform(features) for features in (x_train, x_test)
)

In [8]:
# Candidate hyper-parameter values explored by the grid search below.
paramgrid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=['auto', 'scale'],
    epsilon=[0.1, 1, 10, 100, 1000],
)

In [9]:
# Exhaustive search over `paramgrid` for an RBF-kernel SVR, evaluated with
# 5-fold cross-validation and ranked by negative mean squared error.
# n_jobs = -1 lets scikit-learn run the candidate fits in parallel.
grid = GridSearchCV(
        estimator = SVR(kernel = 'rbf'),
        param_grid = paramgrid,
        cv = 5,
        scoring = 'neg_mean_squared_error',
        n_jobs = -1
)
# y_train is a one-column DataFrame; flatten it to shape (n_samples,) so that
# scikit-learn does not warn about receiving a column-vector y.
grid.fit(sc_x_train, np.ravel(y_train))

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'epsilon': [0.1, 1, 10, 100, 1000],
                         'gamma': ['auto', 'scale']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [10]:
# Show the hyper-parameter combination selected by the grid search.
grid.best_params_

{'C': 1, 'epsilon': 0.1, 'gamma': 'scale'}

In [11]:
# Persist the selected hyper-parameters. The context manager guarantees the
# file is flushed and closed before any later cell reads it back.
with open("r_data/rbf_params_desc_10.p", "wb") as params_file:
    pickle.dump(grid.best_params_, params_file)

In [12]:
# Final model: an RBF-kernel SVR configured with the hyper-parameters chosen
# by the grid search, fitted on the full scaled training set.
best_params = grid.best_params_
model = SVR(
    kernel = 'rbf',
    C = best_params['C'],
    gamma = best_params['gamma'],
    epsilon = best_params['epsilon'],
)
# y_train is a one-column DataFrame; flatten it to shape (n_samples,) so that
# scikit-learn does not warn about receiving a column-vector y.
model.fit(sc_x_train, np.ravel(y_train))

  y = column_or_1d(y, warn=True)


SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [13]:
# Predict the target for the scaled training features and the scaled test features.
y_train_pred, y_test_pred = (
    model.predict(scaled_features)
    for scaled_features in (sc_x_train, sc_x_test)
)

In [14]:
# Coefficient of determination (R^2) of the predictions on each split.
train_r2 = r2_score(y_true=y_train, y_pred=y_train_pred)
test_r2 = r2_score(y_true=y_test, y_pred=y_test_pred)

In [15]:
# Show the R^2 score obtained on the training split.
train_r2

0.7216140059673426

In [16]:
# Persist the training R^2. The context manager guarantees the file is
# flushed and closed before any later cell reads it back.
with open("r_data/train_rbf_10.p", "wb") as train_score_file:
    pickle.dump(train_r2, train_score_file)

In [17]:
# Show the R^2 score obtained on the test split.
test_r2

0.6717825216851132

In [18]:
# Persist the test R^2. The context manager guarantees the file is flushed
# and closed before any later cell reads it back.
with open("r_data/test_rbf_10.p", "wb") as test_score_file:
    pickle.dump(test_r2, test_score_file)

In [19]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a context manager so its handle is closed as soon
    as loading finishes, instead of being left to the garbage collector.

    NOTE: unpickling can execute arbitrary code; only load files produced by
    this project.
    """
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


# Load the stored best parameters and train/test scores for each descriptor-set size.
# Only the *_10 files are written by this notebook; the 5/15/20/25 files must
# already exist in r_data/ (presumably written by sibling notebooks -- verify).
params5 = _load_pickle("r_data/rbf_params_desc_5.p")
train_rbf5 = _load_pickle("r_data/train_rbf_5.p")
test_rbf5 = _load_pickle("r_data/test_rbf_5.p")

params10 = _load_pickle("r_data/rbf_params_desc_10.p")
train_rbf10 = _load_pickle("r_data/train_rbf_10.p")
test_rbf10 = _load_pickle("r_data/test_rbf_10.p")

params15 = _load_pickle("r_data/rbf_params_desc_15.p")
train_rbf15 = _load_pickle("r_data/train_rbf_15.p")
test_rbf15 = _load_pickle("r_data/test_rbf_15.p")

params20 = _load_pickle("r_data/rbf_params_desc_20.p")
train_rbf20 = _load_pickle("r_data/train_rbf_20.p")
test_rbf20 = _load_pickle("r_data/test_rbf_20.p")

params25 = _load_pickle("r_data/rbf_params_desc_25.p")
train_rbf25 = _load_pickle("r_data/train_rbf_25.p")
test_rbf25 = _load_pickle("r_data/test_rbf_25.p")

In [20]:
# Stored parameters, train score and test score loaded from the *_5 result files.
params5, train_rbf5, test_rbf5

({'C': 10, 'epsilon': 0.1, 'gamma': 'auto'},
 0.529229479726216,
 0.5311655328369775)

In [21]:
# Stored parameters, train R^2 and test R^2 for the 10-descriptor model saved by this notebook.
params10, train_rbf10, test_rbf10

({'C': 1, 'epsilon': 0.1, 'gamma': 'scale'},
 0.7216140059673426,
 0.6717825216851132)

In [22]:
# Stored parameters, train score and test score loaded from the *_15 result files.
params15, train_rbf15, test_rbf15

({'C': 1, 'epsilon': 0.1, 'gamma': 'scale'},
 0.8417824578187709,
 0.8364965167789107)

In [23]:
# Stored parameters, train score and test score loaded from the *_20 result files.
params20, train_rbf20, test_rbf20

({'C': 10, 'epsilon': 0.1, 'gamma': 'scale'},
 0.92175931352501,
 0.7564997260630253)

In [24]:
# Stored parameters, train score and test score loaded from the *_25 result files.
params25, train_rbf25, test_rbf25

({'C': 1, 'epsilon': 0.1, 'gamma': 'scale'},
 0.7597027991621441,
 0.7566834406821884)