In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm

In [2]:
# Load the pickled train/test frames.
# Context managers make sure each file handle is closed right after reading.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('Train_Test/train_bams.p', 'rb') as fh:
    train = pickle.load(fh)
with open('Train_Test/test_bams.p', 'rb') as fh:
    test = pickle.load(fh)

In [3]:
# Display the training frame loaded above (the notebook renders a truncated preview).
train

Unnamed: 0,nAcid,ALogP,ALogp2,AMR,apol,naAromAtom,nAromBond,nAtom,nHeavyAtom,nH,...,P2s,E1s,E2s,E3s,Ts,As,Vs,Ks,Ds,pIC50
125,0,0.0382,0.001459,138.1194,72.933239,17,18,59,36,23,...,0.174261,0.530126,0.512059,0.408730,31.318280,174.085678,385.225921,0.674239,1.450915,5.958
74,1,-0.7970,0.635209,77.2375,41.352102,6,6,37,23,14,...,0.101150,0.592622,0.550249,0.332241,18.177672,43.726934,87.066620,0.775200,1.475112,5.854
1,0,-0.0693,0.004802,113.5328,61.298032,0,0,54,30,24,...,0.124809,0.554310,0.365546,0.392117,25.097284,103.740040,238.275628,0.709610,1.311973,6.292
110,0,-2.5774,6.642991,128.7353,70.286825,17,18,60,35,25,...,0.245975,0.488948,0.408430,0.497519,24.554292,143.408571,358.730867,0.514894,1.394896,7.921
99,1,-0.4080,0.166464,117.9068,62.001653,12,12,54,33,21,...,0.325881,0.553109,0.498577,0.513381,21.955191,131.000413,332.588534,0.377649,1.565066,5.854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,0,-2.0910,4.372281,119.9176,64.300032,12,12,57,33,24,...,0.119319,0.611371,0.550971,0.478145,25.754579,105.897604,242.885706,0.720412,1.640487,6.180
95,0,-0.1589,0.025249,142.5628,78.271583,12,12,70,39,31,...,0.153062,0.639028,0.396740,0.351866,39.688457,278.796848,771.668789,0.680110,1.387634,5.585
32,0,-0.4082,0.166627,106.0673,58.637653,12,12,51,30,21,...,0.344943,0.576989,0.426777,0.404989,17.907066,96.524233,263.097835,0.277946,1.408755,8.699
124,0,-0.8291,0.687407,140.0467,75.315618,17,18,63,37,26,...,0.166500,0.557440,0.485636,0.409972,31.807655,174.995325,389.991476,0.685353,1.453048,7.229


In [4]:
# Load the pickled list of descriptor names used as model features.
# The context manager closes the file handle as soon as the list is read.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# NOTE(review): the displayed list contains 'ATSC4p' twice, so only 24 of the
# 25 entries are distinct -- confirm whether the upstream selection intended this.
with open("desc/new_desc25.pkl", "rb") as fh:
    label_25 = pickle.load(fh)
label_25

['C2SP3',
 'maxssCH2',
 'AATSC6e',
 'RDF30u',
 'MATS2i',
 'RDF30e',
 'ATSC6m',
 'SpMax3_Bhs',
 'SRW5',
 'MDEC-34',
 'minHBint4',
 'TDB4i',
 'nssCH2',
 'SHCsats',
 'SHBint4',
 'SC-4',
 'StN',
 'ntsC',
 'maxtsC',
 'ATSC4p',
 'ATSC4p',
 'SsF',
 'SC-6',
 'MDEC-24',
 'LipoaffinityIndex']

In [5]:
# Feature matrices: keep only the columns named in label_25, for both splits.
x_train, x_test = (frame.loc[:, label_25] for frame in (train, test))
# Targets: the last column of each frame, selected with a list so the result
# stays a one-column DataFrame (2-D) rather than a Series.
y_train, y_test = (frame.iloc[:, [-1]] for frame in (train, test))

In [6]:
# Sanity check: (rows, columns) of the test feature matrix after column selection.
x_test.shape

(27, 25)

In [7]:
# Min-max scale the features. The scaler learns its ranges from the training
# split only and the same fitted transform is then applied to the test split.
mmscaler = MinMaxScaler()
sc_x_train = mmscaler.fit_transform(x_train)
sc_x_test = mmscaler.transform(x_test)

In [8]:
# Search space handed to GridSearchCV: every combination of these values is tried.
paramgrid = dict(
    C=[0.1, 1, 10, 100, 1000],        # candidate values for SVR's C
    degree=[2, 3, 4, 5],              # candidate polynomial kernel degrees
    epsilon=[0.1, 1, 10, 100, 1000],  # candidate values for SVR's epsilon
)

In [9]:
# Exhaustive 5-fold cross-validated search over `paramgrid` for a
# polynomial-kernel SVR, scored by negative mean squared error and run on all
# available cores (n_jobs=-1).
grid = GridSearchCV(
        estimator = SVR(kernel = 'poly'),
        param_grid = paramgrid,
        cv = 5,
        scoring = 'neg_mean_squared_error',
        n_jobs = -1
)
# y_train is a one-column DataFrame; flatten it to shape (n_samples,) so
# scikit-learn does not emit a DataConversionWarning (column_or_1d).
grid.fit(sc_x_train, y_train.values.ravel())

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='poly',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100, 1000], 'degree': [2, 3, 4, 5],
                         'epsilon': [0.1, 1, 10, 100, 1000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [10]:
# Parameter combination that achieved the best cross-validated score in the grid search.
grid.best_params_

{'C': 1, 'degree': 3, 'epsilon': 0.1}

In [11]:
# Persist the best hyper-parameters. The context manager guarantees the file
# is flushed and closed once the dump finishes.
with open("data_Poly/new_params_poly_25.p", "wb") as fh:
    pickle.dump(grid.best_params_, fh)

In [12]:
# Final model: polynomial-kernel SVR built from the grid search's best
# parameters (best_params_ holds exactly the tuned keys C, degree and epsilon).
model = SVR(kernel = 'poly', **grid.best_params_)
# Flatten the one-column target to shape (n_samples,) so scikit-learn does not
# emit a DataConversionWarning (column_or_1d).
model.fit(sc_x_train, y_train.values.ravel())

  y = column_or_1d(y, warn=True)


SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [13]:
# Predict on the scaled training and test features with the fitted model.
y_train_pred, y_test_pred = (
    model.predict(features) for features in (sc_x_train, sc_x_test)
)

In [14]:
# Inspect the model's predictions for the training set.
y_train_pred

array([7.27893014, 5.75434228, 6.39183009, 7.48947913, 5.60252644,
       8.53798416, 7.02912403, 8.43088412, 5.63120197, 6.97487923,
       7.46870273, 6.64095026, 7.83401467, 6.25585076, 8.26700644,
       5.76832761, 6.89971992, 5.85049165, 5.53199835, 8.46251566,
       8.06983315, 8.85891359, 7.26978544, 6.73735353, 8.98613179,
       6.37863155, 5.66880564, 5.92382909, 8.72378739, 6.21559431,
       6.31733491, 5.58442377, 5.79968321, 5.39811966, 6.29682512,
       5.91258081, 6.41024795, 6.16884426, 6.57672577, 5.56792125,
       5.67000123, 5.72097921, 6.82134846, 5.80458848, 6.22515125,
       5.96598215, 5.65038131, 5.72223904, 5.62298909, 6.28538737,
       5.79918896, 5.79070995, 6.06736546, 5.15124433, 6.82114543,
       6.89607473, 7.9308452 , 7.0737685 , 5.20914433, 7.07635105,
       6.54512989, 6.33811972, 5.61873426, 6.32813354, 5.63741444,
       5.55829499, 8.15253885, 6.67748232, 6.32389927, 8.27311125,
       6.14886947, 6.20877356, 5.6305298 , 6.05512211, 6.67907

In [15]:
# Persist the training-set predictions. The context manager guarantees the
# file is flushed and closed once the dump finishes.
with open("data_Poly/y_train_pred_poly25.p", "wb") as fh:
    pickle.dump(y_train_pred, fh)

In [16]:
# Inspect the model's predictions for the test set.
y_test_pred

array([5.88945877, 5.52245009, 5.70542332, 6.1434851 , 8.85438455,
       7.04137443, 8.64428534, 5.61984479, 6.94808608, 5.61759899,
       5.69222887, 8.93287455, 8.94795412, 6.69539289, 5.90470819,
       5.79257981, 9.51481226, 5.403187  , 6.32973217, 7.52155431,
       6.61289879, 8.02911548, 7.07841396, 6.47193712, 8.67922999,
       7.67940285, 5.66819163])

In [17]:
# Persist the test-set predictions. The context manager guarantees the file
# is flushed and closed once the dump finishes.
with open("data_Poly/y_test_pred_poly25.p", "wb") as fh:
    pickle.dump(y_test_pred, fh)

In [18]:
# Coefficient of determination (R^2) of the predictions against the true
# targets, computed separately for the training and test splits.
train_r2, test_r2 = (
    r2_score(y_true=y_train, y_pred=y_train_pred),
    r2_score(y_true=y_test, y_pred=y_test_pred),
)

In [19]:
# R^2 on the training split.
train_r2

0.931661350504696

In [20]:
# Persist the training R^2. The context manager guarantees the file is
# flushed and closed once the dump finishes.
with open("data_Poly/new_train_poly_25.p", "wb") as fh:
    pickle.dump(train_r2, fh)

In [21]:
# R^2 on the held-out test split.
test_r2

0.7406481047399054

In [22]:
# Persist the test R^2. The context manager guarantees the file is flushed
# and closed once the dump finishes.
with open("data_Poly/new_test_poly_25.p", "wb") as fh:
    pickle.dump(test_r2, fh)

In [23]:
# Load the persisted polynomial-SVR artefacts for every descriptor-set size
# (5, 10, 15, 20, 25) so they can be compared side by side.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a context manager so its handle is always closed,
    even if unpickling raises.
    """
    with open(path, "rb") as fh:
        return pickle.load(fh)


def _load_poly_results(n_desc):
    """Load the five artefacts saved under data_Poly/ for one descriptor count.

    Parameters
    ----------
    n_desc : int
        Descriptor count used in the file names (e.g. 5, 10, 15, 20, 25).

    Returns
    -------
    tuple
        (params, train score, test score, train predictions, test predictions),
        read in that order from:
        new_params_poly_<n>.p, new_train_poly_<n>.p, new_test_poly_<n>.p,
        y_train_pred_poly<n>.p, y_test_pred_poly<n>.p.

    Raises
    ------
    OSError
        If any of the files is missing or unreadable.
    """
    return (
        _load_pickle("data_Poly/new_params_poly_{}.p".format(n_desc)),
        _load_pickle("data_Poly/new_train_poly_{}.p".format(n_desc)),
        _load_pickle("data_Poly/new_test_poly_{}.p".format(n_desc)),
        _load_pickle("data_Poly/y_train_pred_poly{}.p".format(n_desc)),
        _load_pickle("data_Poly/y_test_pred_poly{}.p".format(n_desc)),
    )


# The files for sizes other than 25 are not written in this notebook;
# presumably sibling notebooks produce them -- verify they exist before running.
params5, train_poly5, test_poly5, train_pred_poly5, test_pred_poly5 = _load_poly_results(5)
params10, train_poly10, test_poly10, train_pred_poly10, test_pred_poly10 = _load_poly_results(10)
params15, train_poly15, test_poly15, train_pred_poly15, test_pred_poly15 = _load_poly_results(15)
params20, train_poly20, test_poly20, train_pred_poly20, test_pred_poly20 = _load_poly_results(20)
params25, train_poly25, test_poly25, train_pred_poly25, test_pred_poly25 = _load_poly_results(25)

In [24]:
# Show the stored results for the 5-descriptor run: params, train score, test score, train/test predictions.
params5, train_poly5, test_poly5, train_pred_poly5, test_pred_poly5 

({'C': 0.1, 'degree': 2, 'epsilon': 1},
 0.5019202907335509,
 0.6067007704581142,
 array([6.9580642 , 6.13485099, 6.43295203, 6.60566814, 6.26101431,
        7.30228369, 6.4925248 , 7.86732744, 6.23861714, 6.8205084 ,
        7.28285178, 6.65835851, 7.74975298, 6.1712018 , 7.12332089,
        6.22875812, 7.0096708 , 6.44931812, 6.38018275, 7.92708047,
        6.95435002, 7.70268314, 6.44319943, 6.44052987, 7.42896397,
        6.55969478, 6.23861714, 6.88877182, 7.82357494, 6.77771697,
        6.64161945, 6.16369218, 6.21326229, 6.2285749 , 6.41883537,
        6.51062261, 6.62468097, 6.21006051, 6.47038201, 6.22039243,
        6.21364999, 6.27655587, 7.34111241, 6.13979397, 6.51442771,
        6.26371472, 6.24508884, 6.27764905, 6.21314034, 6.21664872,
        6.2336765 , 6.16352414, 6.35049574, 6.36895046, 7.06159961,
        6.59172507, 7.39992336, 6.5847292 , 6.29692959, 6.92986207,
        6.55204276, 6.15306698, 6.28600526, 6.81043687, 6.19111127,
        6.18956604, 8.0427213 , 7.

In [25]:
# Show the stored results for the 10-descriptor run: params, train score, test score, train/test predictions.
params10, train_poly10, test_poly10, train_pred_poly10, test_pred_poly10

({'C': 0.1, 'degree': 3, 'epsilon': 1},
 0.6289558681779742,
 0.611463485528541,
 array([6.95756127, 5.9461098 , 6.5074985 , 6.55914474, 6.01682361,
        7.63797168, 7.22451431, 7.47471695, 6.47488856, 6.50829111,
        6.56877264, 6.54196031, 6.86439769, 6.25528471, 9.16686103,
        5.99633288, 6.99494162, 6.4691895 , 6.06651998, 7.7372334 ,
        8.0656578 , 7.85740725, 6.77815525, 7.15032224, 7.88620832,
        6.46447237, 6.47488856, 7.01339573, 8.27734383, 6.46931814,
        6.01560643, 6.11673218, 6.45663189, 6.04829142, 6.14912239,
        6.27098392, 6.54471625, 6.56104383, 6.57136211, 5.96175569,
        5.95046343, 6.54056113, 7.72122729, 6.23124541, 6.33800524,
        6.24484741, 6.15322716, 6.03542799, 6.15509135, 6.58618752,
        6.03730957, 6.05218714, 6.5246911 , 6.04887304, 6.53663373,
        7.35177967, 7.20086169, 7.16809172, 6.04132643, 6.47322524,
        6.34755091, 6.08223172, 6.07416496, 6.14064645, 5.98293223,
        5.93500091, 7.76827715, 6.4

In [26]:
# Show the stored results for the 15-descriptor run: params, train score, test score, train/test predictions.
params15, train_poly15, test_poly15, train_pred_poly15, test_pred_poly15

({'C': 10, 'degree': 2, 'epsilon': 0.1},
 0.8833293558756712,
 0.7646203039267745,
 array([6.88153726, 5.75429238, 6.16581299, 7.02464767, 5.58917304,
        8.53812287, 6.37882987, 8.55831574, 6.06709932, 7.27569513,
        7.14103462, 6.64059013, 8.16804121, 6.34202547, 8.26730986,
        5.86220152, 6.89967656, 6.05864074, 5.53186707, 8.21926   ,
        7.44100286, 8.85931916, 7.06391601, 6.48468151, 8.98634985,
        6.48657199, 6.01341327, 5.92432624, 8.72420615, 6.09719128,
        6.63837176, 5.68352819, 5.61433728, 5.28142561, 6.56563153,
        6.3803036 , 6.75694463, 6.16923952, 6.43819722, 5.36554971,
        5.65446324, 5.94119249, 6.82107707, 5.89611633, 6.39527789,
        5.93046953, 5.62385562, 5.59345237, 5.62305586, 5.85528224,
        6.01915268, 5.84514629, 6.26707418, 5.15062569, 6.79932326,
        7.15847051, 7.45177439, 6.70980366, 5.56502509, 6.908441  ,
        6.59670228, 6.2325261 , 5.65121045, 6.32763175, 5.61604214,
        5.42739531, 8.15165096, 6

In [27]:
# Show the stored results for the 20-descriptor run: params, train score, test score, train/test predictions.
params20, train_poly20, test_poly20, train_pred_poly20, test_pred_poly20

({'C': 10, 'degree': 2, 'epsilon': 1},
 0.6627867219196113,
 0.5623997408236687,
 array([6.95839232, 5.84606795, 6.01473931, 6.92109379, 5.9381873 ,
        7.64805148, 6.30115364, 7.65800007, 6.24000404, 6.82671122,
        6.56864895, 6.30413989, 7.43435916, 6.00914427, 7.34773694,
        6.08961953, 6.81615135, 5.99657211, 6.36018543, 8.29034157,
        7.24287391, 8.14311348, 7.34207346, 7.08667585, 8.67119966,
        6.36066326, 6.24000404, 6.8238645 , 7.85033006, 6.78357529,
        6.48432929, 5.98979256, 5.80984908, 5.83338038, 6.93441087,
        6.13490127, 6.44653778, 6.31402981, 6.09151803, 5.74470256,
        5.9527913 , 6.317729  , 7.63017003, 5.79735397, 6.04684193,
        5.84042092, 5.88591942, 5.90538412, 6.01009524, 5.94017717,
        6.24004285, 5.90705521, 6.41910026, 5.83866595, 6.60303138,
        7.26545864, 7.20046811, 6.60485448, 5.88150609, 6.56486536,
        6.3289909 , 5.84807133, 6.13111452, 7.22803817, 5.87140619,
        5.73704017, 7.60968982, 6.6

In [28]:
# Show the stored results for the 25-descriptor run (the files written earlier in this notebook):
# best params, train R^2, test R^2, train/test predictions.
params25, train_poly25, test_poly25, train_pred_poly25, test_pred_poly25

({'C': 1, 'degree': 3, 'epsilon': 0.1},
 0.931661350504696,
 0.7406481047399054,
 array([7.27893014, 5.75434228, 6.39183009, 7.48947913, 5.60252644,
        8.53798416, 7.02912403, 8.43088412, 5.63120197, 6.97487923,
        7.46870273, 6.64095026, 7.83401467, 6.25585076, 8.26700644,
        5.76832761, 6.89971992, 5.85049165, 5.53199835, 8.46251566,
        8.06983315, 8.85891359, 7.26978544, 6.73735353, 8.98613179,
        6.37863155, 5.66880564, 5.92382909, 8.72378739, 6.21559431,
        6.31733491, 5.58442377, 5.79968321, 5.39811966, 6.29682512,
        5.91258081, 6.41024795, 6.16884426, 6.57672577, 5.56792125,
        5.67000123, 5.72097921, 6.82134846, 5.80458848, 6.22515125,
        5.96598215, 5.65038131, 5.72223904, 5.62298909, 6.28538737,
        5.79918896, 5.79070995, 6.06736546, 5.15124433, 6.82114543,
        6.89607473, 7.9308452 , 7.0737685 , 5.20914433, 7.07635105,
        6.54512989, 6.33811972, 5.61873426, 6.32813354, 5.63741444,
        5.55829499, 8.15253885, 6.6