In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from joblib import dump, load

In [2]:
# Load Data Train csv
train = pickle.load(open('Train_Test/train_bams.p','rb'))
test = pickle.load(open('Train_Test/test_bams.p','rb'))

In [3]:
# Load Pickle POLY
label_25 = pickle.load(open("desc/new_desc25.pkl", "rb"))
label_25

['C2SP3',
 'maxssCH2',
 'AATSC6e',
 'RDF30u',
 'MATS2i',
 'RDF30e',
 'ATSC6m',
 'SpMax3_Bhs',
 'SRW5',
 'MDEC-34',
 'minHBint4',
 'TDB4i',
 'nssCH2',
 'SHCsats',
 'SHBint4',
 'SC-4',
 'StN',
 'ntsC',
 'maxtsC',
 'ATSC4p',
 'ATSC4p',
 'SsF',
 'SC-6',
 'MDEC-24',
 'LipoaffinityIndex']

In [4]:
x_train = train.loc[:,label_25]
x_test = test.loc[:,label_25]
y_train = train.iloc[:,[-1]]
y_test = test.iloc[:,[-1]]

In [5]:
x_test.shape

(27, 25)

In [6]:
mmscaler = MinMaxScaler()
mmscaler.fit(x_train)
sc_x_train = mmscaler.transform(x_train)
sc_x_test = mmscaler.transform(x_test)

In [7]:
paramgrid = {
    'C' : [0.1,1,10,100,1000],
    'degree' : [1],
    'epsilon' : [0.1,1,10,100,1000]
}

In [8]:
grid = GridSearchCV(
        estimator = SVR(kernel = 'linear'),
        param_grid = paramgrid,
        cv = 5,
        scoring = 'neg_mean_squared_error',
        n_jobs = -1
)
grid.fit(sc_x_train, y_train)

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, error_score=nan,
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='scale', kernel='linear',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100, 1000], 'degree': [1],
                         'epsilon': [0.1, 1, 10, 100, 1000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [9]:
grid.best_params_

{'C': 1, 'degree': 1, 'epsilon': 0.1}

In [10]:
pickle.dump(grid.best_params_, open( "data_Linear/new_params_linear_25.p", "wb" ))

In [11]:
# model
model = SVR(kernel = 'linear', C = grid.best_params_['C'],
           degree = grid.best_params_['degree'],
           epsilon = grid.best_params_['epsilon'])
model.fit(sc_x_train, y_train)

  y = column_or_1d(y, warn=True)


SVR(C=1, cache_size=200, coef0=0.0, degree=1, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [12]:
y_train_pred = model.predict(sc_x_train)
y_test_pred = model.predict(sc_x_test)

In [13]:
y_train_pred

array([7.33708704, 5.35348418, 6.31719983, 7.09837409, 5.8547953 ,
       8.46759313, 6.46156406, 7.98487862, 5.77144886, 7.01536238,
       6.92728941, 6.7804741 , 7.39949579, 5.58337676, 8.21655589,
       5.63673453, 7.07221142, 6.25763072, 5.96574519, 8.41884527,
       7.50078076, 8.17019376, 7.11565268, 6.6090477 , 8.1063342 ,
       6.2864638 , 5.77501734, 7.13497389, 8.20698038, 6.45266386,
       6.18253338, 5.89804969, 5.83143409, 5.67069287, 6.10141612,
       6.40820646, 6.29972383, 6.18602118, 6.48703662, 5.60217473,
       5.70201909, 5.89585114, 8.59081406, 5.44128094, 6.44932811,
       5.96609151, 5.84416785, 5.895787  , 6.02454054, 6.0391037 ,
       5.79224012, 5.64484517, 6.25011755, 5.90697662, 6.82091812,
       7.6302005 , 7.33046413, 6.5105026 , 5.95807066, 6.67177784,
       6.65808242, 5.70602531, 5.74360836, 6.33892118, 5.69325138,
       5.50592354, 8.15244413, 6.57159399, 6.67595397, 7.81094099,
       6.21773134, 6.20925371, 5.50684518, 6.25535555, 6.83883

In [14]:
pickle.dump(y_train_pred, open( "data_Linear/y_train_pred_linear25.p", "wb" ))

In [15]:
y_test_pred

array([5.58773352, 5.48352327, 5.86333804, 5.9316879 , 7.95080685,
       7.03430436, 8.59832202, 5.59085318, 8.59521871, 5.41912921,
       5.7754192 , 9.03694348, 8.15469262, 6.69843532, 6.33482345,
       5.80087419, 8.11561539, 5.79646117, 6.19898703, 7.2242482 ,
       6.65942994, 7.71013898, 6.60492875, 6.80727726, 8.74423665,
       8.34138589, 5.68799947])

In [16]:
pickle.dump(y_test_pred, open( "data_Linear/y_test_pred_linear25.p", "wb" ))

In [17]:
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

In [18]:
train_r2

0.719197736274034

In [19]:
pickle.dump(train_r2, open( "data_Linear/new_train_r2_linear25.p", "wb" ))

In [20]:
test_r2

0.7380223823889662

In [21]:
pickle.dump(test_r2, open( "data_Linear/new_test_r2_linear25.p", "wb" ))

In [22]:
# Load Pickle 
params5 = pickle.load(open("data_Linear/new_params_linear_5.p", "rb"))
train_linear5 = pickle.load(open("data_Linear/new_train_r2_linear5.p", "rb"))
test_linear5 = pickle.load(open("data_Linear/new_test_r2_linear5.p", "rb"))
train_pred_linear5 = pickle.load(open("data_Linear/y_train_pred_linear5.p", "rb"))
test_pred_linear5 = pickle.load(open("data_Linear/y_test_pred_linear5.p", "rb"))

params10 = pickle.load(open("data_Linear/new_params_linear_10.p", "rb"))
train_linear10 = pickle.load(open("data_Linear/new_train_r2_linear10.p", "rb"))
test_linear10 = pickle.load(open("data_Linear/new_test_r2_linear10.p", "rb"))
train_pred_linear10 = pickle.load(open("data_Linear/y_train_pred_linear10.p", "rb"))
test_pred_linear10 = pickle.load(open("data_Linear/y_test_pred_linear10.p", "rb"))

params15 = pickle.load(open("data_Linear/new_params_linear_15.p", "rb"))
train_linear15 = pickle.load(open("data_Linear/new_train_r2_linear15.p", "rb"))
test_linear15 = pickle.load(open("data_Linear/new_test_r2_linear15.p", "rb"))
train_pred_linear15 = pickle.load(open("data_Linear/y_train_pred_linear15.p", "rb"))
test_pred_linear15 = pickle.load(open("data_Linear/y_test_pred_linear15.p", "rb"))

params20 = pickle.load(open("data_Linear/new_params_linear_20.p", "rb"))
train_linear20 = pickle.load(open("data_Linear/new_train_r2_linear20.p", "rb"))
test_linear20 = pickle.load(open("data_Linear/new_test_r2_linear20.p", "rb"))
train_pred_linear20 = pickle.load(open("data_Linear/y_train_pred_linear20.p", "rb"))
test_pred_linear20 = pickle.load(open("data_Linear/y_test_pred_linear20.p", "rb"))

params25 = pickle.load(open("data_Linear/new_params_linear_25.p", "rb"))
train_linear25 = pickle.load(open("data_Linear/new_train_r2_linear25.p", "rb"))
test_linear25 = pickle.load(open("data_Linear/new_test_r2_linear25.p", "rb"))
train_pred_linear25 = pickle.load(open("data_Linear/y_train_pred_linear25.p", "rb"))
test_pred_linear25 = pickle.load(open("data_Linear/y_test_pred_linear25.p", "rb"))

In [23]:
params5, train_linear5, test_linear5, train_pred_linear5, test_pred_linear5

({'C': 1000, 'degree': 1, 'epsilon': 1},
 0.5393448099407998,
 0.5881345029047804,
 array([7.15191585, 5.68759174, 6.50831453, 6.44540042, 6.17973704,
        7.27320189, 6.42384789, 8.15574757, 6.08943353, 6.87959744,
        7.21111661, 6.40289662, 7.78764706, 5.70845877, 7.39968828,
        6.01706426, 7.2441162 , 6.46930661, 6.47146186, 8.20930388,
        7.11059696, 7.93393608, 6.73816118, 6.14524598, 7.7395347 ,
        6.44158702, 6.08943353, 6.95632531, 8.56112142, 6.7080846 ,
        6.6996401 , 5.96133987, 5.8260194 , 6.13635462, 6.27559637,
        6.25073804, 6.41771182, 5.88344874, 6.43522757, 6.16419957,
        5.85712019, 6.37271084, 7.7208166 , 5.62193513, 6.8598791 ,
        6.24895125, 5.57360215, 6.27420582, 6.10045041, 5.83918371,
        6.01601562, 5.40404251, 5.83472494, 6.22744443, 7.18894102,
        6.57151113, 7.31264998, 6.33170255, 6.03538153, 7.09279197,
        6.42541079, 6.14989464, 6.13123258, 6.69871787, 6.06802947,
        6.12773708, 7.84645518, 7

In [24]:
params10, train_linear10, test_linear10, train_pred_linear10, test_pred_linear10

({'C': 1000, 'degree': 1, 'epsilon': 1},
 0.5945903236325225,
 0.49919994942214574,
 array([6.95779272, 5.30805652, 6.3443045 , 6.92090299, 5.74294471,
        7.63788569, 7.10297325, 7.5940058 , 6.56961292, 6.58078099,
        6.85995239, 6.45881634, 6.94574021, 6.00953969, 9.09435974,
        5.67566386, 7.23798403, 6.46901251, 5.90632737, 7.82698322,
        8.0603233 , 7.95867181, 7.14991439, 7.07643455, 7.590811  ,
        6.35356999, 6.56961292, 7.25650838, 7.82414563, 6.34866659,
        5.62309595, 6.05572131, 6.49577096, 5.79583529, 5.96075828,
        6.24825508, 6.41671552, 6.54387798, 6.36578365, 5.69854506,
        5.67783579, 6.3792489 , 8.4831969 , 6.13083442, 6.24251066,
        6.15208287, 6.07625686, 5.83989666, 6.09412369, 6.52996081,
        5.76388369, 5.84296674, 5.16656933, 6.05073498, 6.68508213,
        7.67990862, 7.13541746, 7.17125326, 6.02330922, 6.54424871,
        5.93129356, 5.88866467, 5.88760369, 6.02010838, 5.75693442,
        5.65465178, 7.39990775, 

In [25]:
params15, train_linear15, test_linear15, train_pred_linear15, test_pred_linear15

({'C': 1, 'degree': 1, 'epsilon': 1},
 0.6331632850742906,
 0.7479398253444045,
 array([6.94748346, 5.80138214, 6.11767193, 6.92061635, 6.1683016 ,
        7.63774404, 6.30053128, 7.65845999, 6.55789693, 7.28204026,
        6.69756121, 6.60743005, 7.28458548, 6.07077921, 7.9111636 ,
        6.0970296 , 6.98694753, 6.15897879, 6.26679142, 7.90201198,
        7.13348376, 7.95947648, 7.03936024, 6.33402714, 7.89036176,
        6.31591114, 6.53673134, 6.82452774, 7.82451837, 6.41641231,
        6.31371157, 6.17201253, 5.83122808, 6.05920963, 6.57936035,
        6.48664466, 6.48921449, 6.36474883, 6.332583  , 5.89996211,
        6.05999467, 6.12327114, 7.39074373, 5.81212527, 6.38111075,
        5.97090816, 6.07665045, 6.03894706, 6.15729511, 5.94945615,
        6.09984362, 6.21114214, 5.98607556, 6.30712044, 7.11945241,
        7.17510431, 6.89940136, 6.51266191, 6.23878856, 7.08759043,
        6.73837346, 6.11232834, 6.13061621, 6.91701647, 5.99543599,
        5.80049412, 7.25210524, 7.05

In [26]:
params20, train_linear20, test_linear20, train_pred_linear20, test_pred_linear20

({'C': 10, 'degree': 1, 'epsilon': 1},
 0.6454402386109779,
 0.5696985251382829,
 array([7.01609287, 5.92561964, 5.74786566, 6.920879  , 5.80075944,
        7.63774745, 6.300834  , 7.38672016, 6.32673582, 6.68845233,
        6.56874993, 6.21507399, 7.26790154, 6.00877241, 7.28713297,
        5.93850518, 6.91734215, 6.01571805, 6.31031677, 7.71234335,
        7.25779977, 7.95911048, 7.37940787, 6.97939306, 8.44238589,
        6.23232175, 6.32673582, 6.82435499, 7.82411894, 6.52186306,
        5.82081445, 5.83623039, 5.82224122, 5.86192221, 6.54648056,
        6.09641349, 6.3708097 , 6.24935239, 5.82150265, 5.70276354,
        5.70737843, 6.40514784, 7.32963664, 5.62363692, 5.77919009,
        5.55157024, 5.82119172, 5.74652432, 5.86868965, 5.82282611,
        5.96174476, 5.8198701 , 6.62005974, 5.80649244, 6.79742184,
        7.25647852, 7.08979683, 6.58262548, 5.94885062, 6.54797152,
        6.1854547 , 5.31838127, 6.13115006, 6.95340066, 5.69314466,
        5.67960356, 7.35183683, 6.5

In [27]:
params25, train_linear25, test_linear25, train_pred_linear25, test_pred_linear25

({'C': 1, 'degree': 1, 'epsilon': 0.1},
 0.719197736274034,
 0.7380223823889662,
 array([7.33708704, 5.35348418, 6.31719983, 7.09837409, 5.8547953 ,
        8.46759313, 6.46156406, 7.98487862, 5.77144886, 7.01536238,
        6.92728941, 6.7804741 , 7.39949579, 5.58337676, 8.21655589,
        5.63673453, 7.07221142, 6.25763072, 5.96574519, 8.41884527,
        7.50078076, 8.17019376, 7.11565268, 6.6090477 , 8.1063342 ,
        6.2864638 , 5.77501734, 7.13497389, 8.20698038, 6.45266386,
        6.18253338, 5.89804969, 5.83143409, 5.67069287, 6.10141612,
        6.40820646, 6.29972383, 6.18602118, 6.48703662, 5.60217473,
        5.70201909, 5.89585114, 8.59081406, 5.44128094, 6.44932811,
        5.96609151, 5.84416785, 5.895787  , 6.02454054, 6.0391037 ,
        5.79224012, 5.64484517, 6.25011755, 5.90697662, 6.82091812,
        7.6302005 , 7.33046413, 6.5105026 , 5.95807066, 6.67177784,
        6.65808242, 5.70602531, 5.74360836, 6.33892118, 5.69325138,
        5.50592354, 8.15244413, 6.5