In [20]:
from functools import partial
from itertools import product

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_validate
from sklearn.svm import SVC
import numpy as np

from utils.data_loaders.cvs_loader import CVSLoader
from utils.label_convertors import convert2vec

In [2]:
DATAPATH = "../data/fromraw_cid_inchi_smiles_fp_labels_onehots.csv"

In [12]:
data_loader = CVSLoader(DATAPATH)
x_train, y_train, x_test, y_test = data_loader.load_data(
    ["ECFP", "onehot_label"],
    ratio=1,
    shuffle=True
)

In [15]:
convert2vec_float = partial(convert2vec, dtype=float)
x_train, y_train = list(
    map(convert2vec_float, [x_train, y_train]))
y_train = np.argmax(y_train, axis=1)

In [16]:
x_train.shape

(5162, 2048)

In [17]:
y_train.shape

(5162,)

In [10]:
results = dict()
for c, gamma in product(np.arange(1, 11)/10, ["auto", "scale"]):
    clf = SVC(C=c, gamma=gamma)
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    print(f"C: {c}, gamma: {gamma}, score: {score}")
    results[str(c)+","+str(gamma)] = score

C: 0.1, gamma: auto, score: 0.42027114267269206
C: 0.1, gamma: scale, score: 0.42027114267269206
C: 0.2, gamma: auto, score: 0.42027114267269206
C: 0.2, gamma: scale, score: 0.4364105874757908
C: 0.3, gamma: auto, score: 0.42027114267269206
C: 0.3, gamma: scale, score: 0.4564234990316333
C: 0.4, gamma: auto, score: 0.42027114267269206
C: 0.4, gamma: scale, score: 0.46675274370561654
C: 0.5, gamma: auto, score: 0.42027114267269206
C: 0.5, gamma: scale, score: 0.4854744996772111
C: 0.6, gamma: auto, score: 0.42027114267269206
C: 0.6, gamma: scale, score: 0.4970948999354422
C: 0.7, gamma: auto, score: 0.42027114267269206
C: 0.7, gamma: scale, score: 0.5041962556488057
C: 0.8, gamma: auto, score: 0.42156229825693997
C: 0.8, gamma: scale, score: 0.5112976113621691
C: 0.9, gamma: auto, score: 0.42479018721755973
C: 0.9, gamma: scale, score: 0.5145255003227889
C: 1.0, gamma: auto, score: 0.42479018721755973
C: 1.0, gamma: scale, score: 0.5183989670755326


In [7]:
np.arange(1, 11) / 10

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [21]:
svc_clf = SVC(C=1.0, gamma='scale')
cv_results = cross_validate(svc_clf, x_train, y_train, cv=5)



In [22]:
print(cv_results)

{'fit_time': array([85.0697248 , 83.73526287, 84.51744199, 85.33605337, 85.3516624 ]), 'score_time': array([7.06337166, 7.02187681, 7.0462327 , 7.06263924, 7.00557494]), 'test_score': array([0.55759923, 0.54888674, 0.54748062, 0.53972868, 0.53197674])}
