In [1]:
import sys
# Put the parent of the current working directory on the import path so the
# `src.*` imports used by this notebook can resolve.
# NOTE(review): assumes the kernel's working directory is the notebook's own folder — confirm.
sys.path.append("..")

In [2]:
import os
import random
import statistics
from itertools import product

from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import numpy as np

from src.training.experiment_conventional_multiclass import init_data
from src.utils.training_utils import open_log
from src.utils.label_convertors import convert2vec
from src.training.experiment_conventional_multiclass import experiment

In [3]:
# JSON dataset handed to every experiment as `data_path`.
# NOTE(review): the file name suggests CYP450 SMILES with GIN fingerprints and labels — confirm the schema.
DATAPATH = os.path.join("..", "data", "cyp450_smiles_GINfp_labels.json")
# Root log directory; each experiment cell appends its own "experiment_<id>" component.
LOGPATH = os.path.join("..", "logs", "convention", "svm_ginfp")

In [4]:
def experiment_svm(data_path, log_path, regulation_factor, kernel, gamma, degree, rand_seed=None):
    """Run one SVC configuration through the shared ``experiment`` routine.

    Args:
        data_path: Dataset path, forwarded to ``experiment``.
        log_path: Log path, forwarded to ``experiment``.
        regulation_factor: Value passed to ``SVC`` as ``C``.
        kernel: Value passed to ``SVC`` as ``kernel``.
        gamma: Value passed to ``SVC`` as ``gamma``.
        degree: Value passed to ``SVC`` as ``degree``.
        rand_seed: Random seed forwarded to ``experiment`` (``None`` by default).

    Returns:
        The ``(acc, model)`` pair produced by ``experiment``.
    """
    svc_params = {
        "C": regulation_factor,
        "kernel": kernel,
        "gamma": gamma,
        "degree": degree,
    }
    classifier = SVC(**svc_params)
    # `experiment` hands back its own model object; return that one rather
    # than the estimator constructed above.
    score, result_model = experiment(data_path, classifier, log_path, rand_seed)
    return score, result_model

def do_experiment(
    data_path,
    log_path,
    regulation_factor,
    rand_seeds,
    kernel=("rbf",),
    gamma=("scale",),
    degree=(3,),
):
    """Grid-search SVC hyper-parameters and report the best setting per random seed.

    Every combination in the Cartesian product of ``regulation_factor``,
    ``kernel``, ``gamma``, ``degree`` and ``rand_seeds`` is run once through
    ``experiment_svm``. One line is printed per run, followed by a summary of
    the highest-accuracy combination for each seed.

    Args:
        data_path: Dataset path, forwarded to ``experiment_svm``.
        log_path: Log path, forwarded to ``experiment_svm``.
        regulation_factor: Iterable of values to try for the SVC ``C`` parameter.
        rand_seeds: Iterable of random seeds; results are tracked per seed.
        kernel: Iterable of SVC kernel names to try.
        gamma: Iterable of SVC ``gamma`` settings to try.
        degree: Iterable of SVC ``degree`` values to try.

    Returns:
        dict mapping each seed to ``[regulation_factor, kernel, gamma, degree, acc]``
        for that seed's best run, in order of first appearance. Empty if any
        of the input iterables is empty.
    """
    best = {}
    for rf, kn, gm, dg, rs in product(regulation_factor, kernel, gamma, degree, rand_seeds):
        acc, _ = experiment_svm(
            data_path=data_path,
            log_path=log_path,
            regulation_factor=rf,
            kernel=kn,
            gamma=gm,
            degree=dg,
            rand_seed=rs,
        )
        # The per-run line does not include the seed; the per-seed summary below does.
        print(f"regulation factor: {rf}, kernel: {kn}, gamma: {gm}, degree: {dg}, acc: {acc:.5f}")
        incumbent = best.get(rs)
        # Always keep the first result for a seed so that a run scoring 0.0 is
        # still reported with its real parameters; ties keep the earlier run.
        if incumbent is None or acc > incumbent[-1]:
            best[rs] = [rf, kn, gm, dg, acc]

    separator = "=" * 80
    print(separator)
    for seed, (best_rf, best_kn, best_gm, best_dg, best_acc) in best.items():
        print(f"Random seed: {str(seed)}")
        print(f"Accuracy: {best_acc:.5f}")
        print("Best params:")
        print(f"  regulation factor: {best_rf}")
        print(f"  kernel: {best_kn}")
        print(f"  gamma: {best_gm}")
        print(f"  degree: {best_dg}")
        print(separator)
    return best

In [5]:
# Running counter that gives each experiment cell its own log sub-path.
# Each experiment cell increments it after running, so re-run this cell to
# reset the numbering before re-executing the experiments.
experiment_id = 0

In [6]:
# RBF-kernel sweep: try C in 1.0..5.0 against both gamma settings on a single seed.
# The log path is derived from the current value of the shared experiment counter.
logp = os.path.join(LOGPATH, "experiment_"+str(experiment_id))

regulation_factor=[1.0, 2.0, 3.0, 4.0, 5.0]
kernel=["rbf"]
gamma=["scale", "auto"]
# NOTE(review): scikit-learn documents `degree` as used only by the "poly" kernel,
# so this value presumably has no effect on the RBF runs.
degree=[3]
rand_seeds=[0]

do_experiment(
    data_path=DATAPATH,
    log_path=logp,
    regulation_factor=regulation_factor,
    kernel=kernel,
    gamma=gamma,
    degree=degree,
    rand_seeds=rand_seeds
)

# Advance the counter so the next experiment cell uses a different log path.
# Re-running this cell without resetting the counter changes `logp`.
experiment_id += 1

regulation factor: 1.0, kernel: rbf, gamma: scale, degree: 3, acc: 0.56165
regulation factor: 1.0, kernel: rbf, gamma: auto, degree: 3, acc: 0.57327
regulation factor: 2.0, kernel: rbf, gamma: scale, degree: 3, acc: 0.58812
regulation factor: 2.0, kernel: rbf, gamma: auto, degree: 3, acc: 0.58425
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 3, acc: 0.59199
regulation factor: 3.0, kernel: rbf, gamma: auto, degree: 3, acc: 0.58167
regulation factor: 4.0, kernel: rbf, gamma: scale, degree: 3, acc: 0.59070
regulation factor: 4.0, kernel: rbf, gamma: auto, degree: 3, acc: 0.57908
regulation factor: 5.0, kernel: rbf, gamma: scale, degree: 3, acc: 0.58618
regulation factor: 5.0, kernel: rbf, gamma: auto, degree: 3, acc: 0.57715
Random seed: 0
Accuracy: 0.59199
Best params:
  regulation factor: 3.0
  kernel: rbf
  gamma: scale
  degree: 3


In [7]:
# Polynomial-kernel sweep: try C in 1.0..5.0 against degrees 2..6 with gamma
# fixed to "auto", on a single seed.
# The log path is derived from the current value of the shared experiment counter.
logp = os.path.join(LOGPATH, "experiment_"+str(experiment_id))

regulation_factor=[1.0, 2.0, 3.0, 4.0, 5.0]
kernel=["poly"]
gamma=["auto"]
degree=[2, 3, 4, 5, 6]
rand_seeds=[0]

do_experiment(
    data_path=DATAPATH,
    log_path=logp,
    regulation_factor=regulation_factor,
    kernel=kernel,
    gamma=gamma,
    degree=degree,
    rand_seeds=rand_seeds
)

# Advance the counter so the next experiment cell uses a different log path.
# Re-running this cell without resetting the counter changes `logp`.
experiment_id += 1

regulation factor: 1.0, kernel: poly, gamma: auto, degree: 2, acc: 0.57844
regulation factor: 1.0, kernel: poly, gamma: auto, degree: 3, acc: 0.54874
regulation factor: 1.0, kernel: poly, gamma: auto, degree: 4, acc: 0.54680
regulation factor: 1.0, kernel: poly, gamma: auto, degree: 5, acc: 0.54099
regulation factor: 1.0, kernel: poly, gamma: auto, degree: 6, acc: 0.52937
regulation factor: 2.0, kernel: poly, gamma: auto, degree: 2, acc: 0.55778
regulation factor: 2.0, kernel: poly, gamma: auto, degree: 3, acc: 0.54229
regulation factor: 2.0, kernel: poly, gamma: auto, degree: 4, acc: 0.54035
regulation factor: 2.0, kernel: poly, gamma: auto, degree: 5, acc: 0.54099
regulation factor: 2.0, kernel: poly, gamma: auto, degree: 6, acc: 0.52808
regulation factor: 3.0, kernel: poly, gamma: auto, degree: 2, acc: 0.54680
regulation factor: 3.0, kernel: poly, gamma: auto, degree: 3, acc: 0.53906
regulation factor: 3.0, kernel: poly, gamma: auto, degree: 4, acc: 0.54035
regulation factor: 3.0, k

In [8]:
# Sigmoid-kernel sweep: try C in 1.0..5.0 against both gamma settings on a single seed.
# The log path is derived from the current value of the shared experiment counter.
logp = os.path.join(LOGPATH, "experiment_"+str(experiment_id))

regulation_factor=[1.0, 2.0, 3.0, 4.0, 5.0]
kernel=["sigmoid"]
gamma=["scale", "auto"]
# NOTE(review): scikit-learn documents `degree` as used only by the "poly" kernel,
# so this value presumably has no effect on the sigmoid runs.
degree=[6]
rand_seeds=[0]

do_experiment(
    data_path=DATAPATH,
    log_path=logp,
    regulation_factor=regulation_factor,
    kernel=kernel,
    gamma=gamma,
    degree=degree,
    rand_seeds=rand_seeds
)

# Advance the counter so the next experiment cell uses a different log path.
# Re-running this cell without resetting the counter changes `logp`.
experiment_id += 1

regulation factor: 1.0, kernel: sigmoid, gamma: scale, degree: 6, acc: 0.50613
regulation factor: 1.0, kernel: sigmoid, gamma: auto, degree: 6, acc: 0.45707
regulation factor: 2.0, kernel: sigmoid, gamma: scale, degree: 6, acc: 0.51775
regulation factor: 2.0, kernel: sigmoid, gamma: auto, degree: 6, acc: 0.45384
regulation factor: 3.0, kernel: sigmoid, gamma: scale, degree: 6, acc: 0.50936
regulation factor: 3.0, kernel: sigmoid, gamma: auto, degree: 6, acc: 0.43770
regulation factor: 4.0, kernel: sigmoid, gamma: scale, degree: 6, acc: 0.49968
regulation factor: 4.0, kernel: sigmoid, gamma: auto, degree: 6, acc: 0.43189
regulation factor: 5.0, kernel: sigmoid, gamma: scale, degree: 6, acc: 0.49709
regulation factor: 5.0, kernel: sigmoid, gamma: auto, degree: 6, acc: 0.43899
Random seed: 0
Accuracy: 0.51775
Best params:
  regulation factor: 2.0
  kernel: sigmoid
  gamma: scale
  degree: 6


In [9]:
# Seed-stability check: a single RBF configuration (C=3.0, gamma="scale") is
# repeated over ten random seeds, so each seed's "best" is simply its only run.
# The log path is derived from the current value of the shared experiment counter.
logp = os.path.join(LOGPATH, "experiment_"+str(experiment_id))

regulation_factor=[3.0]
kernel=["rbf"]
gamma=["scale"]
# NOTE(review): scikit-learn documents `degree` as used only by the "poly" kernel,
# so this value presumably has no effect on the RBF runs.
degree=[6]
rand_seeds=[0, 1029, 1829, 189, 9382, 128989, 812, 204, 486, 6987]

do_experiment(
    data_path=DATAPATH,
    log_path=logp,
    regulation_factor=regulation_factor,
    kernel=kernel,
    gamma=gamma,
    degree=degree,
    rand_seeds=rand_seeds
)

# Advance the counter so the next experiment cell uses a different log path.
# Re-running this cell without resetting the counter changes `logp`.
experiment_id += 1

regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.59199
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.58748
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.56811
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.57134
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.58102
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.57908
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.58167
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.58296
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.58102
regulation factor: 3.0, kernel: rbf, gamma: scale, degree: 6, acc: 0.59393
Random seed: 0
Accuracy: 0.59199
Best params:
  regulation factor: 3.0
  kernel: rbf
  gamma: scale
  degree: 6
Random seed: 1029
Accuracy: 0.58748
Best params:
  regulation factor: 3.0
  kernel: rbf
  gamma: scale
  degree: 6
Random seed: 1829
Accur