In [1]:
import os
import numpy as np
import pandas as pd
from utils import *
from models import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

# --- Notebook-wide configuration ---------------------------------------

# How many datasets there are; run_instance loops over indices 0..N-1.
N = 3

# Data directory name and per-dataset file-name templates. The "{}" is a
# str.format placeholder (presumably filled with the dataset index — the
# code doing so is not in this notebook section).
DATA_FOLDER = "machine-learning-with-kernel-methods-2021"
TRAIN_FILE, LABEL_FILE, TEST_FILE = "Xtr{}.csv", "Ytr{}.csv", "Xte{}.csv"

# Name of the results-history text file (presumably written by a
# results-saving helper; confirm against utils).
HIST_FILE = "results-history.txt"

# Lookup table from a baseline name to its model object. The three values
# are not defined in this notebook; they presumably come from the
# `from models import *` star import — confirm there.
baselines = dict(
    ridge=kernelRidge,
    svm=kernelSVM,
    logistic=KernelLogistic,
)

In [2]:
def run_instance(args):
    """
    Train and evaluate one model configuration on each of the N datasets.

    For every dataset index in ``range(N)`` the features are extracted,
    standardized, and split 70/30 into train/validation parts (fixed seed).
    The classifier is fitted on the train part, the validation confusion
    matrix and accuracy are printed, and predictions for the test set are
    collected.

    Parameters
    ----------
    args : dict
        Configuration. Keys read: 'features', 'k', 'm', 'lmbda',
        'order_of_fourier_kmers', 'kernel', 'baseline', 'd', 'sigma', 'c'.
        Optional key: 'nb_of_fourier_coeffs'. When it is absent the value of
        'order_of_fourier_kmers' is used instead, which is what this
        function always passed before.

    Returns
    -------
    index : list
        Test-sample ids, concatenated over all datasets in dataset order.
    pred : list
        Test-set predictions, aligned element-wise with ``index``.
    """

    feature_type = args['features'].lower()
    fmaker = feature_extractor(
        feature_type=feature_type,
        k=args['k'],
        m=args['m'],
        lmbda=args['lmbda'],
        order_of_fourier_kmers=args['order_of_fourier_kmers'],
        # Previously this was hard-wired to args['order_of_fourier_kmers']
        # (copy-paste slip), so the number of coefficients could never be
        # configured on its own. The fallback keeps old callers working.
        nb_of_fourier_coeffs=args.get(
            'nb_of_fourier_coeffs', args['order_of_fourier_kmers']
        ),
    )

    # NOTE(review): one classifier object is created here and re-fitted on
    # every dataset; this assumes fit() discards any previous state —
    # confirm in Baseline.
    clf = Baseline(
        baseline_type=args['baseline'].lower(),
        kernel_type=args['kernel'].lower(),
        d=args['d'],
        sigma=args['sigma'],
        c=args['c'],
    )

    index = []
    pred = []
    accs = []  # per-dataset validation accuracy; used by the disabled saving code below
    for i in range(N):
        xtrain, ytrain, xte, ids = fmaker(i)

        # NOTE(review): the scaler is fitted before the train/validation
        # split, so validation rows contribute to the scaling statistics and
        # the printed validation accuracy may be slightly optimistic.
        scaler = StandardScaler()
        xtrain = scaler.fit_transform(xtrain)
        xte = scaler.transform(xte)
        xtr, xval, ytr, yval = train_test_split(
            xtrain, ytrain, test_size=0.3, random_state=42
        )

        clf.fit(xtr, ytr)

        # predict() takes the training inputs as its first argument.
        predval = clf.predict(xtr, xval)
        print(confusion_matrix(yval, predval))

        acc = accuracy_score(yval, predval)
        print(f"accuracy is {acc}")

        # Test predictions come from the model fitted on the 70% train part.
        ytest = clf.predict(xtr, xte)

        index.extend(ids)
        pred.extend(ytest)
        accs.append(acc)

    # Saving is currently disabled:
    # name = args['features'].lower() + "_k_" + str(args['k']) + clf.type
    # save_file(index, pred, name=name)
    # save_results(name, accs, sum(accs)/N)

    return index, pred