# Kernel methods for biological sequence classification

MVA 2019 - Kernel methods for machine learning

*Éloïse Berthier, Guillaume Dalle, Clément Mantoux*

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np

In [3]:
import backend
from kernels import *

Kernel computed on the fly on imported data

In [4]:
# Linear kernel and Gaussian kernel loaded from "mat100" for datasets 0, 1 and 2.
# Each result is indexed by dataset id in the tuning cell (e.g. kernels1a[d]).
# NOTE(review): 0.1 is the Gaussian kernel's parameter; its exact meaning
# (bandwidth vs. scale) is defined in kernels.GaussianKernel — confirm there.
kernels1a = LinearKernel().load("mat100", indices=[0, 1, 2])
kernels1b = GaussianKernel(0.1).load("mat100", indices=[0, 1, 2])

Linear kernel computed on the fly on imported features

In [5]:
# Kernels built from imported feature files, loaded for datasets 0, 1 and 2.
# First constructor argument: a kernel name; second: an empty dict
# (presumably kernel parameters — confirm in kernels.FeatureCSVKernel).
# The string given to load() selects the feature set.
kernels2a = FeatureCSVKernel("SpectralKernel6", {}).load("spectr6", indices=[0, 1, 2])
kernels2b = FeatureCSVKernel("SpectralKernel4", {}).load("spectr4", indices=[0, 1, 2])
kernels2c = FeatureCSVKernel("TranslatedKernel", {}).load("trans6", indices=[0, 1, 2])
kernels2d = FeatureCSVKernel("TfIdfKernel6", {}).load("tfidf6", indices=[0, 1, 2])

Precomputed kernel defined from stored Gram matrices

In [16]:
# Substring kernel read from "substring4_0.7".
# Only datasets 0 and 1 are loaded here, so kernels3 has no entry for dataset 2.
kernels3 = GramCSVKernel("SubstringKernel", {}).load("substring4_0.7", indices=[0, 1])

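Gaussian (and Cauchy) kernels computed on the imported feature sets (`spectr6`, `shapespectr4`, `trans6`, `tfidf6`, `HMM_MCK2`)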

In [5]:
#kernels4 = CauchyKernel(0.005).load("spectr6", indices=[0, 1, 2])#+CauchyKernel(0.1).load("spectr6",
#                                 indices=[1])+CauchyKernel(0.5).load("spectr6", indices=[0]) # 0.005 is cool!

In [11]:
# Gaussian kernel (parameter 80) on the "spectr6" features, datasets 0, 1 and 2.
# Author's tuning notes, kept verbatim: "1e2 -> 0.68 same - 50".
# Abandoned variant that added Cauchy kernels on single datasets:
#   + CauchyKernel(0.1).load("spectr6", indices=[1])
#   + CauchyKernel(0.5).load("spectr6", indices=[0])
kernels5 = GaussianKernel(80).load("spectr6", indices=[0, 1, 2])

In [6]:
# Gaussian kernel (parameter 50) on "shapespectr4", datasets 0, 1 and 2.
# Author's note: "good good". Used as a candidate in the tuning cell.
kernels6 = GaussianKernel(50).load("shapespectr4", indices=[0, 1, 2])

In [7]:
# Gaussian kernel (parameter 20) on "trans6", datasets 0, 1 and 2.
# Author's note: "good good". Used as a candidate in the tuning cell.
kernels7 = GaussianKernel(20).load("trans6", indices=[0, 1, 2])

In [33]:
# Gaussian kernel (parameter 50) on "tfidf6", datasets 0, 1 and 2.
# Used as a candidate in the tuning cell.
kernels8 = GaussianKernel(50).load("tfidf6", indices=[0, 1, 2])

In [38]:
# Gaussian kernel (parameter 10) on "HMM_MCK2", datasets 0, 1 and 2.
# Currently commented out of the candidate list in the tuning cell.
kernels9 = GaussianKernel(10).load("HMM_MCK2", indices=[0, 1, 2])

Boosting on the Spectrum 6 features (which performed better than Spectrum 4 and Spectrum 5)

In [93]:
#kernelsBoost = BoostingKernel(LinearKernel(), iterations=20).load("spectr6", [0,1,2])

Parameter tuning

In [41]:
# Grid of 5 lambda values, log-spaced from 10**-4 to 10**-3.2 (both ends included).
# Passed to backend.tune_parameters in the tuning cell below.
lambdas = np.logspace(start=-4.0, stop=-3.2, num=5)

In [None]:
# Per-dataset selections, appended in dataset order (0, 1, 2) and consumed by
# backend.final_prediction in a later cell.
three_kernels = []
three_lambdas = []

# True  -> wrap the candidates in a single MultipleKernel and tune lambda only.
# False -> let backend.tune_parameters return one (kernel, lambda) pair chosen
#          among the candidates.
use_multiple_kernel = True

for d in range(3):

    # Candidate kernels for dataset d. Alternatives that can be swapped in:
    # kernels1a, kernels1b, kernels2a, kernels2b, kernels2c, kernels2d,
    # kernels3, kernels4, kernels5, kernels9, kernelsBoost
    # (kernels4 and kernelsBoost are commented out where they are defined).
    kernels_to_combine_or_compare = [
        family[d] for family in (kernels6, kernels7, kernels8)
    ]
    # Former experiment, kept for reference (kernels3 is only loaded for
    # datasets 0 and 1):
    #     if d != 2:
    #         kernels_to_combine_or_compare.append(kernels3[0])

    if not use_multiple_kernel:
        # Comparison mode: tune_parameters hands back both the kernel and its lambda.
        best_kernel, best_lambd = backend.tune_parameters(
            kernels_to_combine_or_compare,
            lambdas,
            plot=True,
            result="best_kernel_lambda",
        )
    else:
        # Combination mode: build the combined kernel first, then tune lambda
        # for that single kernel (hence the one-element list and the [0]).
        best_kernel = MultipleKernel(
            kernels_to_combine_or_compare,
            grad_step=1,
            iterations=3,
            entropic=1,
        )
        best_lambd = backend.tune_parameters(
            [best_kernel],
            lambdas,
            plot=True,
            result="best_lambdas",
        )[0]

    three_kernels.append(best_kernel)
    three_lambdas.append(best_lambd)

In [None]:
# NOTE(review): scratch notes, not code. Left uncommented, Python evaluates this
# line as integer arithmetic (62 - 74 - 63 // 63 - 73 - 64 == -150).
# Presumably per-dataset scores (datasets 0 - 1 - 2) for two runs — confirm.
# 62 - 74 - 63 // 63 - 73 - 64

In [31]:
# Run the final prediction step with the per-dataset kernels and lambdas
# collected by the tuning cell (three_kernels[d] pairs with three_lambdas[d]).
# Requires the tuning cell to have been executed first in this kernel session.
backend.final_prediction(three_kernels, three_lambdas)

DATASET 0
Eta [0.36398327 0.28964685 0.34636988]
Eta [0.29846082 0.38554377 0.31599542]
Eta [0.43754041 0.18584615 0.37661344]
DATASET 1
Eta [0.34785515 0.28421513 0.36792971]
Eta [0.31122135 0.39539666 0.29338199]
Eta [0.38914237 0.15850278 0.45235485]
DATASET 2
Eta [0.35920482 0.25716738 0.3836278 ]
Eta [0.29232247 0.43593082 0.27174671]
Eta [0.43183446 0.0552384  0.51292714]


Quick test of the boosting kernels:

In [12]:
# Boosting kernel built on a linear kernel (20 iterations), loaded from
# "spectr6". The second load() argument is presumably the dataset indices
# (0, 1, 2); the result is indexed by dataset id in the next cell.
kernels = BoostingKernel(
    LinearKernel(),
    iterations=20,
).load("spectr6", [0, 1, 2])

100%|██████████| 20/20 [00:08<00:00,  2.56it/s]
100%|██████████| 20/20 [00:07<00:00,  2.26it/s]
100%|██████████| 20/20 [00:07<00:00,  2.62it/s]


In [13]:
# For each dataset, print the mean of the second value returned by
# backend.cross_validate, called with 0.01 as its second argument.
# NOTE(review): presumably 0.01 is lambda and the value is a validation score — confirm in backend.
for d in range(3):
    print(f"Dataset {d}")
    print(backend.cross_validate(kernels[d], 0.01)[1].mean())

Dataset 0
0.7465
Dataset 1
0.795
Dataset 2
0.7205
