In [1]:
from sklearn.datasets import load_breast_cancer
ds = load_breast_cancer()
X,Y = ds.data, ds.target
from sklearn.datasets import load_svmlight_file


In [2]:
from MKLpy.preprocessing import \
    normalization, rescale_01, rescale, centering
X = rescale_01(X)       #feature scaling in [0,1]
X = normalization(X)    #row (example) normalization ||X_i||_2^2 = 1

In [3]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
Xscikit = scaler.fit_transform(X)

In [8]:

'''
WARNING: be sure that your matrix is not sparse! EXAMPLE:
from sklearn.datasets import load_svmlight_file
X,Y = load_svmlight_file(...)
X = X.toarray()
'''

#preprocess data
print ('preprocessing data...', end='')
from MKLpy.preprocessing import normalization, rescale_01
X = rescale_01(X)	#feature scaling in [0,1]
X = normalization(X) #||X_i||_2^2 = 1

#train/test split
from sklearn.model_selection import train_test_split
Xtr,Xte,Ytr,Yte = train_test_split(X,Y, test_size=.75, random_state=42)
print ('done')

preprocessing data...done


In [9]:
print(Xtr.shape, Xte.shape)

torch.Size([142, 30]) torch.Size([427, 30])


In [10]:
#compute homogeneous polynomial kernels with degrees 0,1,2,...,10.
print ('computing Homogeneous Polynomial Kernels...', end='')
from MKLpy.metrics import pairwise
KLtr = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in range(11)]
KLte = [pairwise.homogeneous_polynomial_kernel(Xte,Xtr, degree=d) for d in range(11)]
print ('done')

computing Homogeneous Polynomial Kernels...done


In [11]:

#evaluate kernels in terms of margin, radius etc...
print ('evaluating metrics...', end='')
from MKLpy.metrics import margin, radius, ratio, trace, frobenius
from MKLpy.preprocessing import kernel_normalization
deg = 5
K = KLtr[deg]					#the HPK with degree 5
K = kernel_normalization(K)		#normalize the kernel K (useless in the case of HPK computed on normalized data)

score_margin = margin(K,Ytr)	#the distance between the positive and negative classes in the kernel space
score_radius = radius(K)		#the radius of the Einimum Enclosing Ball containing data in the kernel space
score_ratio  = ratio (K,Ytr)	#the radius/margin ratio defined as (radius**2/margin**2)/n_examples
#the ratio can be also computed as score_radius**2/score_margin**2/len(Ytr)
score_trace  = trace (K)		#the trace of the kernel matrix
score_froben = frobenius(K)		#the Frobenius norm of a kernel matrix
print ('done')
print ('results of the %d-degree HP kernel:' % deg)
print ('margin: %.4f, radius: %.4f, radiu-margin ratio: %.4f,' % (score_margin, score_radius, score_ratio))
print ('trace: %.4f, frobenius norm: %.4f' % (score_trace, score_froben))


#evaluate the empirical complexity of the kernel matrix, i.e. the Spectral Ratio
# Michele Donini, Fabio Aiolli: "Learning deep kernels in the space of dot-product polynomials". Machine Learning (2017)
# Ivano Lauriola, Mirko Polato, Fabio Aiolli: "The Minimum Effort Maximum Output principle applied to Multiple Kernel Learning". ESANN (2018)
print ('computing Spectral Ratio...', end='')
from MKLpy.metrics import spectral_ratio
SR = spectral_ratio(K, norm=True)
print ('%.4f' % SR)

evaluating metrics...done
results of the 5-degree HP kernel:
margin: 0.0819, radius: 0.8779, radiu-margin ratio: 0.8088,
trace: 142.0000, frobenius norm: 78.8652
computing Spectral Ratio...0.0733


In [12]:
from MKLpy import generators
# 10 homogeneous poly. kernels 
KL_hpk = generators.HPK_generator(X, degrees=range(1,11))

# 3 rbf kernels
KL_rbf = generators.RBF_generator(X, gamma=[.001, .01, .1])

# from MKLpy.metrics import pairwise
# # 2 custom kernels (linear and polynomial)
# ker_functions = [pairwise.linear_kernel, lambda X,Z : pairwise.polynomial_kernel(X,Z, degree=5)]
# KL_mix = generators.Lambda_generator(X, ker_functions)

In [17]:
import jsonpickle
obj_str = jsonpickle.encode(KL_rbf)
restored_obj = jsonpickle.decode(obj_str)

3

In [13]:
KL = generators.RBF_generator(X, gamma=[.001, .01, .1])  #our list of base kernels (or a generator)

#usually, base kernels are normalize to prevent scaling and numerical issues
from MKLpy.preprocessing import kernel_normalization
KL_norm = [kernel_normalization(K) for K in KL]

#let us divide trainig (70%) and test (30%) examples
from MKLpy.model_selection import train_test_split
KLtr, KLte, Ytr, Yte = train_test_split(KL, Y, test_size=.8, random_state=42)

In [18]:
from MKLpy.algorithms import AverageMKL
#AverageMKL simply computes the average of input kernels
#It looks bad but it is a really strong baseline in MKL ;)
mkl = AverageMKL().fit(KLtr, Ytr)       #combine kernels and train the classifier
y_preds  = mkl.predict(KLte)            #predict the output class
y_scores = mkl.decision_function(KLte)  #returns the projection on the distance vector

In [19]:
from MKLpy.algorithms import AverageMKL
%timeit
mkl = AverageMKL(multiclass_strategy='ova').fit(KLtr, Ytr)

In [20]:
from MKLpy.algorithms import EasyMKL
from MKLpy.multiclass import OneVsRestMKLClassifier, OneVsOneMKLClassifier
mkl = EasyMKL(lam=.1)
clf = OneVsRestMKLClassifier(mkl).fit(KLtr, Ytr)

In [27]:
restored_obj.classes_
from collections import defaultdict
dict_list = defaultdict(dict)

In [21]:
from MKLpy.model_selection import cross_val_score
from MKLpy.algorithms import EasyMKL
from sklearn.svm import SVC
from itertools import product
lam_values = [0, 0.1, 0.2,0.8, 1]
C_values   = [0.01, 1,10, 50, 100]
test_list = list()
for lam, C in product(lam_values, C_values):    
    svm = SVC(C=C)
    mkl = EasyMKL(lam=lam, learner=svm)
    scores = cross_val_score(KL, Y, mkl, n_folds=3, scoring='roc_auc')
    dict_list[lam][ C]= scores
   

NameError: name 'dict_list' is not defined

In [54]:
obj_str = jsonpickle.encode(mkl)

In [55]:
restored_obj = jsonpickle.decode(obj_str)

In [56]:
restored_obj

EasyMKL(lam=1, learner=SVC(C=100, kernel='precomputed'), max_iter=-1,
        multiclass_strategy='ova', tolerance=1e-07, verbose=False)

In [41]:
import os
test_location = '/media/ak/DataOnly/'
file_name = os.path.join(test_location,'test_file.json')

In [57]:
f = open(file_name, 'w')
f.write(obj_str)
f.close()

In [58]:
f=open(file_name)

In [59]:
json_str =f.read()

In [60]:
obj =jsonpickle.decode(json_str)

In [61]:
obj.classes_

tensor([0, 1])

In [63]:
def jsonpickle_store_obj(obj, filename_location):
    """
    function to encode an object in jsonpickle and store it
    obj: object to be encoded in jsonpickle
    location: where to store it
    
    """
    obj_str = jsonpickle.encode(obj)
    f = open(filename_location, 'w')
    f.write(obj_str)
    f.close()
    print('encoded and saved in :', filename_location)

In [64]:
jsonpickle_store_obj(mkl, file_name)

encoded and saved in : /media/ak/DataOnly/test_file.json


In [69]:
def jsonpickle_load_decode(filename_location):
    f = open(filename_location)
    json_str = f.read()
    obj = jsonpickle.decode(json_str)
    return obj
    

In [70]:
thawed_obj =jsonpickle_load_decode('/media/ak/DataOnly/test_file.json')