In [1]:
import time
import random
import gc

import numpy as np
from numpy import linalg
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

from model.Lib_L2_SVC_NCH import L2_SVC_NCH_ByL2SVC, L2_SVC_NCH_Python
from utils.data_load import *

### Train

In [6]:
datasets_id_list = np.arange(2,3,1)
train_mean_list = []
train_std_list = []
test_mean_list = []
test_std_list = []

for dset_id in datasets_id_list:
    X, y, random_state = load_data(dataset=dset_id)
    print('数据集id：{0}，数据集的大小：{1}'.format(dset_id, X.shape))
    train_scores = []
    test_scores = []

    kf = StratifiedShuffleSplit(n_splits=5, test_size=0.2, train_size=0.8, random_state=random_state)

    for train_index, valid_index in kf.split(X, y):
        X_trains, X_test = X[train_index], X[valid_index]
        y_trains, y_test = y[train_index], y[valid_index]

        X_train, X_valid, y_train, y_valid = train_test_split(
            X_trains, y_trains, test_size=0.2, random_state=42)

        best_score = 0.0
        # for gamma in [0.001, 0.01, 0.02, 0.08, 0.1, 1, 2, 4, 6, 8, 10, 100, 1000]:
        #     for C in [0.001, 0.01, 0.02, 0.08, 0.1, 0.4, 0.6, 0.8, 1, 2, 4, 6, 8, 10, 10, 100]:
        for gamma in [2**(-15), 2**(-13), 2**(-11),2**(-9), 2**(-7), 2**(-5),2**(-3), 2**(-1), 2**(1),2**(3)]:
            for C in [2**(-5), 2**(-3), 2**(-1),2**(1), 2**(3), 2**(5),2**(7), 2**(9), 2**(11),2**(13), 2**(15)]:
                print('gamma：{0}, C：{1}'.format(gamma, C))
                
                luo_svm_smo = L2_SVC_NCH_Python(C=C, gamma=gamma, max_iter=2000, epsilon=1e-3, need_optValue=True)
                # 核矩阵计算
                # def gram_matrix(X1, X2):
                #     K = np.zeros((len(X1), len(X1)), dtype=np.float64)
                #     K = np.exp(-gamma * ((X1**2).sum(1).reshape(-1, 1) + (X1**2).sum(1) - 2 * X1 @ X1.T))
                #     K += 1.0 / C * np.identity(len(X1))
                #     return K
                # luo_svm_smo = L2_SVC_NCH_ByL2SVC(kernel_mat=gram_matrix, 
                #                                  kernel_name='kernel_gaussian', 
                #                                  gamma=gamma, 
                #                                  C=np.PINF, 
                #                                  myC=C, 
                #                                  max_iter=2000, 
                #                                  tol=1e-3,
                #                                  need_optValue=True)

                luo_svm_smo.fit(X_train, y_train)
                score = accuracy_score(luo_svm_smo.predict(X_valid), y_valid)     
                if score > best_score:
                    best_score = score
                    best_parameters = {'gamma': gamma, 'C': C}
                    best_model = luo_svm_smo
        print("搜索出的最佳参数：",best_parameters)
        train_acc = accuracy_score(best_model.predict(X_train), y_train)
        test_acc = accuracy_score(best_model.predict(X_test), y_test)

        train_scores.append(np.around(train_acc, 4))
        test_scores.append(np.around(test_acc, 4))
        break

    train_mean_list.append(np.around(np.mean(train_scores), 4))
    train_std_list.append(np.around(np.std(train_scores), 4))
    test_mean_list.append(np.around(np.mean(test_scores), 4))
    test_std_list.append(np.around(np.std(test_scores), 4))
    print('train acc mean and std:', np.around(np.mean(train_scores), 4), np.around(np.std(train_scores), 4))
    print('test acc mean and std:', np.around(np.mean(test_scores), 4), np.around(np.std(test_scores), 4))
    print('train acc:', train_scores)
    print('test acc:', test_scores)

数据集id：2，数据集的大小：(351, 33)
gamma：3.0517578125e-05, C：0.03125
已满足kkt！
gamma：3.0517578125e-05, C：0.125
已满足kkt！
gamma：3.0517578125e-05, C：0.5
已满足kkt！
gamma：3.0517578125e-05, C：2
已满足kkt！
gamma：3.0517578125e-05, C：8
已满足kkt！
gamma：3.0517578125e-05, C：32
已满足kkt！
gamma：3.0517578125e-05, C：128
已满足kkt！
gamma：3.0517578125e-05, C：512
已满足kkt！
gamma：3.0517578125e-05, C：2048
已满足kkt！
gamma：3.0517578125e-05, C：8192
已满足kkt！
gamma：3.0517578125e-05, C：32768
已满足kkt！
gamma：0.0001220703125, C：0.03125
已满足kkt！
gamma：0.0001220703125, C：0.125
已满足kkt！
gamma：0.0001220703125, C：0.5
已满足kkt！
gamma：0.0001220703125, C：2
已满足kkt！
gamma：0.0001220703125, C：8
已满足kkt！
gamma：0.0001220703125, C：32
已满足kkt！
gamma：0.0001220703125, C：128
已满足kkt！
gamma：0.0001220703125, C：512
已满足kkt！
gamma：0.0001220703125, C：2048
已满足kkt！
gamma：0.0001220703125, C：8192
已满足kkt！
gamma：0.0001220703125, C：32768
已满足kkt！
gamma：0.00048828125, C：0.03125
已满足kkt！
gamma：0.00048828125, C：0.125
已满足kkt！
gamma：0.00048828125, C：0.5
已满足kkt！
gamma：0.00048828125, C：2
已满足k

In [19]:
best_model.opt_value

0.4300933306754818

### Test

In [3]:
X, y, random_state = load_data(dataset=3)

length = int(len(y) * 0.8)
X_train, X_test, y_train, y_test = X[:length], X[length:], y[:length], y[length:]

len(y_train)

455

In [4]:
gamma, C = 0.03125, 1
luo_svm_smo = L2_SVC_NCH_Python(C=C, 
                       gamma=gamma, 
                       max_iter=2000, 
                       epsilon=1e-3, 
                       need_optValue=True) 

# def gram_matrix(X1, X2):
#       # K = np.zeros((len(X1), len(X1)), dtype=np.float64)
#       K = np.exp(-gamma * ((X1**2).sum(1).reshape(-1, 1) + (X1**2).sum(1) - 2 * X1 @ X1.T))
#       K += 1.0 / C * np.identity(len(X1))
#       return K
# luo_svm_smo = L2_SVC_NCH_ByL2SVC(kernel_mat=gram_matrix, 
#                                  kernel_name='kernel_gaussian', 
#                                  gamma=gamma, 
#                                  C=np.PINF, 
#                                  myC=C, 
#                                  max_iter=2000, 
#                                  tol=1e-3)
luo_svm_smo.fit(X_train, y_train)
y_pred = luo_svm_smo.predict(X_test)
print('Accuracy of Luo_SVM by SMO: ',
      accuracy_score(y_test, y_pred))

已满足kkt！
Accuracy of Luo_SVM by SMO:  0.41228070175438597


In [5]:
luo_svm_smo.opt_value

0.03572692661867557

####