In [1]:
import sys
import random
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

sys.path.append("/home/share/libraries/bdpy")
import bdpy
from bdpy.preproc import select_top
from bdpy.ml import add_bias, make_cvindex
from bdpy.stats import corrcoef

from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

from util import make_dataset
from util import cross_validation

In [2]:
# Load the two pre-computed spectrum arrays from the local record/ directory.
v_spec = np.load("record/ventricle_spectrum.npy")
a_spec = np.load("record/article_spectrum.npy")
# Feature counts to sweep over: every integer from 1 to 99, followed by
# 100, 150, ..., 2200 (steps of 50).
NUM_FEATURES = list(range(1, 100)) + list(range(100, 2250, 50))

In [None]:
def classify(train_data, test_data, train_label, test_label, n_features):
    """Fit a linear SVM on the first ``n_features`` columns and score it.

    The selected columns are standardised with a ``StandardScaler`` that is
    fitted on the training split only and then applied to both splits.

    Parameters
    ----------
    train_data, test_data : 2-D array-like
        Samples along axis 0, features along axis 1.
    train_label, test_label : array-like
        Labels matching the rows of ``train_data`` / ``test_data``.
    n_features : int
        Number of leading feature columns to use.

    Returns
    -------
    float
        Value of ``LinearSVC.score`` on the (scaled) test split.
    """
    # Slice before scaling: StandardScaler standardises every column
    # independently, so the selected columns get exactly the same treatment
    # as before, without fitting/transforming columns that are thrown away.
    train_subset = np.asarray(train_data)[:, :n_features]
    test_subset = np.asarray(test_data)[:, :n_features]

    # Scaling statistics come from the training split only.
    scaler = StandardScaler()
    scaled_train_data = scaler.fit_transform(train_subset)
    scaled_test_data = scaler.transform(test_subset)

    # Train the linear SVM and evaluate it on the held-out split.
    clf = LinearSVC()
    clf.fit(scaled_train_data, train_label)
    return clf.score(scaled_test_data, test_label)

In [None]:
%matplotlib
#put data to score_record and get sum 
score_record = []
line, = plt.plot(0,0)
plt.xlim(0,2250)
plt.ylim(0.45, 0.75)
plt.axhline(0.5, linestyle="--", color="black")
trial = 0
for count, num_features in enumerate(NUM_FEATURES):
    if num_features % 100 == 0:
        print("number of features:{0}".format(num_features))
    accuracy = []
    for i in range(10):
        data, label = make_dataset(v_spec, a_spec)
        cross_valid = cross_validation(data, label, n=10)

        # for each train-test in cross validation
        for  train_data, test_data, train_label, test_label in cross_valid:
            score = classify(train_data, test_data, train_label, test_label, num_features)
            accuracy.append(score)
    score_record.append(accuracy)
    mean = np.array(score_record)[:count].mean(axis=1)
    line.set_data(NUM_FEATURES[:count], mean)
    plt.pause(0.001)
    

In [None]:
# Stack the per-feature-count score lists into a 2-D array (one row per
# entry of score_record) and summarise each row by its mean and standard
# deviation.
score_record = np.array(score_record)
mean = np.mean(score_record, axis=1)
std = np.std(score_record, axis=1)

In [None]:
%matplotlib inline
# plot figure
plt.plot(NUM_FEATURES, mean)
plt.fill_between(NUM_FEATURES, mean - std, mean + std, alpha = 0.2, color = "green")
plt.xlim(0, 2250)
plt.ylim((0.4, 0.75))
plt.axhline(0.5, linestyle="--", color="black")