In [40]:
import dtw
import pandas
import numpy as np
import random
from matplotlib import pyplot as plt

### UCR数据集
M 为训练集条数，K 为测试集条数，N 为序列长度。

- train_series：训练数据集，每一行一个序列，为 M×N 的矩阵
- train_series_labels：训练数据集标签，为长度为 M 的一维数组
- test_series：测试数据集，每一行一个序列，为 K×N 的矩阵
- test_series_labels：测试数据集标签，为长度为 K 的一维数组

In [41]:
def get_data(data_name, n_test=50, seed=None):
    '''
    Load one dataset and draw a random subset of its test split.

    Reads ``../data/<data_name>/<data_name>_TRAIN.tsv`` and
    ``../data/<data_name>/<data_name>_TEST.tsv`` (tab separated). In every
    row, column 0 is the class label and the remaining columns are the
    series values.

    paras:
        data_name: dataset name; used as both the folder name and the file prefix
        n_test: maximum number of test rows to keep (default 50, the value that
                was previously hard-coded); fewer are returned when the test
                file has fewer rows
        seed: optional seed for the row shuffle; ``None`` (default) uses the
              global ``random`` state, as before
    return: train_series, train_series_labels, test_series, test_series_labels
    raises: ValueError if ``n_test`` is negative
    '''
    if n_test < 0:
        raise ValueError("n_test must be non-negative")

    base_path = r'../data/' + data_name + r'/' + data_name

    # header=None: the UCR .tsv files start directly with data. Without it
    # pandas consumes the first sample as column names and silently drops it.
    train = pandas.read_csv(base_path + r'_TRAIN.tsv', sep='\t', header=None).values
    train_series = train[:, 1:]
    train_series_labels = train[:, 0]

    test = pandas.read_csv(base_path + r'_TEST.tsv', sep='\t', header=None).values

    # Shuffle the row indices and keep the first n_test of them.
    row_order = list(range(test.shape[0]))
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(row_order)
    picked_rows = np.array(row_order[:n_test], dtype=int)

    test_series = test[picked_rows, 1:]
    test_series_labels = test[picked_rows, 0]

    return train_series, train_series_labels, test_series, test_series_labels

### K近邻算法

In [42]:
class K_nearest_neighor():
    '''
    K-nearest-neighbour classifier for 1-D series.

    Every query series is compared with every stored training series via
    ``dtw.accelerated_dtw``; the K training series with the smallest DTW
    distance vote, and the most frequent label wins. On a tie the smallest
    label is returned.
    '''

    K = 1  # default: plain nearest neighbour
    train_series = []  # training series, one per row (placeholder, replaced in __init__)
    train_series_labels = []  # labels of the training rows (placeholder, replaced in __init__)
    # Point-wise cost handed to DTW; defaults to the absolute difference.
    # Wrapped in staticmethod so instance access does not bind it as a method.
    dist_lambda = staticmethod(lambda x, y: np.abs(x - y))

    def __init__(self, train_series, train_series_labels, K=1,  dist_lambda=lambda x, y: np.abs(x - y)):
        '''
        paras:
            train_series: training series, one series per row
            train_series_labels: one label per training series (cast to int when voting)
            K: number of neighbours that vote
            dist_lambda: point-wise cost passed to ``dtw.accelerated_dtw`` as ``dist``
        '''
        self.K = K
        self.train_series = train_series
        self.train_series_labels = train_series_labels
        self.dist_lambda = dist_lambda

    def predict(self, test_seris):
        '''
        Predict a label for every series in ``test_seris``.

        paras:
            test_seris: iterable of query series, one per row (the misspelt
                        parameter name is kept so keyword callers keep working)
        return: list with one predicted integer label per query series
        raises: ValueError if there is no training data, if the number of labels
                differs from the number of training series, or if K is not in
                ``1..len(train_series)``
        '''
        # Use the data stored on the instance, not notebook-level globals.
        train_labels = np.asarray(self.train_series_labels).astype(int)
        # Loop-invariant: reshape each training series to a column once.
        train_columns = [np.asarray(series).reshape(-1, 1) for series in self.train_series]

        n_train = len(train_columns)
        if n_train == 0:
            raise ValueError("no training series available")
        if len(train_labels) != n_train:
            raise ValueError("train_series and train_series_labels differ in length")

        k = int(self.K)
        if not 1 <= k <= n_train:
            raise ValueError("K must be between 1 and the number of training series")

        predict_labels = []  # predicted labels, in query order

        for test_series_signal in test_seris:
            query = np.asarray(test_series_signal).reshape(-1, 1)

            # DTW distance from this query to every training series.
            distances = np.array([
                dtw.accelerated_dtw(query, column, dist=self.dist_lambda)[0]
                for column in train_columns
            ])

            # kth = k - 1 puts the k smallest distances in the first k slots
            # and stays in bounds when k == n_train.
            nearest = np.argpartition(distances, k - 1)[:k]

            # Majority vote. np.unique returns labels in ascending order, so
            # argmax picks the smallest label on a tie; unlike np.bincount it
            # also handles negative labels.
            labels, votes = np.unique(train_labels[nearest], return_counts=True)
            predict_labels.append(labels[np.argmax(votes)])

        return predict_labels

In [45]:
# Names of all datasets to evaluate
data_names = [
    'SmoothSubspace',
    'ItalyPowerDemand',
    'Chinatown',
    'SonyAIBORobotSurface2',
    'TwoLeadECG', 
    'MoteStrain',
    'CBF',
    'BME',
]
# Accuracy of every dataset for every K (one inner list per dataset)
accuracies = []

for d in data_names:
    
    accuracy = []  # accuracies of this dataset, one entry per K
   
    train_series, train_series_labels, test_series, test_series_labels = get_data(d)  # load the data
    
    # Predict with K = 1..5
    for k in range(1, 6):
        my_knn = K_nearest_neighor(train_series=train_series, 
                                   train_series_labels=train_series_labels, 
                                   K=k)
        # Predicted labels
        predict_labels = my_knn.predict(test_series)
        # Accuracy: fraction of correct predictions. Stored as a plain float so
        # the text written to the file does not depend on NumPy's scalar repr.
        c = float(np.mean(np.array(predict_labels) == test_series_labels))
        accuracy.append(c)
        print(d, k, c)
    print("-----------------------------------------------------")
    accuracies.append(accuracy)
    # Append one line per dataset; without the newline successive lists run
    # together in the file.
    with open("./accuracy.txt", "a+") as f:
        f.write(str(accuracy) + "\n")

SmoothSubspace 1 0.86
SmoothSubspace 2 0.86
SmoothSubspace 3 0.9
SmoothSubspace 4 0.92
SmoothSubspace 5 0.92
-----------------------------------------------------
ItalyPowerDemand 1 0.94
ItalyPowerDemand 2 0.9
ItalyPowerDemand 3 0.92
ItalyPowerDemand 4 0.94
ItalyPowerDemand 5 0.94
-----------------------------------------------------
Chinatown 1 0.94
Chinatown 2 0.94
Chinatown 3 1.0
Chinatown 4 0.92
Chinatown 5 0.96
-----------------------------------------------------
SonyAIBORobotSurface2 1 0.8
SonyAIBORobotSurface2 2 0.72
SonyAIBORobotSurface2 3 0.7
SonyAIBORobotSurface2 4 0.7
SonyAIBORobotSurface2 5 0.68
-----------------------------------------------------
TwoLeadECG 1 0.84
TwoLeadECG 2 0.76
TwoLeadECG 3 0.78
TwoLeadECG 4 0.74
TwoLeadECG 5 0.76
-----------------------------------------------------
MoteStrain 1 0.94
MoteStrain 2 0.9
MoteStrain 3 0.92
MoteStrain 4 0.9
MoteStrain 5 0.92
-----------------------------------------------------
CBF 1 1.0
CBF 2 1.0
CBF 3 0.98
CBF 4 0.98
CB