# In [None]:
import sys
import os
sys.path.append(os.getcwd()[:-5])
from sklearn import preprocessing
from src.timeseries.TimeSeriesLoader import uv_load
from  src.classification.WEASELClassifier import *
from src.timeseries.TimeSeries import TimeSeries
import pandas as pd
from os.path import dirname
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import pickle


def covert_list_to_dic(XX_list):
    """Map every element of ``XX_list`` to its position in the list.

    When an element occurs more than once, the index of its last occurrence
    is the one that ends up in the returned dictionary.
    """
    return {element: position for position, element in enumerate(XX_list)}

def load_WEASEL_result(name, ratio_number_list, ind_number_list=None):
    """Read the accuracy log of dataset ``name`` into a (ratio x split) matrix.

    The log is read from ``./weasel_acc_log/<name>/<name>_log.txt``. Each line
    is split on tab characters; field 3 is read as the training ratio, field 5
    as the split index and the last field as the accuracy.

    A line whose ratio equals 1 fills the whole last row with its accuracy.
    Reading is best-effort: a missing file or the first malformed/unknown line
    stops the parse, a message is printed, and whatever was collected so far
    is returned.

    Args:
        name: Dataset name; selects the log directory and file.
        ratio_number_list: Training ratios, one matrix row per entry.
        ind_number_list: Split indices, one matrix column per entry.
            Defaults to ``[0, 1, ..., 9]``.

    Returns:
        ``numpy.ndarray`` of shape
        ``(len(ratio_number_list), len(ind_number_list))``; cells without a
        logged accuracy stay 0.
    """
    # None sentinel instead of a shared mutable list default.
    if ind_number_list is None:
        ind_number_list = list(range(10))

    father_path = './weasel_acc_log/' + name
    path = father_path + '/' + name + '_log.txt'
    ratio_dic = covert_list_to_dic(ratio_number_list)
    ind_dic = covert_list_to_dic(ind_number_list)
    proto_result = np.zeros([len(ratio_number_list), len(ind_number_list)])
    try:
        # Context manager so the handle is closed even when parsing aborts.
        with open(path, "r") as f:
            for x in f:
                temp = x.split('\t')
                if float(temp[3]) == 1:
                    # Ratio 1 has a single result: replicate it over all splits.
                    proto_result[-1:, :] = float(temp[-1])
                    continue

                row = ratio_dic[float(temp[3])]
                col = ind_dic[float(temp[5])]
                proto_result[row][col] = float(temp[-1])
    except (OSError, ValueError, KeyError, IndexError):
        # Missing/unreadable file, non-numeric field, unknown ratio/index or a
        # short line. Deliberately best-effort, but no longer swallows
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` did.
        print(name, 'cannot find')

    return proto_result


def TSC_data_loader(dataset_name):
    """Load the TRAIN/TEST files of one dataset as float32 numpy arrays.

    Files are read with ``np.loadtxt`` from
    ``<parent of cwd>/datasets/UCRArchive_2018/<dataset_name>/`` and are named
    ``<dataset_name>_TRAIN.tsv`` and ``<dataset_name>_TEST.tsv``. Column 0 of
    each file is used as the label, the remaining columns as the series.
    Labels are re-encoded with a ``LabelEncoder`` fitted on the training
    labels only. NaN values are left exactly as loaded.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    file_prefix = (dirname(os.getcwd()) + '/datasets/UCRArchive_2018/'
                   + dataset_name + '/' + dataset_name)
    raw_train = np.loadtxt(file_prefix + '_TRAIN.tsv').astype(np.float32)
    raw_test = np.loadtxt(file_prefix + '_TEST.tsv').astype(np.float32)

    # First column holds the label, everything after it is the series.
    train_labels, X_train = raw_train[:, 0], raw_train[:, 1:]
    test_labels, X_test = raw_test[:, 0], raw_test[:, 1:]

    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_labels)
    y_train = encoder.transform(train_labels)
    y_test = encoder.transform(test_labels)

    return X_train, y_train, X_test, y_test

def save_to_log(sentence,dataset_name):
    """Append ``sentence`` as one line to the accuracy log of ``dataset_name``.

    The log lives at ``./weasel_acc_log/<dataset_name>/<dataset_name>_log.txt``
    (relative to the current working directory); the directory is created on
    demand.

    Args:
        sentence: Text to append; a trailing newline is added.
        dataset_name: Dataset name; selects the log directory and file.
    """
    father_path = './weasel_acc_log/' + dataset_name
    # exist_ok avoids the check-then-create race when several runs start at
    # the same time and both see the directory as missing.
    os.makedirs(father_path, exist_ok=True)
    path = father_path + '/' + dataset_name + '_log.txt'
    with open(path, "a") as myfile:
        myfile.write(sentence + '\n')


def _arrays_to_weasel_dict(x, y):
    """Pack one split (series ``x``, labels ``y``) into a WEASEL-style dict."""
    # Put the label in column 0 so each row is "label, value, value, ...".
    table = np.concatenate((np.expand_dims(y, axis=-1), x), axis=1)
    dataset = {
        "Type": "UV",
        "Samples": table.shape[0],
        "Size": table.shape[1] - 1,
        "Labels": [],
    }
    # Iterate the numpy rows directly; a DataFrame with per-row ``.iloc``
    # access yields the same values but is much slower.
    for i, row in enumerate(table):
        label = int(row[0])
        dataset["Labels"].append(label)
        dataset[i] = TimeSeries(row[1:].tolist(), label)
        dataset[i].NORM(True)  # presumably normalises in place -- TODO confirm
    return dataset


def good_back_to_bad(x_train,y_train,x_test,y_test):
    """Convert numpy train/test arrays into the dict layout passed to WEASEL.

    Each returned dict holds the metadata keys ``"Type"`` (always ``"UV"``),
    ``"Samples"`` (number of rows), ``"Size"`` (number of values per series)
    and ``"Labels"`` (list of int labels), plus one ``TimeSeries`` object per
    sample stored under its integer row index. ``NORM(True)`` is called on
    every ``TimeSeries``.

    Args:
        x_train: 2-D array of training series, one row per sample.
        y_train: 1-D array of training labels.
        x_test: 2-D array of test series, one row per sample.
        y_test: 1-D array of test labels.

    Returns:
        Tuple ``(train, test)`` of dicts in the layout described above.
    """
    train = _arrays_to_weasel_dict(x_train, y_train)
    test = _arrays_to_weasel_dict(x_test, y_test)
    return train, test

def save_mode(model,name):
    """Pickle ``model`` to ``<cwd>/Weasel_model/<name>/<name>_model.pkl``.

    The target directory is created on demand and the destination path is
    printed before writing. An existing file is overwritten.

    Args:
        model: Any picklable object.
        name: Used for both the sub-directory and the file name prefix.
    """
    father_path = os.path.join(os.getcwd(), 'Weasel_model', name)
    # exist_ok avoids the check-then-create race between concurrent runs.
    os.makedirs(father_path, exist_ok=True)
    path = os.path.join(father_path, name + '_model.pkl')
    print(path)
    with open(path, 'wb') as output:
        pickle.dump(model, output, pickle.HIGHEST_PROTOCOL)

def obtain_boss_feature_and_save_model(name):
    """Load dataset ``name`` via ``uv_load`` and evaluate a WEASEL classifier.

    Returns whatever ``WEASELClassifier.eval`` returns for the loaded
    train/test pair. Despite the function name, nothing is saved here.
    """
    train_set, test_set = uv_load(name)
    classifier = WEASELClassifier(name)
    return classifier.eval(train_set, test_set)

# Dataset names processed by the experiment loop. Each one is passed to
# TSC_data_loader, which looks for a directory of the same name under
# datasets/UCRArchive_2018/.
name_list = [
    'ArrowHead',
    'BME',
    'CBF',
    'Chinatown',
    'ECG200',
    'GunPoint',
    'GunPointAgeSpan',
    'GunPointOldVersusYoung',
    'ItalyPowerDemand',
    'MoteStrain',
    'Plane',
    'SonyAIBORobotSurface1',
    'SonyAIBORobotSurface2',
    'SyntheticControl',
    'ToeSegmentation1',
    'TwoLeadECG',
    'UMD',
    'Wine',
]

# Fractions of the original training set to train on. The final entry (1) only
# reserves a row in the result matrix; the loop below skips it.
train_ratio_list = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
for name in name_list:
    X_train_ori, y_train_ori, X_test, y_test = TSC_data_loader(name)

    for train_ind_index, train_ratio in enumerate(train_ratio_list):
        if train_ratio == 1:
            continue

        # Fixed random_state: the resume logic below relies on the same ten
        # splits being regenerated, in the same order, on every run.
        sss = StratifiedShuffleSplit(n_splits=10, test_size=1 - train_ratio, random_state=0)

        # Resume shortcut: when the NEXT ratio already has a result for split 0,
        # this ratio is treated as finished. 0.9 is never skipped this way
        # because its "next" row belongs to ratio 1, which this loop never runs.
        # NOTE: a logged accuracy of exactly 0 is indistinguishable from
        # "not done yet" in these checks.
        result_matrix = load_WEASEL_result(name, train_ratio_list)
        if (result_matrix[train_ind_index + 1][0] != 0) and (train_ratio != 0.9):
            print(train_ratio, 0, 'already done result = ', result_matrix[train_ind_index])
            continue

        for ind, (train_index, test_index) in enumerate(sss.split(X_train_ori, y_train_ori)):
            # Re-read the log before every split so finished splits are skipped.
            result_matrix = load_WEASEL_result(name, train_ratio_list)
            if result_matrix[train_ind_index][ind] != 0:
                print(train_ratio, ind, 'already done result = ', result_matrix[train_ind_index][ind])
                continue

            try:
                print(ind)
                X_train = X_train_ori[train_index, :]
                y_train = y_train_ori[train_index]
                train, test = good_back_to_bad(X_train, y_train, X_test, y_test)
                WEASEL = WEASELClassifier(name)
                scores = WEASEL.eval(train, test)

                # The accuracy is the last '; '-separated field of scores[0].
                acc = float(scores[0].split('; ')[-1])
                # Field positions matter: load_WEASEL_result reads the ratio,
                # split index and accuracy back by position.
                sentence = '\t'.join([
                    'dataset_name=', name,
                    'ratio=', str(train_ratio),
                    'ind=', str(ind),
                    'test_acc=', str(acc),
                ])
                save_to_log(sentence, name)
            except Exception as err:
                # Best-effort: keep going with the next split, but report why
                # this one failed and let Ctrl-C / SystemExit propagate.
                print(name, train_ratio, ind, repr(err))
    