In [1]:
# https://plotly.com/python/t-sne-and-umap-projections/

from glob import glob
from itertools import product
from noise import add_noise, decompress_pickle
import mat73
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from training_sktime import normalizing, format_dataframe
from sklearn.metrics import accuracy_score
import pickle
from  sktime.transformations.panel.reduce import Tabularizer
from sklearn.manifold import TSNE
import plotly.express as px
import plotly

MODEL_PATH = './models/new_dataset/'
INPUT_DATA_PATH = '../input-data/'

In [2]:
def open_tabular_data(signal, cycle):
    """Load the training folds of both datasets and return them as flat tables.

    For the given signal and cycle, the X_train / y_train pickles are read from
    the ``folds`` and ``folds-robson`` directories under INPUT_DATA_PATH. Each
    X frame (and the row-wise concatenation of both) is passed through
    sktime's ``Tabularizer`` and receives a ``'y'`` column with its labels.

    Only the training split is read: the validation files are not needed to
    build any of the returned values.

    Parameters
    ----------
    signal : str
        Sub-directory naming the signal (the notebook uses ``'i'``).
    cycle : str
        Sub-directory naming the window, e.g. ``'cycle_1'`` or ``'cycle_32'``.

    Returns
    -------
    tuple
        ``(X_train_flavio, X_train_robson, X_train, feat_cols)`` where the
        first three are tabularized frames with a trailing ``'y'`` column and
        ``feat_cols`` lists the feature column names of the first frame
        (captured before ``'y'`` is added).
    """
    def _load_train(folds_dir):
        # Both datasets share the same layout: <folds_dir>/<signal>/<cycle>/<file>
        base = INPUT_DATA_PATH + f'{folds_dir}/{signal}/{cycle}/'
        return decompress_pickle(base + 'X_train'), decompress_pickle(base + 'y_train')

    X_train_flavio, y_train_flavio = _load_train('folds')
    X_train_robson, y_train_robson = _load_train('folds-robson')

    # Stack both datasets (first dataset rows first) with a fresh 0..n-1 index
    # so the concatenated labels below line up positionally.
    X_train = pd.concat([X_train_flavio, X_train_robson]).reset_index(drop=True)
    y_train = np.concatenate([y_train_flavio, y_train_robson])

    t = Tabularizer()
    X_train_flavio = t.fit_transform(X_train_flavio)
    X_train_robson = t.fit_transform(X_train_robson)
    X_train = t.fit_transform(X_train)

    # Capture the feature names before the label column is appended.
    feat_cols = list(X_train_flavio.columns)

    X_train_flavio['y'] = y_train_flavio
    X_train_robson['y'] = y_train_robson
    X_train['y'] = y_train

    return X_train_flavio, X_train_robson, X_train, feat_cols

In [3]:
def plot_tsne(X, n, string='', save=False):
    """Project the feature columns of ``X`` with t-SNE and draw a scatter plot.

    Points are coloured by the ``'y'`` column and drawn with a marker symbol
    chosen by the ``'y_set'`` column; every other column is treated as a
    feature and fed to t-SNE (``random_state=0``).

    Parameters
    ----------
    X : pandas.DataFrame
        Feature columns plus the columns ``'y'`` and ``'y_set'``.
    n : int
        Number of t-SNE components: 2 for ``px.scatter``, 3 for
        ``px.scatter_3d``.
    string : str
        Cycle identifier used in the title and the output file name. Both the
        directory-style form (``'cycle_32'``) and the fraction form
        (``'1/32'``, ``'1'``) are accepted.
    save : bool
        If False the figure is shown; if True it is written as SVG under
        ``./figs/tsne-plots-all/``.

    Raises
    ------
    ValueError
        If ``n`` is not 2 or 3.
    """
    if n not in (2, 3):
        raise ValueError(f"n must be 2 or 3, got {n!r}")

    # Select features by name instead of assuming the two label columns are
    # the last two columns of the frame.
    features = X.drop(columns=['y', 'y_set'])
    tsne = TSNE(n_components=n, random_state=0)
    projections = tsne.fit_transform(features)

    # Reduce 'cycle_32', '1/32' and '32' to the same denominator so the title
    # never becomes '1/1/32' (or '1/1' for a full cycle).
    denominator = string.split('_')[-1].split('/')[-1]
    s_title = '1' if denominator == '1' else '1/' + denominator
    title = f"t-SNE para dados de treinamento com {s_title} ciclo pós falta."
    hue_order = ['AT', 'BT', 'CT', 'AB', 'BC', 'CA', 'ABT', 'BCT', 'CAT', 'ABC']
    arg_dict = {'symbol': X['y_set'], 'symbol_sequence': ['circle', 'cross'],
                'color': X['y'], 'labels': {'color': 'y'}, 'category_orders': {'color': hue_order},
                'width': 1200, 'height': 800, 'title': title}

    if n == 2:
        size = 12
        fig = px.scatter(projections, x=0, y=1, **arg_dict)
    else:
        size = 6
        fig = px.scatter_3d(projections, x=0, y=1, z=2, **arg_dict)

    fig.update_traces(marker=dict(size=size, line=dict(width=1.5, color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    if not save:
        fig.show()
    else:
        # A '/' in the identifier would otherwise be read as a sub-directory.
        # The 'cicle' spelling is kept so existing file names do not change.
        file_tag = string.replace('/', '_')
        fig.write_image(f"./figs/tsne-plots-all/tsne_{file_tag}_cicle_{n}D.svg")

# Gerando plot dos sinais transformados

In [4]:
# Maps each cycle directory name to the number that appears as the suffix of
# the matching saved model files (rocket_<cycle>_<n>.pkl) -- presumably the
# feature count the model was trained with; confirm against the training code.
cycle_features = {
    "cycle_1": 100,
    "cycle_2": 300,
    "cycle_4": 400,
    "cycle_8": 400,
    "cycle_16": 300,
    "cycle_32": 500,
    "cycle_64": 300,
    "cycle_128": 700,
}
cycle = list(cycle_features)
features = list(cycle_features.values())

In [5]:
signal = 'i'
for c, n in cycle_features.items():
    # Only the training split is plotted, so the validation pickles are not read.
    X_train_flavio = decompress_pickle(INPUT_DATA_PATH + f'folds/{signal}/{c}/X_train')
    y_train_flavio = decompress_pickle(INPUT_DATA_PATH + f'folds/{signal}/{c}/y_train')

    X_train_robson = decompress_pickle(INPUT_DATA_PATH + f'folds-robson/{signal}/{c}/X_train')
    y_train_robson = decompress_pickle(INPUT_DATA_PATH + f'folds-robson/{signal}/{c}/y_train')

    # Saved transformer and normalisation maxima for this cycle.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(MODEL_PATH + f'rocket_{c}_{n}.pkl', 'rb') as f:
        minirocket = pickle.load(f)

    with open(MODEL_PATH + f'rocket_{c}_max_values_{n}.pkl', 'rb') as f:
        max_values = pickle.load(f)

    # Both datasets are normalised with the same saved maxima and pushed
    # through the same transformer.
    X_train_flavio = normalizing(X_train_flavio, max_values)
    X_train_flavio = minirocket.transform(X_train_flavio)

    X_train_robson = normalizing(X_train_robson, max_values)
    X_train_robson = minirocket.transform(X_train_robson)

    # plot_tsne expects the label columns 'y' and 'y_set' after the features.
    X_train_flavio['y'] = y_train_flavio
    X_train_robson['y'] = y_train_robson

    X_train_flavio['y_set'] = 'dataset 1'
    X_train_robson['y_set'] = 'dataset 2'

    X_train = pd.concat([X_train_flavio, X_train_robson]).reset_index(drop=True)

    plot_tsne(X_train, 2, c, save=True)
    plot_tsne(X_train, 3, c, save=True)

# Testes

In [6]:
# Reload the raw cycle_1 folds of both datasets for the experiments below.
signal = 'i'
cycle = 'cycle_1'

flavio_fold = INPUT_DATA_PATH + f'folds/{signal}/{cycle}/'
robson_fold = INPUT_DATA_PATH + f'folds-robson/{signal}/{cycle}/'

X_train_flavio = decompress_pickle(flavio_fold + 'X_train')
y_train_flavio = decompress_pickle(flavio_fold + 'y_train')
X_val_flavio = decompress_pickle(flavio_fold + 'X_val')
y_val_flavio = decompress_pickle(flavio_fold + 'y_val')

X_train_robson = decompress_pickle(robson_fold + 'X_train')
y_train_robson = decompress_pickle(robson_fold + 'y_train')
X_val_robson = decompress_pickle(robson_fold + 'X_val')
y_val_robson = decompress_pickle(robson_fold + 'y_val')

# Disabled: merging both datasets into single train/validation sets.
# X_train = pd.concat([X_train_flavio, X_train_robson]).reset_index(drop=True)
# y_train = np.concatenate([y_train_flavio, y_train_robson])
# X_val = pd.concat([X_val_flavio, X_val_robson]).reset_index(drop=True)
# y_val = np.concatenate([y_val_flavio, y_val_robson])
# feat_cols = list(X_train_flavio.columns)

In [7]:
# Load the saved transformer and normalisation maxima for the current cycle
# from the shared model directory (MODEL_PATH) instead of a duplicated literal.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(MODEL_PATH + f'rocket_{cycle}_100.pkl', 'rb') as f:
    minirocket_cycle_1 = pickle.load(f)

with open(MODEL_PATH + f'rocket_{cycle}_max_values_100.pkl', 'rb') as f:
    max_values_cycle_1 = pickle.load(f)

# Both datasets are normalised with the same maxima, then transformed.
# These assignments overwrite the raw frames, so re-running this cell without
# re-running the loading cell would transform already-transformed data.
X_train_flavio = normalizing(X_train_flavio, max_values_cycle_1)
X_train_flavio = minirocket_cycle_1.transform(X_train_flavio)

X_train_robson = normalizing(X_train_robson, max_values_cycle_1)
X_train_robson = minirocket_cycle_1.transform(X_train_robson)

In [8]:
# Attach the class label and a dataset-of-origin tag to each transformed
# frame, then stack both frames under a fresh 0..n-1 index.
for frame, labels, origin in (
    (X_train_flavio, y_train_flavio, 'dataset 1'),
    (X_train_robson, y_train_robson, 'dataset 2'),
):
    frame['y'] = labels
    frame['y_set'] = origin

X_train = pd.concat([X_train_flavio, X_train_robson], ignore_index=True)

In [9]:
# Pass the cycle identifier ('cycle_1') rather than '1': plot_tsne only maps
# the exact string 'cycle_1' to a "1 ciclo" title, so '1' rendered as "1/1".
plot_tsne(X_train_robson, 2, cycle, save=False)

In [10]:
# Pass the cycle identifier ('cycle_1') rather than '1': plot_tsne only maps
# the exact string 'cycle_1' to a "1 ciclo" title, so '1' rendered as "1/1".
plot_tsne(X_train_robson, 3, cycle, save=False)

In [53]:
# Tabularized (untransformed) cycle_1 training data; only the first dataset
# is plotted here.
signal = 'i'
cycle = 'cycle_1'
X_train_flavio, X_train_robson, _, feat_cols = open_tabular_data(signal, cycle)

# plot_tsne needs the dataset tag as the column 'y_set'.
X_train_flavio['y_set'] = 'dataset 1'
X_train_robson['y_set'] = 'dataset 2'

# Pass the cycle identifier rather than '1': plot_tsne only maps the exact
# string 'cycle_1' to a "1 ciclo" title, so '1' rendered as "1/1".
plot_tsne(X_train_flavio, 2, cycle)

In [None]:
# y_train is never defined at notebook scope (only inside open_tabular_data
# and in commented-out code), so build it here in the same order the frames
# were stacked: first dataset, then second.
y_train = np.concatenate([y_train_flavio, y_train_robson])

X_train_flavio['y'] = y_train_flavio
X_train_robson['y'] = y_train_robson
X_train['y'] = y_train

In [7]:
# Display the normalisation maxima loaded from the cycle_1 max_values pickle.
max_values_cycle_1

[6075.605580855044, 6077.654717323059, 6068.883200964617, 3100.6294160039506]

In [5]:
# Display the current X_train_flavio frame for inspection.
X_train_flavio

Unnamed: 0,A,B,C,Z,y
310,0 -598.765221 1 -597.653301 2 -593...,0 239.535111 1 226.758458 2 213...,0 358.440861 1 373.648332 2 ...,0 -0.496481 1 -0.529718 2 -0...,CAT
14,0 -478.212278 1 -468.870099 2 -459...,0 -78.813581 1 -93.685318 2 -...,0 556.186337 1 561.534912 2 566...,0 -1.305999e-05 1 1.711759e-05 2 ...,BC
743,0 495.123521 1 486.504819 2 477...,0 49.269891 1 63.342346 2 ...,0 -544.400164 1 -549.994177 2 -555...,0 0.612399 1 -1.302051 2 -0...,BT
688,0 -227.175710 1 -243.991815 2 -...,0 596.392428 1 598.024542 2 599...,0 -373.008355 1 -363.924544 2 -...,0 0.000049 1 -0.000004 2 0.00005...,CA
616,0 -522.038921 1 -528.520150 2 -534...,0 523.537090 1 513.335727 2 506...,0 -0.422278 1 13.298228 2 29...,0 -0.000028 1 0.000003 2 -0.00001...,BC
...,...,...,...,...,...
347,0 -318.541943 1 -331.094376 2 -...,0 601.028672 1 600.966600 2 600...,0 -284.454289 1 -270.559656 2 -257...,0 -0.315516 1 0.340797 2 0...,AT
552,0 -571.616737 1 -564.713827 2 -...,0 116.337477 1 100.090437 2 84...,0 455.426342 1 465.941814 2 474...,0 0.122364 1 0.455934 2 -0...,ABT
841,0 -534.154784 1 -526.362102 2 -518...,0 24.226209 1 10.717093 2 -6...,0 508.654310 1 514.227047 2 ...,0 -0.348723 1 -0.137317 2 -0...,BCT
133,0 -351.137013 1 -365.288353 2 -...,0 596.773598 1 599.613805 2 ...,0 -244.346261 1 -231.408248 2 -217...,0 0.000088 1 0.000004 2 -0.00012...,AB


# Gerando plot dos sinais originais

In [12]:
# Post-fault window lengths, written as fractions of one cycle. The
# denominator of each fraction selects the matching 'cycle_<d>' fold directory.
# NOTE(review): with save=True plot_tsne derives the file name only from the
# cycle string and the dimensionality, so these files have the same names as
# those written by the earlier loop over cycle_features -- whichever loop runs
# last overwrites the other's figures.
cycles = ['1', '1/2', '1/4', '1/8', '1/16', '1/32', '1/64', '1/128']
signal = 'i'
for fraction in cycles:
    denominator = fraction.rsplit('/', 1)[-1]
    cycle = f'cycle_{denominator}'
    X_train_flavio, X_train_robson, _, feat_cols = open_tabular_data(signal, cycle)
    for frame, origin in ((X_train_flavio, 'dataset 1'), (X_train_robson, 'dataset 2')):
        frame['y_set'] = origin
    X_train = pd.concat([X_train_flavio, X_train_robson], ignore_index=True)
    for dims in (2, 3):
        plot_tsne(X_train, dims, cycle, save=True)

# 1 ciclo pós falta

In [4]:
# Build the tabular training frames for the full-cycle window, tag every row
# with its dataset of origin and stack both datasets.
signal, cycle = 'i', 'cycle_1'
X_train_flavio, X_train_robson, _, feat_cols = open_tabular_data(signal, cycle)

for frame, origin in ((X_train_flavio, 'dataset 1'), (X_train_robson, 'dataset 2')):
    frame['y_set'] = origin

X_train = pd.concat([X_train_flavio, X_train_robson], ignore_index=True)

In [5]:
# Pass the cycle identifier ('cycle_1') rather than '1': plot_tsne only maps
# the exact string 'cycle_1' to a "1 ciclo" title, so '1' rendered as "1/1".
plot_tsne(X_train, 2, cycle)

In [6]:
# Pass the cycle identifier ('cycle_1') rather than '1': plot_tsne only maps
# the exact string 'cycle_1' to a "1 ciclo" title, so '1' rendered as "1/1".
plot_tsne(X_train, 3, cycle)

# 1/32 Ciclo Pós Falta

In [7]:
# Build the tabular training frames for the 1/32-cycle window, tag every row
# with its dataset of origin and stack both datasets.
signal, cycle = 'i', 'cycle_32'
X_train_flavio, X_train_robson, _, feat_cols = open_tabular_data(signal, cycle)

for frame, origin in ((X_train_flavio, 'dataset 1'), (X_train_robson, 'dataset 2')):
    frame['y_set'] = origin

X_train = pd.concat([X_train_flavio, X_train_robson], ignore_index=True)

In [8]:
# Pass the cycle identifier ('cycle_32') rather than '1/32': plot_tsne
# prefixes '1/' to the text after the last '_', so '1/32' rendered as "1/1/32".
plot_tsne(X_train, 2, cycle)

In [9]:
# Pass the cycle identifier ('cycle_32') rather than '1/32': plot_tsne
# prefixes '1/' to the text after the last '_', so '1/32' rendered as "1/1/32".
plot_tsne(X_train, 3, cycle)

# 1/128 Ciclo Pós Falta

In [10]:
# Build the tabular training frames for the 1/128-cycle window, tag every row
# with its dataset of origin and stack both datasets.
signal, cycle = 'i', 'cycle_128'
X_train_flavio, X_train_robson, _, feat_cols = open_tabular_data(signal, cycle)

for frame, origin in ((X_train_flavio, 'dataset 1'), (X_train_robson, 'dataset 2')):
    frame['y_set'] = origin

X_train = pd.concat([X_train_flavio, X_train_robson], ignore_index=True)

In [11]:
# Pass the cycle identifier ('cycle_128') rather than '1/128': plot_tsne
# prefixes '1/' to the text after the last '_', so '1/128' rendered as "1/1/128".
plot_tsne(X_train, 2, cycle)

In [12]:
# Pass the cycle identifier ('cycle_128') rather than '1/128': plot_tsne
# prefixes '1/' to the text after the last '_', so '1/128' rendered as "1/1/128".
plot_tsne(X_train, 3, cycle)