In [1]:
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
import seaborn as sns
import skorch
from skorch_extra.netbase import NeuralNetBase, NeuralNetClassifier, NeuralNetTransformer
import sys
import numpy as np
sys.path.append('..')
from benchmarks.RPDBCS.models.RPDBCS2020Net import RPDBCS2020Net

# Notebook-wide matplotlib defaults: large canvas, high DPI and a bigger font,
# applied in one call so the three settings stay together.
plt.rcParams.update({
    'figure.figsize': (24, 12),
    'figure.dpi': 128,
    'font.size': 18,
})

# New

Let us encode all datasets

In [2]:
from vibdata.datahandler.transforms.TransformDataset import PickledDataset
from benchmarks.RPDBCS.datasets import TransformsDataset
# from benchmarks.RPDBCS.experiment_vibnet import DEFAULT_NETPARAMS, DEFAULT_OPTIM_PARAMS, NetPerDomain, MetricNet
from benchmarks.RPDBCS.experiment_finetunning import DEFAULT_NETPARAMS, DEFAULT_OPTIM_PARAMS, MetricNetPerDomain, MetricNet, NetPerDomain

def encode_datas(model_fpath='../saved_models/10-02-2022/train_end_vibnet_mfpt-cwru-pu.pt',
                 data_names=('rpdbcs', 'mfpt', 'pu', 'cwru', 'UOC'),
                 cache_dir='/tmp/sigdata_cache',
                 input_size=6100):
    """Encode every cached dataset with a saved network and stack the results.

    Parameters
    ----------
    model_fpath : str
        Path of the checkpoint passed to ``load_params``.
    data_names : sequence of str
        Names of the pickled datasets to read from ``cache_dir``.
    cache_dir : str
        Directory holding one ``PickledDataset`` per dataset name.
    input_size : int
        Number of values per signal; used both for the network's
        ``input_size`` and for the width of the buffer the signals are
        copied into.

    Returns
    -------
    Xe : numpy.ndarray
        Encodings of all datasets, stacked row-wise in ``data_names`` order.
    Y : numpy.ndarray
        The ``'label'`` entry of every sample, in the same row order.
    Domain : numpy.ndarray
        The dataset name each row came from.

    Raises
    ------
    ValueError
        If ``data_names`` is empty (there would be nothing to stack).

    Notes
    -----
    NOTE(review): the output recorded for the cell that calls this function is
    ``TypeError: __init__() missing 1 required positional argument:
    'criterion'``. Presumably the ``NetPerDomain`` construction below needs a
    criterion that DEFAULT_NETPARAMS does not supply -- confirm.
    """
    data_names = list(data_names)
    if not data_names:
        raise ValueError('data_names must contain at least one dataset name')

    module_params = {
        # 'n_classes': None,
        'n_domains': 4,
        'encode_size': 32, 'input_size': input_size,
    }
    # skorch-style routing: the "module__" prefix forwards the value to the module.
    module_params = {"module__"+key: v for key, v in module_params.items()}
    module_params['module'] = MetricNetPerDomain
    # module_params['module'] = MetricNet

    vibnet = NetPerDomain(**DEFAULT_NETPARAMS, **module_params, **DEFAULT_OPTIM_PARAMS)
    vibnet.initialize()
    vibnet.load_params(f_params=model_fpath)

    Xf = []
    Yf = []
    Sf = []
    for dname in data_names:
        D = PickledDataset('%s/%s' % (cache_dir, dname))
        n_samples = len(D)
        X = np.empty((n_samples, input_size), dtype=np.float32)
        Y = np.empty(n_samples, dtype=int)
        for i, x in enumerate(D):
            X[i] = x['signal']
            Y[i] = x['label']

        # NOTE(review): every sample is encoded with domain index 0, whatever
        # dataset it comes from, and n_domains is 4 while the default list has
        # five datasets -- confirm both are intended.
        Xe = vibnet.transform({'X': X, 'domain': [0]*n_samples})
        Sf.append([dname]*n_samples)
        Xf.append(Xe)
        Yf.append(Y)

    Xe = np.vstack(Xf)
    Y = np.hstack(Yf)
    Domain = np.hstack(Sf)
    return Xe,Y,Domain

# Encode all cached datasets with the checkpoint stored under saved_models/tripletnet.
# Xe, Y and Domain become notebook globals; the cross-domain distance analysis
# further down reads them directly.
# NOTE(review): the output recorded for this cell is a TypeError about a missing
# 'criterion' argument, so those globals may come from an earlier kernel state --
# confirm the notebook survives Restart Kernel -> Run All.
Xe,Y,Domain = encode_datas(model_fpath='../saved_models/tripletnet/train_end_vibnet_cwru-UOC-pu-rpdbcs.pt')
# encode_datas(model_fpath='../saved_models/tripletnet/train_end_vibnet_cwru-UOC-pu-rpdbcs.pt')

TypeError: __init__() missing 1 required positional argument: 'criterion'

Plot all datasets (PCA)

In [4]:
from vibdata.datahandler.transforms.TransformDataset import PickledDataset
from benchmarks.RPDBCS.datasets import TransformsDataset
from benchmarks.RPDBCS.experiment_finetunning import DEFAULT_NETPARAMS, DEFAULT_OPTIM_PARAMS, MLP6ClassifierPerDomain, MyNet, _transform_output
from sklearn.decomposition import PCA
from ipywidgets import interact
import os


# @interact(fname=['train_end_vibnet_mfpt-cwru-pu','train_end_vibnet_cwru-pu-rpdbcs','train_end_vibnet_mfpt-cwru-rpdbcs','train_end_vibnet_mfpt-pu-rpdbcs'])
# Single source of truth for the checkpoint directory. The dropdown used to list
# '../saved_models/coral_analysis' while the checkpoint was loaded from
# '../saved_models/tripletnet', so a listed file could be missing at load time.
# NOTE(review): the listing directory was kept -- confirm it is the intended one.
_MODELS_DIR = '../saved_models/coral_analysis'


# @interact(fname=['train_end_vibnet_mfpt-cwru-pu','train_end_vibnet_cwru-pu-rpdbcs','train_end_vibnet_mfpt-cwru-rpdbcs','train_end_vibnet_mfpt-pu-rpdbcs'])
@interact(fname=sorted(f for f in os.listdir(_MODELS_DIR) if f.endswith('.pt')))
def _f(fname):
    """Encode all cached datasets with the selected checkpoint and plot a 2-D PCA.

    Parameters
    ----------
    fname : str
        File name of a ``.pt`` checkpoint inside ``_MODELS_DIR``; chosen through
        the dropdown created by ``interact``.

    Two scatter plots are drawn side by side, coloured and styled by source
    dataset: samples with label 0 on the left, all other labels on the right.
    Nothing is returned.
    """
    # Imported here so the cell does not depend on an earlier cell having
    # brought MetricNetPerDomain into the kernel namespace.
    from benchmarks.RPDBCS.experiment_finetunning import MetricNetPerDomain

    module_params = {
        # 'n_classes': None,
        'n_domains': 4,
        'encode_size': 32, 'input_size': 6100,
    }
    # skorch-style routing: the "module__" prefix forwards the value to the module.
    module_params = {"module__"+key: v for key, v in module_params.items()}
    module_params['module'] = MetricNetPerDomain
    # Alternative kept from the original notebook: use MetricNet as the module,
    # with 'n_classes': None in place of 'n_domains'.

    # Only MyNet is built: the original also constructed a NetPerDomain that was
    # overwritten on the very next statement.
    vibnet = MyNet(**DEFAULT_NETPARAMS, **module_params, **DEFAULT_OPTIM_PARAMS)
    vibnet.initialize()
    vibnet.load_params(f_params=os.path.join(_MODELS_DIR, fname))

    data_names = ['rpdbcs','mfpt','pu','cwru', 'UOC']

    Xf = []
    Yf = []
    Sf = []
    for dname in data_names:
        D = PickledDataset('/tmp/sigdata_cache/%s' % dname)
        X = np.empty((len(D), 6100), dtype=np.float32)
        Y = np.empty(len(D), dtype=int)
        for i, x in enumerate(D):
            X[i] = x['signal']
            Y[i] = x['label']

        # NOTE(review): every sample is encoded with domain index 0 -- confirm.
        Xe = vibnet.transform({'X': X, 'domain': [0]*len(D)})
        Sf.append([dname]*len(D))
        Xf.append(Xe)
        Yf.append(Y)

    Xe = np.vstack(Xf)
    Y = np.hstack(Yf)
    Domain = np.hstack(Sf)

    Xe_pca = PCA(2).fit_transform(Xe)
    df = pd.DataFrame(Xe_pca, columns=['pca1','pca2'])
    df['label'] = Y
    df['domain'] = Domain
    # Labels are integers, so they must be cast before being glued to the
    # domain name; adding the raw string array to the int column fails.
    df['domain-label'] = df['domain'] + df['label'].astype(str)
    orig_palette = sns.color_palette()
    # One colour per domain, cycling through the whole palette if needed.
    palette = {dl: orig_palette[i % len(orig_palette)] for i, dl in enumerate(df['domain'].unique())}

    _, (ax1, ax2) = plt.subplots(1, 2)

    mask1 = df['label']==0
    mask2 = df['label']!=0

    sns.scatterplot(data=df[mask1],x='pca1',y='pca2',hue='domain',alpha=0.8, palette=palette, style='domain', ax=ax1)
    ax1.set_title('label == 0')
    sns.scatterplot(data=df[mask2],x='pca1',y='pca2',hue='domain',alpha=0.8, palette=palette, style='domain', ax=ax2)
    ax2.set_title('label != 0')
    



interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_0.pt',), value='train_end_vibne…

- Claramente há uma distinção dos domínios
- RPDBCS, apesar de não ter sido usado no treinamento, ocupa uma região bem definida do espaço.
- CWRU é parecido com todos os demais, mesmo sem nunca ter sido visto no treinamento.

Acho que o mais importante agora é encontrar padrões nos resultados que nos ajudem na construção de futuros modelos.

# Which samples are the most similar across domains?

In [12]:
from itertools import combinations
from scipy.spatial import distance_matrix


def calc_metrics(M, name, axis):
    """Summarise a distance matrix along one axis.

    Returns a dict with the minimum, maximum and mean of ``M`` taken along
    ``axis``, stored under ``'min_dist_<name>'``, ``'max_dist_<name>'`` and
    ``'avg_dist_<name>'`` (in that order).
    """
    reducers = {
        'min_dist_%s': M.min,
        'max_dist_%s': M.max,
        'avg_dist_%s': M.mean,
    }
    return {template % name: reduce(axis=axis) for template, reduce in reducers.items()}


# For every pair of domains, compute the distance matrix between their
# encodings and record, for each sample, the min/max/mean distance to the
# other domain. Reads the globals Xe, Y and Domain.
uniq_domains = np.unique(Domain)
data = {d: {} for d in uniq_domains}
for d1, d2 in combinations(uniq_domains, 2):
    print(d1, d2)
    d1_mask, d2_mask = Domain == d1, Domain == d2
    X1, Y1 = Xe[d1_mask], Y[d1_mask]
    X2, Y2 = Xe[d2_mask], Y[d2_mask]
    M = distance_matrix(X1, X2)  # rows follow d1 samples, columns follow d2 samples

    # Reducing along axis 1 gives one value per d1 sample, along axis 0 one per d2 sample.
    for dom, other, labels, axis in ((d1, d2, Y1, 1), (d2, d1, Y2, 0)):
        metrics = calc_metrics(M, other, axis)
        data[dom].update(metrics)
        data[dom]['label'] = labels
        data[dom]['domain'] = np.full(len(labels), dom)

# Build one table per domain and stack them with a single concat call instead of
# growing a DataFrame inside a loop. Columns a domain lacks (its distances to
# itself) are left as NaN by the concat.
_domain_frames = [pd.DataFrame(values) for values in data.values()]
# pd.concat rejects an empty list, so keep the original empty-frame result.
df = pd.concat(_domain_frames, ignore_index=True) if _domain_frames else pd.DataFrame()
df

UOC cwru
UOC mfpt
UOC pu
UOC rpdbcs
cwru mfpt
cwru pu
cwru rpdbcs
mfpt pu
mfpt rpdbcs
pu rpdbcs


Unnamed: 0,min_dist_cwru,max_dist_cwru,avg_dist_cwru,label,domain,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs,min_dist_UOC,max_dist_UOC,avg_dist_UOC
0,0.051353,0.311074,0.079447,0,UOC,0.059566,0.088860,0.073248,0.028103,0.456045,0.077959,0.049380,0.895800,0.082465,,,
1,0.043154,0.316766,0.072322,0,UOC,0.051367,0.081947,0.065949,0.028333,0.449796,0.070980,0.039760,0.888974,0.078156,,,
2,0.031873,0.328560,0.058430,0,UOC,0.037216,0.067561,0.051704,0.020799,0.449484,0.057559,0.030048,0.889599,0.066341,,,
3,0.056194,0.310265,0.082368,0,UOC,0.064128,0.092529,0.077708,0.045225,0.457722,0.082613,0.046232,0.890270,0.092213,,,
4,0.058014,0.309956,0.084089,0,UOC,0.065460,0.094922,0.079307,0.047110,0.452895,0.083954,0.043009,0.886544,0.092429,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86521,0.007425,0.367445,0.040118,0,rpdbcs,0.027411,0.047775,0.033200,0.025659,0.430040,0.042017,,,,0.058294,0.306201,0.123588
86522,0.005730,0.370089,0.036453,0,rpdbcs,0.023918,0.044346,0.029535,0.021442,0.429407,0.038921,,,,0.053872,0.305564,0.123620
86523,0.005276,0.367792,0.033072,0,rpdbcs,0.019264,0.040732,0.025231,0.017182,0.430027,0.035419,,,,0.052264,0.302852,0.122120
86524,0.004101,0.368876,0.031108,0,rpdbcs,0.017046,0.039902,0.023137,0.014960,0.430804,0.033725,,,,0.050822,0.302069,0.122149


In [17]:
# Per-domain mean of every remaining column (distance metrics and label).
df.groupby(['domain']).mean()

Unnamed: 0_level_0,min_dist_cwru,max_dist_cwru,avg_dist_cwru,label,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs,min_dist_UOC,max_dist_UOC,avg_dist_UOC
domain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
UOC,0.08374,0.364303,0.124236,4.0,0.107648,0.134789,0.12108,0.086638,0.440398,0.125071,0.085407,0.893,0.128981,,,
cwru,,,,4.533814,0.014982,0.042114,0.025011,0.012438,0.433506,0.03582,0.013647,0.884857,0.041006,0.047661,0.294505,0.124236
mfpt,0.00539,0.368363,0.025011,1.05,,,,0.003451,0.434616,0.027695,0.006574,0.885958,0.033706,0.04379,0.294345,0.12108
pu,0.010447,0.370207,0.03582,1.325385,0.012786,0.047601,0.027695,,,,0.013079,0.884207,0.043815,0.04457,0.297239,0.125071
rpdbcs,0.014511,0.374145,0.041006,0.539298,0.026385,0.050449,0.033706,0.021667,0.43389,0.043815,,,,0.055002,0.304451,0.128981


- UOC é o domínio mais diferente de todos. Como mostrado abaixo, há algumas classes que o tornam bem diferente.
- RPDBCS também é bem diferente dos demais, pela mesma razão.

In [16]:
# Same means, broken down by label within each domain.
df.groupby(['domain','label']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs,min_dist_UOC,max_dist_UOC,avg_dist_UOC
domain,label,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
UOC,0,0.060243,0.306033,0.087024,0.068248,0.097528,0.082077,0.046167,0.455795,0.086716,0.04841,0.888602,0.095314,,,
UOC,1,0.064373,0.296959,0.092757,0.073648,0.10223,0.087389,0.041278,0.462307,0.092048,0.062424,0.894518,0.100223,,,
UOC,2,0.067792,0.403922,0.094984,0.078339,0.099616,0.091718,0.074054,0.414853,0.096349,0.077198,0.88737,0.102467,,,
UOC,3,0.132919,0.445477,0.193519,0.180831,0.205499,0.193761,0.129416,0.330296,0.194525,0.105699,0.896592,0.189216,,,
UOC,4,0.079832,0.361353,0.101997,0.07646,0.109789,0.096917,0.06721,0.391056,0.10144,0.084487,0.847854,0.108995,,,
UOC,5,0.051974,0.372569,0.143269,0.119586,0.150376,0.136953,0.093987,0.474262,0.140422,0.119418,0.916863,0.143894,,,
UOC,6,0.076906,0.391449,0.114711,0.105685,0.132098,0.114776,0.09682,0.483477,0.120304,0.050218,0.915784,0.122128,,,
UOC,7,0.065223,0.333635,0.092605,0.077103,0.109563,0.087356,0.059349,0.469843,0.09233,0.066254,0.910988,0.090362,,,
UOC,8,0.154399,0.36733,0.197256,0.188931,0.206401,0.198773,0.17146,0.481692,0.201508,0.154558,0.878427,0.208226,,,
cwru,0,,,,0.027215,0.048231,0.032611,0.025436,0.441256,0.042191,0.023465,0.884616,0.051643,0.053542,0.296345,0.124536


- Normal é quase sempre o mais próximo do resto.
- PU tem exemplos parecidos com defeitos do rpdbcs.
- Defeito 2 do rpdbcs é muito diferente do resto.

Conclusão:  
Talvez seja melhor focar em criar diferentes módulos para cada classe.