# experiment_finetunning2

In [1]:
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
import seaborn as sns
import skorch
from skorch_extra.netbase import NeuralNetBase, NeuralNetClassifier, NeuralNetTransformer
import sys
import numpy as np
sys.path.append('..')
from benchmarks.RPDBCS.models.RPDBCS2020Net import RPDBCS2020Net

plt.rcParams['figure.figsize']=(24,12)
plt.rcParams['figure.dpi']=128
plt.rcParams['font.size']=18

In [3]:
from vibdata.datahandler.transforms.TransformDataset import PickledDataset
from benchmarks.RPDBCS.datasets import TransformsDataset
from benchmarks.RPDBCS.experiment_finetunning2 import DEFAULT_NETPARAMS, DEFAULT_OPTIM_PARAMS, _transform_output, NetPerDomain, MetricNetPerDomain
from sklearn.decomposition import PCA
from ipywidgets import interact
import os
from benchmarks.RPDBCS.coral import CoralLoss
from sklearn.manifold import TSNE, Isomap

# @interact(fname=['train_end_vibnet_mfpt-cwru-pu','train_end_vibnet_cwru-pu-rpdbcs','train_end_vibnet_mfpt-cwru-rpdbcs','train_end_vibnet_mfpt-pu-rpdbcs'])

DATA_NAMES = ['rpdbcs', 'mfpt', 'pu', 'cwru', 'UOC', 'XJTU']

def encode_features(fname, sampling):
    module_params = {
        'n_domains': 5,
        'encode_size': 32, 'input_size': 6100,
        'head_encode_size': 8,
        'backbone': RPDBCS2020Net 
    }
    module_params = {"module__"+key: v for key, v in module_params.items()}
    module_params['module'] = MetricNetPerDomain
    # module_params['module'] = MetricNet

    net_params = DEFAULT_NETPARAMS.copy()
    ### Criterion parameters ###
    net_params.update({
        # 'device':'cuda',
        'max_epochs': 100,
        'criterion': CoralLoss,
        'criterion__clf_loss': None,
        'criterion__lamb': 1.0,
        'batch_size': 128
    })
    ############################

    vibnet = NetPerDomain(**net_params, **module_params, **DEFAULT_OPTIM_PARAMS)
    vibnet.initialize()
    vibnet.load_params(f_params='../saved_models/coral_analysis/%s' % fname)

    

    Xf = []
    Yf = []
    Sf = []
    for dname in DATA_NAMES:
        D = PickledDataset('/tmp/sigdata_cache/%s' % dname)
        n = len(D)
        n2 = int(n*sampling)
        idxs = np.random.permutation(n)[:n2]
        X = np.empty((n2, 6100), dtype=np.float32)
        Y = np.empty(n2, dtype=int)
        for i, j in enumerate(idxs):
            X[i] = D[j]['signal']
            Y[i] = D[j]['label']

        Xe = vibnet.transform({'X': X, 'domain': [0]*n2})
        Sf.append([dname]*n2)
        Xf.append(Xe)
        Yf.append(Y)

    Xe = np.vstack(Xf)
    Y = np.hstack(Yf)
    Domain = np.hstack(Sf)
    return Xe, Y, Domain


def dimension_reduction(fname, sampling):
    Xe, Y, Domain = encode_features(fname, sampling)

    # dim_rec_alg = Isomap(n_components=2, n_jobs=6)
    # dim_rec_alg = PCA(2)
    dim_rec_alg = TSNE(2, init='pca', learning_rate='auto', n_jobs=6, n_iter=1000)
    Xe_pca = dim_rec_alg.fit_transform(Xe)
    df = pd.DataFrame(Xe_pca, columns=['comp1', 'comp2'])
    df['label'] = Y  # .astype(str)
    df['domain'] = Domain
    return df, Xe, Y


@interact(fname=[f for f in os.listdir('../saved_models/coral_analysis') if f[-3:] == '.pt'],
          sampling=(0.1, 1.0, 0.1))
def _f(fname, sampling=0.5):
    alpha = 0.65

    df, _, _ = dimension_reduction(fname, sampling)
    # df['domain-label'] = Domain + df['label']
    orig_palette = sns.color_palette()
    palette = {dl: orig_palette[i % len(DATA_NAMES)] for i, dl in enumerate(df['domain'].unique())}
    # palette['rpdbcs0'] = 'purple'
    # mask = df['domain'] == 'rpdbcs'

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(24, 16))

    mask1 = df['label'] == 0
    mask2 = df['label'] != 0

    # sns.scatterplot(data=df,x='pca1',y='pca2',hue='domain',alpha=0.8, palette=palette, style='domain');
    sns.scatterplot(data=df[mask1], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax1)
    ax1.set_title('Only Normal')
    sns.scatterplot(data=df[mask2], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax2)
    ax2.set_title('Only Defects')
    sns.scatterplot(data=df, x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax3);
    ax3.set_title('All')

interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_0_lamb1.pt', 'train_end_vibnet_…

In [49]:
@interact(fname=[f for f in os.listdir('../saved_models/coral_analysis') if f[-3:] == '.pt'],
          sampling=(0.1, 1.0, 0.1))
def _f(fname, sampling=0.5):
    alpha = 0.65

    df, _, _ = dimension_reduction(fname, sampling)
    # df['domain-label'] = Domain + df['label']
    orig_palette = sns.color_palette()
    palette = {dl: orig_palette[i % len(DATA_NAMES)] for i, dl in enumerate(df['domain'].unique())}
    # palette['rpdbcs0'] = 'purple'
    # mask = df['domain'] == 'rpdbcs'

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(24, 16))

    mask1 = df['label'] == 0
    mask2 = df['label'] != 0

    sns.scatterplot(data=df[mask1], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax1)
    ax1.set_title('Only Normal')
    sns.scatterplot(data=df[mask2], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax2)
    ax2.set_title('Only Defects')
    sns.scatterplot(data=df, x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax3);
    ax3.set_title('All')

interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_0_lamb1.pt', 'train_end_vibnet_…

In [54]:
def dimension_reduction(fname, sampling):
    Xe, Y, Domain = encode_features(fname, sampling)

    # dim_rec_alg = Isomap(n_components=2, n_jobs=6)
    dim_rec_alg = PCA(2)
    # dim_rec_alg = TSNE(2, init='pca', learning_rate='auto', n_jobs=6, n_iter=1000)
    # Xe_pca = dim_rec_alg.fit_transform(Xe)
    Xe_pca = Xe[:,[2,3]]
    df = pd.DataFrame(Xe_pca, columns=['comp1', 'comp2'])
    df['label'] = Y  # .astype(str)
    df['domain'] = Domain
    return df, Xe, Y

In [55]:
@interact(fname=[f for f in os.listdir('../saved_models/coral_analysis') if f[-3:] == '.pt'],
          sampling=(0.1, 1.0, 0.1))
def _f(fname, sampling=0.5):
    alpha = 0.65

    df, _, _ = dimension_reduction(fname, sampling)
    # df['domain-label'] = Domain + df['label']
    orig_palette = sns.color_palette()
    palette = {dl: orig_palette[i % len(DATA_NAMES)] for i, dl in enumerate(df['domain'].unique())}
    # palette['rpdbcs0'] = 'purple'
    # mask = df['domain'] == 'rpdbcs'

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(24, 16))

    mask1 = df['label'] == 0
    mask2 = df['label'] != 0

    sns.scatterplot(data=df[mask1], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax1)
    ax1.set_title('Only Normal')
    sns.scatterplot(data=df[mask2], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax2)
    ax2.set_title('Only Defects')
    sns.scatterplot(data=df, x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax3);
    ax3.set_title('All')

interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_0_lamb1.pt', 'train_end_vibnet_…

Pode ser que os head_classifiers estão fazendo o feature_space do backnone ser fácil de separar, pois isso pode facilitar para classificar. 
Talvez seja melhor criar um backbone com dois feature_space onde somente um é aplicado coral loss.

# Which samples are the most similar across domains?

In [13]:
Xe,Y,Domain = encode_features('train_end_vibnet_0_lamb100.pt',0.3)

In [38]:
from itertools import combinations
from scipy.spatial import distance_matrix


def calc_metrics(M, name, axis):
    M[M == 0] = M.mean()
    return {'min_dist_%s' % name: M.min(axis=axis),
            'max_dist_%s' % name: M.max(axis=axis),
            'avg_dist_%s' % name: M.mean(axis=axis)}

sampling = 20000

uniq_domains = np.unique(Domain)
data = {d: {} for d in uniq_domains}
# for d1, d2 in combinations(uniq_domains, 2):
for i in range(len(uniq_domains)):
    d1 = uniq_domains[i]
    for j in range(i, len(uniq_domains)):
        d2 = uniq_domains[j]
        print(d1, d2)
        d1_mask = Domain == d1
        d2_mask = Domain == d2
        Y1, Y2 = Y[d1_mask], Y[d2_mask]
        X1, X2 = Xe[d1_mask], Xe[d2_mask]

        idxs1 = np.random.permutation(len(X1))[:sampling]
        idxs2 = np.random.permutation(len(X2))[:sampling]
        X1, Y1 = X1[idxs1], Y1[idxs1]
        X2, Y2 = X2[idxs2], Y2[idxs2]

        M = distance_matrix(X1, X2)
        metrics = calc_metrics(M, d2, 1)
        data[d1].update(metrics)
        data[d1]['label'] = Y1
        data[d1]['domain'] = np.full(len(Y1), d1)

        metrics = calc_metrics(M, d1, 0)
        data[d2].update(metrics)
        data[d2]['label'] = Y2
        data[d2]['domain'] = np.full(len(Y2), d2)

df = pd.DataFrame()
for _, values in data.items():
    dftmp = pd.DataFrame(values)
    df = pd.concat((df, dftmp), ignore_index=True)
df

UOC UOC
UOC XJTU
UOC cwru
UOC mfpt
UOC pu
UOC rpdbcs
XJTU XJTU
XJTU cwru
XJTU mfpt
XJTU pu
XJTU rpdbcs
cwru cwru
cwru mfpt
cwru pu
cwru rpdbcs
mfpt mfpt
mfpt pu
mfpt rpdbcs
pu pu
pu rpdbcs
rpdbcs rpdbcs


Unnamed: 0,min_dist_UOC,max_dist_UOC,avg_dist_UOC,label,domain,min_dist_XJTU,max_dist_XJTU,avg_dist_XJTU,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs
0,0.003528,0.289551,0.146373,2,UOC,0.075918,0.837216,0.139887,0.121488,0.360720,0.150168,0.080772,0.185880,0.126274,0.089478,0.344369,0.126869,0.125800,1.295266,0.159801
1,0.005497,0.287117,0.143550,0,UOC,0.119024,0.927113,0.155491,0.125673,0.362582,0.156713,0.038870,0.140618,0.091068,0.062826,0.300056,0.132463,0.046080,1.209990,0.087249
2,0.011518,0.260294,0.153172,5,UOC,0.040459,0.906306,0.113755,0.131097,0.368502,0.161366,0.061798,0.175380,0.113846,0.116123,0.303472,0.168842,0.136537,1.275715,0.163632
3,0.016091,0.261986,0.136207,8,UOC,0.105176,0.906678,0.135245,0.159103,0.382519,0.214559,0.040353,0.149193,0.095474,0.037418,0.336977,0.123082,0.143297,1.202761,0.212216
4,0.018812,0.244187,0.179998,4,UOC,0.078351,0.868454,0.121943,0.128635,0.359569,0.184167,0.080716,0.160814,0.115447,0.091382,0.346923,0.129668,0.088745,1.256608,0.119448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45953,0.072995,0.232674,0.131209,0,rpdbcs,0.054688,0.885622,0.103495,0.065769,0.300768,0.090425,0.045995,0.161250,0.094277,0.014514,0.299461,0.096398,0.001428,1.276010,0.021319
45954,0.072619,0.236583,0.133379,0,rpdbcs,0.053955,0.884775,0.103302,0.062122,0.297867,0.087148,0.033020,0.149834,0.090383,0.029623,0.292934,0.109585,0.000828,1.275130,0.020934
45955,0.068565,0.246096,0.139991,2,rpdbcs,0.051062,0.886243,0.099943,0.060467,0.298933,0.086020,0.039263,0.160592,0.093510,0.017215,0.308723,0.102644,0.010741,1.285628,0.037415
45956,0.072640,0.242935,0.138129,0,rpdbcs,0.051555,0.890627,0.101971,0.055761,0.288998,0.079531,0.031159,0.156148,0.092401,0.017404,0.299802,0.095222,0.003701,1.260262,0.027713


In [40]:
df.groupby(['domain']).mean().drop('label',axis=1)

Unnamed: 0_level_0,min_dist_UOC,max_dist_UOC,avg_dist_UOC,min_dist_XJTU,max_dist_XJTU,avg_dist_XJTU,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs
domain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
UOC,0.010125,0.254843,0.15037,0.082166,0.916195,0.150449,0.088708,0.301081,0.1304,0.099116,0.197686,0.144818,0.093742,0.306361,0.14986,0.10142,1.252969,0.137045
XJTU,0.086527,0.275846,0.150449,0.004549,0.874306,0.081434,0.042884,0.267332,0.090316,0.034517,0.179118,0.10527,0.052207,0.30888,0.111612,0.067872,1.290237,0.107586
cwru,0.063811,0.244184,0.1304,0.037974,0.905236,0.090316,0.003314,0.256786,0.061282,0.04376,0.141368,0.092297,0.039191,0.277049,0.094886,0.05368,1.252193,0.089871
mfpt,0.069278,0.260706,0.144818,0.036601,0.883081,0.10527,0.047117,0.272057,0.092297,0.01125,0.174137,0.099701,0.026421,0.288605,0.11042,0.046736,1.258568,0.097027
pu,0.075226,0.256642,0.14986,0.059905,0.894232,0.111612,0.061805,0.269892,0.094886,0.045494,0.179804,0.11042,0.004294,0.30765,0.103647,0.064037,1.26961,0.106137
rpdbcs,0.073254,0.241387,0.137045,0.054463,0.895483,0.107586,0.063578,0.299809,0.089871,0.039646,0.161723,0.097027,0.020686,0.306027,0.106137,0.002906,1.26586,0.030795


- UOC é o super diferentão. Como mostrado abaixo, há algumas classes que o fazem ser bem diferente.
- RPDBCS é bem diferente dos demais tbm. Pela mesma razão.

In [17]:
df.groupby(['domain','label']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,min_dist_XJTU,max_dist_XJTU,avg_dist_XJTU,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs,min_dist_UOC,max_dist_UOC,avg_dist_UOC
domain,label,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
UOC,0,0.032269,0.940289,0.112536,0.043421,0.274911,0.077042,0.040728,0.149024,0.095752,0.046445,0.251605,0.109074,0.041594,1.215182,0.084569,,,
UOC,1,0.043171,0.931591,0.12054,0.050638,0.274701,0.091686,0.051053,0.158537,0.102545,0.063412,0.258976,0.120528,0.040874,1.223169,0.090188,,,
UOC,2,0.072637,0.904674,0.1278,0.077725,0.300484,0.116128,0.061068,0.217916,0.13662,0.042629,0.33951,0.127311,0.101876,1.304542,0.141049,,,
UOC,3,0.079972,0.860829,0.144645,0.088696,0.244184,0.137049,0.120426,0.221557,0.152488,0.108942,0.367336,0.146815,0.12481,1.319305,0.143136,,,
UOC,4,0.076834,0.890164,0.127077,0.069886,0.248994,0.118122,0.088508,0.159945,0.127098,0.095786,0.267062,0.14696,0.089239,1.256212,0.121054,,,
UOC,5,0.112148,0.926449,0.147739,0.110458,0.349743,0.141847,0.102445,0.206445,0.150442,0.102855,0.302648,0.160984,0.126287,1.273444,0.15332,,,
UOC,6,0.088544,1.014863,0.215287,0.131898,0.359953,0.184458,0.1713,0.255431,0.209632,0.154823,0.32786,0.200496,0.17874,1.212233,0.201002,,,
UOC,7,0.094024,0.901407,0.133495,0.094217,0.282151,0.115204,0.082633,0.160055,0.120467,0.064486,0.288384,0.127718,0.079186,1.252124,0.106169,,,
UOC,8,0.138237,1.029008,0.243685,0.147668,0.379889,0.211826,0.203808,0.26232,0.227301,0.190429,0.368699,0.225793,0.147781,1.216642,0.207914,,,
XJTU,0,,,,0.039796,0.260021,0.085557,0.029143,0.179015,0.100798,0.049333,0.318677,0.105398,0.068504,1.311449,0.103987,0.089293,0.280108,0.149007
