# experiment_finetunning2

In [1]:
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
import seaborn as sns
import skorch
from skorch_extra.netbase import NeuralNetBase, NeuralNetClassifier, NeuralNetTransformer
import sys
import numpy as np
sys.path.append('..')
from benchmarks.RPDBCS.models.RPDBCS2020Net import RPDBCS2020Net

plt.rcParams['figure.figsize']=(24,12)
plt.rcParams['figure.dpi']=128
plt.rcParams['font.size']=18

In [2]:
from vibdata.datahandler.transforms.TransformDataset import PickledDataset
from benchmarks.RPDBCS.datasets import TransformsDataset
from benchmarks.RPDBCS.experiment_finetunning2 import DEFAULT_NETPARAMS, DEFAULT_OPTIM_PARAMS, _transform_output, NeuralNetDomainAdapter, MetricNetPerDomain
from sklearn.decomposition import PCA
from ipywidgets import interact
import os
from benchmarks.RPDBCS.coral import CoralLoss
from sklearn.manifold import TSNE, Isomap

# @interact(fname=['train_end_vibnet_mfpt-cwru-pu','train_end_vibnet_cwru-pu-rpdbcs','train_end_vibnet_mfpt-cwru-rpdbcs','train_end_vibnet_mfpt-pu-rpdbcs'])

DATA_NAMES = ['rpdbcs', 'mfpt', 'pu', 'cwru', 'UOC', 'XJTU']

def encode_features(fname, sampling):
    module_params = {
        'n_domains': 5,
        'encode_size': 32, 'input_size': 6100,
        'head_encode_size': 8,
        'all_outputs': False,
        'backbone': RPDBCS2020Net 
    }
    module_params = {"module__"+key: v for key, v in module_params.items()}
    module_params['module'] = MetricNetPerDomain
    # module_params['module'] = MetricNet

    net_params = DEFAULT_NETPARAMS.copy()
    ### Criterion parameters ###
    net_params.update({
        # 'device':'cuda',
        'max_epochs': 100,
        'criterion': CoralLoss,
        'criterion__clf_loss': None,
        'criterion__lamb': 1.0,
        'batch_size': 128
    })
    ############################

    vibnet = NeuralNetDomainAdapter(**net_params, **module_params, **DEFAULT_OPTIM_PARAMS)
    vibnet.initialize()
    vibnet.load_params(f_params='../saved_models/bw/%s' % fname)

    

    Xf = []
    Yf = []
    Sf = []
    for dname in DATA_NAMES:
        D = PickledDataset('/tmp/sigdata_cache/%s' % dname)
        n = len(D)
        n2 = int(n*sampling)
        idxs = np.random.permutation(n)[:n2]
        X = np.empty((n2, 6100), dtype=np.float32)
        Y = np.empty(n2, dtype=int)
        for i, j in enumerate(idxs):
            X[i] = D[j]['signal']
            Y[i] = D[j]['label']

        Xe = vibnet.transform({'X': X, 'domain': [0]*n2})
        Sf.append([dname]*n2)
        Xf.append(Xe)
        Yf.append(Y)

    Xe = np.vstack(Xf)
    Y = np.hstack(Yf)
    Domain = np.hstack(Sf)
    return Xe, Y, Domain


def dimension_reduction(fname, sampling):
    Xe, Y, Domain = encode_features(fname, sampling)

    # dim_rec_alg = Isomap(n_components=2, n_jobs=6)
    # dim_rec_alg = PCA(2)
    dim_rec_alg = TSNE(2, init='pca', learning_rate='auto', n_jobs=6, n_iter=1000)
    Xe_pca = dim_rec_alg.fit_transform(Xe)
    df = pd.DataFrame(Xe_pca, columns=['comp1', 'comp2'])
    df['label'] = Y  # .astype(str)
    df['domain'] = Domain
    return df, Xe, Y


@interact(fname=[f for f in os.listdir('../saved_models/bw') if f[-3:] == '.pt'],
          sampling=(0.1, 1.0, 0.1))
def _f(fname, sampling=0.5):
    alpha = 0.65

    df, _, _ = dimension_reduction(fname, sampling)
    # df['domain-label'] = Domain + df['label']
    orig_palette = sns.color_palette()
    palette = {dl: orig_palette[i % len(DATA_NAMES)] for i, dl in enumerate(df['domain'].unique())}
    # palette['rpdbcs0'] = 'purple'
    # mask = df['domain'] == 'rpdbcs'

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(24, 16))

    mask1 = df['label'] == 0
    mask2 = df['label'] != 0

    # sns.scatterplot(data=df,x='pca1',y='pca2',hue='domain',alpha=0.8, palette=palette, style='domain');
    sns.scatterplot(data=df[mask1], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax1)
    ax1.set_title('Only Normal')
    sns.scatterplot(data=df[mask2], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax2)
    ax2.set_title('Only Defects')
    sns.scatterplot(data=df, x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax3);
    ax3.set_title('All')

interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_2.pt', 'train_end_vibnet_0.pt',…

In [49]:
@interact(fname=[f for f in os.listdir('../saved_models/coral_analysis') if f[-3:] == '.pt'],
          sampling=(0.1, 1.0, 0.1))
def _f(fname, sampling=0.5):
    alpha = 0.65

    df, _, _ = dimension_reduction(fname, sampling)
    # df['domain-label'] = Domain + df['label']
    orig_palette = sns.color_palette()
    palette = {dl: orig_palette[i % len(DATA_NAMES)] for i, dl in enumerate(df['domain'].unique())}
    # palette['rpdbcs0'] = 'purple'
    # mask = df['domain'] == 'rpdbcs'

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(24, 16))

    mask1 = df['label'] == 0
    mask2 = df['label'] != 0

    sns.scatterplot(data=df[mask1], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax1)
    ax1.set_title('Only Normal')
    sns.scatterplot(data=df[mask2], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax2)
    ax2.set_title('Only Defects')
    sns.scatterplot(data=df, x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax3);
    ax3.set_title('All')

interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_0_lamb1.pt', 'train_end_vibnet_…

In [54]:
def dimension_reduction(fname, sampling):
    Xe, Y, Domain = encode_features(fname, sampling)

    # dim_rec_alg = Isomap(n_components=2, n_jobs=6)
    dim_rec_alg = PCA(2)
    # dim_rec_alg = TSNE(2, init='pca', learning_rate='auto', n_jobs=6, n_iter=1000)
    # Xe_pca = dim_rec_alg.fit_transform(Xe)
    Xe_pca = Xe[:,[2,3]]
    df = pd.DataFrame(Xe_pca, columns=['comp1', 'comp2'])
    df['label'] = Y  # .astype(str)
    df['domain'] = Domain
    return df, Xe, Y

In [55]:
@interact(fname=[f for f in os.listdir('../saved_models/coral_analysis') if f[-3:] == '.pt'],
          sampling=(0.1, 1.0, 0.1))
def _f(fname, sampling=0.5):
    alpha = 0.65

    df, _, _ = dimension_reduction(fname, sampling)
    # df['domain-label'] = Domain + df['label']
    orig_palette = sns.color_palette()
    palette = {dl: orig_palette[i % len(DATA_NAMES)] for i, dl in enumerate(df['domain'].unique())}
    # palette['rpdbcs0'] = 'purple'
    # mask = df['domain'] == 'rpdbcs'

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(24, 16))

    mask1 = df['label'] == 0
    mask2 = df['label'] != 0

    sns.scatterplot(data=df[mask1], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax1)
    ax1.set_title('Only Normal')
    sns.scatterplot(data=df[mask2], x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax2)
    ax2.set_title('Only Defects')
    sns.scatterplot(data=df, x='comp1', y='comp2', hue='domain',
                    alpha=alpha, palette=palette, style='domain', ax=ax3);
    ax3.set_title('All')

interactive(children=(Dropdown(description='fname', options=('train_end_vibnet_0_lamb1.pt', 'train_end_vibnet_…

Pode ser que os head_classifiers estão fazendo o feature_space do backnone ser fácil de separar, pois isso pode facilitar para classificar. 
Talvez seja melhor criar um backbone com dois feature_space onde somente um é aplicado coral loss.

# Which samples are the most similar across domains?

In [7]:
Xe,Y,Domain = encode_features('train_end_vibnet_2.pt',0.3)

In [8]:
from itertools import combinations
from scipy.spatial import distance_matrix


def calc_metrics(M, name, axis):
    M[M == 0] = M.mean()
    return {'min_dist_%s' % name: M.min(axis=axis),
            'max_dist_%s' % name: M.max(axis=axis),
            'avg_dist_%s' % name: M.mean(axis=axis)}

sampling = 20000

uniq_domains = np.unique(Domain)
data = {d: {} for d in uniq_domains}
# for d1, d2 in combinations(uniq_domains, 2):
for i in range(len(uniq_domains)):
    d1 = uniq_domains[i]
    for j in range(i, len(uniq_domains)):
        d2 = uniq_domains[j]
        print(d1, d2)
        d1_mask = Domain == d1
        d2_mask = Domain == d2
        Y1, Y2 = Y[d1_mask], Y[d2_mask]
        X1, X2 = Xe[d1_mask], Xe[d2_mask]

        idxs1 = np.random.permutation(len(X1))[:sampling]
        idxs2 = np.random.permutation(len(X2))[:sampling]
        X1, Y1 = X1[idxs1], Y1[idxs1]
        X2, Y2 = X2[idxs2], Y2[idxs2]

        M = distance_matrix(X1, X2)
        metrics = calc_metrics(M, d2, 1)
        data[d1].update(metrics)
        data[d1]['label'] = Y1
        data[d1]['domain'] = np.full(len(Y1), d1)

        metrics = calc_metrics(M, d1, 0)
        data[d2].update(metrics)
        data[d2]['label'] = Y2
        data[d2]['domain'] = np.full(len(Y2), d2)

df = pd.DataFrame()
for _, values in data.items():
    dftmp = pd.DataFrame(values)
    df = pd.concat((df, dftmp), ignore_index=True)
df

UOC UOC
UOC XJTU
UOC cwru
UOC mfpt
UOC pu
UOC rpdbcs
XJTU XJTU
XJTU cwru
XJTU mfpt
XJTU pu
XJTU rpdbcs
cwru cwru
cwru mfpt
cwru pu
cwru rpdbcs
mfpt mfpt
mfpt pu
mfpt rpdbcs
pu pu
pu rpdbcs
rpdbcs rpdbcs


Unnamed: 0,min_dist_UOC,max_dist_UOC,avg_dist_UOC,label,domain,min_dist_XJTU,max_dist_XJTU,avg_dist_XJTU,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs
0,0.000596,0.099631,0.049598,6,UOC,0.019392,0.257049,0.055740,0.003559,0.162458,0.046792,0.001581,0.100806,0.051749,0.014249,0.208972,0.067530,0.039750,1.415024,0.053908
1,0.001448,0.093616,0.043948,3,UOC,0.000773,0.292668,0.022389,0.013527,0.150075,0.052711,0.034367,0.089864,0.061223,0.000964,0.207925,0.066865,0.057506,1.436047,0.080613
2,0.003022,0.093417,0.043168,1,UOC,0.032771,0.262940,0.078144,0.016715,0.158730,0.078965,0.038481,0.096976,0.061702,0.036726,0.188501,0.072771,0.020795,1.431599,0.036301
3,0.002181,0.103512,0.077905,1,UOC,0.057740,0.274918,0.090107,0.001576,0.158882,0.044208,0.035091,0.099845,0.054107,0.035079,0.187121,0.070094,0.024984,1.430107,0.039474
4,0.001100,0.106843,0.056178,4,UOC,0.060159,0.275797,0.092197,0.001551,0.168914,0.054376,0.069384,0.109711,0.087623,0.026598,0.253730,0.090472,0.036186,1.436256,0.052309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44231,0.018816,0.097395,0.046373,0,rpdbcs,0.018989,0.263459,0.047214,0.012997,0.162892,0.050419,0.023622,0.087084,0.055165,0.006001,0.190233,0.059003,0.000630,1.456756,0.011189
44232,0.013115,0.093336,0.046005,0,rpdbcs,0.014399,0.266636,0.043701,0.013532,0.165177,0.049483,0.020916,0.089978,0.056315,0.008388,0.189429,0.059085,0.000489,1.456463,0.010977
44233,0.020271,0.101488,0.049098,0,rpdbcs,0.011920,0.269354,0.041132,0.019780,0.170197,0.052170,0.023360,0.089597,0.056342,0.005379,0.189968,0.059542,0.001879,1.454038,0.011399
44234,0.009539,0.098459,0.047209,1,rpdbcs,0.008531,0.275521,0.035978,0.021234,0.170159,0.051523,0.023660,0.088658,0.055855,0.006541,0.189389,0.058876,0.000716,1.456725,0.013870


In [10]:
df.groupby(['domain']).mean().drop('label',axis=1)

Unnamed: 0_level_0,min_dist_UOC,max_dist_UOC,avg_dist_UOC,min_dist_XJTU,max_dist_XJTU,avg_dist_XJTU,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs
domain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
UOC,0.002471,0.089543,0.05066,0.025998,0.275076,0.057509,0.007859,0.153418,0.053487,0.035436,0.095863,0.06168,0.021441,0.206634,0.069385,0.03086,1.43606,0.049888
XJTU,0.014241,0.102469,0.057509,0.001219,0.288239,0.036366,0.00817,0.167745,0.060957,0.005781,0.110043,0.058569,0.012724,0.209391,0.072211,0.018173,1.453865,0.044476
cwru,0.015848,0.088224,0.053487,0.026855,0.242764,0.060957,0.00222,0.15126,0.055017,0.036734,0.100457,0.063344,0.024441,0.208984,0.070909,0.032245,1.435475,0.053222
mfpt,0.032724,0.094113,0.06168,0.003633,0.252854,0.058569,0.018757,0.156284,0.063344,0.003323,0.121643,0.06064,0.030313,0.211667,0.076499,0.032573,1.438674,0.058129
pu,0.032942,0.108537,0.069385,0.041779,0.23331,0.072211,0.020727,0.168236,0.070909,0.04598,0.114373,0.076499,0.002312,0.223711,0.074578,0.040778,1.449715,0.063168
rpdbcs,0.021399,0.100493,0.049888,0.014757,0.271536,0.044476,0.018598,0.168951,0.053222,0.026073,0.090918,0.058129,0.008117,0.194868,0.063168,0.001229,1.451865,0.016948


- UOC é o super diferentão. Como mostrado abaixo, há algumas classes que o fazem ser bem diferente.
- RPDBCS é bem diferente dos demais tbm. Pela mesma razão.

In [11]:
df.groupby(['domain','label']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,min_dist_UOC,max_dist_UOC,avg_dist_UOC,min_dist_XJTU,max_dist_XJTU,avg_dist_XJTU,min_dist_cwru,max_dist_cwru,avg_dist_cwru,min_dist_mfpt,max_dist_mfpt,avg_dist_mfpt,min_dist_pu,max_dist_pu,avg_dist_pu,min_dist_rpdbcs,max_dist_rpdbcs,avg_dist_rpdbcs
domain,label,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
UOC,0,0.002341,0.091953,0.052239,0.027575,0.274551,0.060812,0.005563,0.155824,0.051812,0.038048,0.095643,0.063573,0.019545,0.205889,0.067939,0.008452,1.456918,0.035458
UOC,1,0.002468,0.089167,0.050916,0.028276,0.275142,0.060334,0.007898,0.152662,0.053325,0.034238,0.096972,0.060754,0.02306,0.204623,0.069203,0.025506,1.429981,0.03966
UOC,2,0.002807,0.090967,0.049202,0.026141,0.274337,0.058219,0.007771,0.153772,0.054333,0.036,0.094989,0.062172,0.023671,0.208568,0.070472,0.018456,1.426656,0.050254
UOC,3,0.002347,0.087943,0.04856,0.02078,0.272413,0.053123,0.007935,0.154271,0.052734,0.030538,0.094136,0.058866,0.022052,0.205264,0.069521,0.064814,1.438149,0.088021
UOC,4,0.002684,0.08987,0.051439,0.02757,0.274129,0.059113,0.006992,0.155992,0.052536,0.040046,0.097615,0.064651,0.021331,0.206348,0.069234,0.037945,1.436112,0.053897
UOC,5,0.002427,0.086633,0.051305,0.026893,0.276522,0.058103,0.008048,0.15187,0.055875,0.031522,0.09521,0.06035,0.023192,0.203609,0.069105,0.032375,1.443447,0.04705
UOC,6,0.002248,0.090142,0.050978,0.029164,0.276655,0.059994,0.009426,0.153031,0.055195,0.036798,0.095368,0.062343,0.020336,0.204619,0.068673,0.04146,1.411746,0.057255
UOC,7,0.002327,0.088653,0.050084,0.026607,0.275462,0.057105,0.008968,0.151449,0.05407,0.034973,0.096624,0.059933,0.019462,0.213012,0.070563,0.014053,1.441371,0.026694
UOC,8,0.002563,0.089507,0.05082,0.020099,0.276959,0.049282,0.008883,0.150984,0.052487,0.03523,0.095657,0.061665,0.020555,0.207694,0.070022,0.042951,1.4367,0.057331
XJTU,0,0.014155,0.102407,0.057436,0.001214,0.288257,0.036469,0.008096,0.16777,0.060846,0.005787,0.110084,0.058587,0.012783,0.209417,0.072233,0.010338,1.457422,0.036999
