This notebook contains:

- SVCCA Net Similarity calculation for branch 1 as target and correlation with transferability
- SVCCA Net Similarity calculation for all branches and correlation with transferability
- Row-wise layer similarity comparison with transferability for all branches
- Column-wise/head-wise layer similarity comparison with transferability for all branches
- Single layer visualization of SVCCA Similarity for target branch 1 compared with branch 1 to branch 6

In [None]:
import os, sys
from matplotlib import pyplot as plt
%matplotlib inline
import time
import numpy as np
import pickle
import pandas
import gzip
import seaborn as sns
from scipy import stats

sys.path.append("..")
import cca_core

In [None]:
def _plot_helper(arr, xlabel, ylabel):
    """Draw `arr` as a line on the current axes, label both axes and show a grid."""
    axes = plt.gca()
    axes.plot(arr, linewidth=2.0)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.grid()
    
def _svd_reduce(path_prefix, layer_number, keep_dims):
    """Load one layer's activations from CSV and keep its top singular directions."""
    acts = np.genfromtxt(path_prefix + str(layer_number) + '.csv', delimiter=',')
    # NOTE(review): the activations are used exactly as loaded. The mean
    # subtraction that used to sit here was commented out in the original
    # code -- confirm the CSV files are already centred.
    _, singular_values, right_vectors = np.linalg.svd(acts, full_matrices=False)
    # Clamp so that a layer with fewer than `keep_dims` singular values is
    # reduced to what is available instead of failing on a shape mismatch.
    kept = min(keep_dims, singular_values.shape[0])
    # Scaling row i of Vh by s[i] is the same as diag(s[:kept]) @ Vh[:kept].
    return singular_values[:kept, np.newaxis] * right_vectors[:kept]


def calculate_SVCCA(activations1, activations2, layer_number, keep_dims=60):
    """Return the mean SVCCA coefficient of two models for a single layer.

    The activations of each model are read from
    ``<activationsN><layer_number>.csv`` (comma separated), reduced with an SVD
    to at most `keep_dims` singular directions and passed to
    ``cca_core.get_cca_similarity``.

    Parameters
    ----------
    activations1, activations2 : str
        Path prefixes (e.g. ``'activations/m1_x1/'``); the layer number and
        ``'.csv'`` are appended to build the file name.
    layer_number : int
        Layer whose activation file is compared.
    keep_dims : int, optional
        Maximum number of singular directions kept per model (default 60, the
        value that was previously hard-coded). If a layer has fewer singular
        values, all of them are kept.

    Returns
    -------
    float
        Mean of the ``"cca_coef1"`` entry returned by ``cca_core``.

    Raises
    ------
    ValueError
        If `keep_dims` is smaller than 1.
    """
    if keep_dims < 1:
        raise ValueError("keep_dims must be a positive integer")

    svacts1 = _svd_reduce(activations1, layer_number, keep_dims)
    svacts2 = _svd_reduce(activations2, layer_number, keep_dims)

    svcca_results = cca_core.get_cca_similarity(svacts1, svacts2, epsilon=1e-10, verbose=False)
    return np.mean(svcca_results["cca_coef1"])


def calaculate_mean_model_correlation(activation1, activation2, conv_layers):
    """Average the per-layer SVCCA correlation of two models over `conv_layers`.

    `activation1` and `activation2` are the activation path prefixes handed to
    `calculate_SVCCA`. The mean is printed, labelled with the tail of both
    prefixes, and returned.
    """
    per_layer = [
        calculate_SVCCA(activation1, activation2, layer)
        for layer in conv_layers
    ]
    mean_model_corr = np.mean(per_layer)
    # Label is built from the last six characters of each prefix; the second
    # one drops its final character (the trailing '/').
    pair_label = activation1[-6:] + activation2[-6:-1]
    print(pair_label + " Mean Model Correlation:", mean_model_corr)
    return mean_model_corr

# SVCCA with Activation Vectors based on Target data, Branch 1 as Target

In [None]:
# Layers for which SVCCA can be calculated: every layer index from 4 to 23.
conv_layers = list(range(4, 24))

print('Correlation between Models comparing all available Convolutional Layers:')
# Reference activations (model 1 on x1) and the six models compared against it.
act1_target = 'activations/m1_x1/'
act1_list = [
    'activations/m1_x1/',
    'activations/m2_x1/',
    'activations/m3_x1/',
    'activations/m4_x1/',
    'activations/m5_x1/',
    'activations/m6_x1/',
]
# One mean model correlation per entry of act1_list, in the same order.
svcca_corr = [
    calaculate_mean_model_correlation(act1_target, act1, conv_layers)
    for act1 in act1_list
]

In [None]:
# Transferability values with branch 1 as target, one entry per compared model.
# The first entry (0.0000) belongs to the branch-1 self-comparison.
# NOTE(review): these numbers differ from the first six entries of the
# `transferability` list defined further down -- confirm which measurement
# each list comes from.
transferability_branch1 = [0.0000,-0.0888,-0.0644,-0.0584,-0.0275,-0.1820]

In [None]:
# Spearman (rank) and Pearson correlation between transferability and net
# similarity for branch 1. Index 0 is skipped: it is the self-comparison
# (transferability = 0, net similarity = 1).
print('Correlations without transferability=0 and net_similartiy=1:')
transfer_others = transferability_branch1[1:]
similarity_others = svcca_corr[1:]

spearman_corr, spearman_pvalue = stats.spearmanr(transfer_others, similarity_others)
print("Spearman Correlation:", spearman_corr)
print("Spearman p_value:", spearman_pvalue)

pearson_corr, pearson_pvalue = stats.pearsonr(transfer_others, similarity_others)
print("Pearson Correlation:", pearson_corr)
print("Pearson p_value:", pearson_pvalue)

If we exclude the pair with Net Similarity S_N(M1,X1)=1 and Transferability T(M1,X1)=0 from the calculation, we get a small negative correlation for the other five value pairs (S_N(M2,X1), T(M2,X1)) ... (S_N(M6,X1), T(M6,X1)),

but the p-value is > 0.05, so we cannot reject H0.

# SVCCA with Activation vectors based on Target data, for all branches

In [None]:
def run_svcca_for_all_branches(conv_layers, n_branches=6):
    """Compute the mean model correlation for every (target, model) pair.

    For each target branch ``t`` the reference activations
    ``activations/m<t>_x<t>/`` are compared with ``activations/m<m>_x<t>/`` for
    every model ``m`` (presumably "model m evaluated on the data of branch t"
    -- confirm against how the activation files were exported).

    Parameters
    ----------
    conv_layers : list of int
        Layer numbers over which each mean model correlation is averaged.
    n_branches : int, optional
        Number of branches/models (default 6, the previously hard-coded set).

    Returns
    -------
    list
        ``n_branches * n_branches`` values in target-major order: all models
        for target 1 first, then all models for target 2, and so on.
    """
    svcca_corr_all = []
    branches = range(1, n_branches + 1)
    for target in branches:
        # Reference: the target branch's own model on its own data.
        target_dir = 'activations/m{0}_x{0}/'.format(target)
        for model in branches:
            model_dir = 'activations/m{0}_x{1}/'.format(model, target)
            svcca_corr_all.append(
                calaculate_mean_model_correlation(target_dir, model_dir, conv_layers)
            )
    return svcca_corr_all
svcca_corr_all = run_svcca_for_all_branches(conv_layers)

In [None]:
# Lay the mean model correlations out as a table: the flat list is cut into
# six consecutive chunks (one per target branch) and transposed, so every
# column holds one target branch and every row one compared model.
all_corr = np.array_split(np.array(svcca_corr_all), 6)
cm = sns.light_palette("green", as_cmap=True)
df = pandas.DataFrame.from_records(all_corr).T.style.background_gradient(cmap=cm)
df
# Alternative view: sns.heatmap on the unstyled frame, cmap='RdYlGn_r', annot=True

In [None]:
# Transferability values (transfer performance increase in %).
# 36 values written as six lines of six; the entries at flat indices
# 0, 7, 14, 21, 28 and 35 are exactly 0.
# NOTE(review): presumably each line is one target branch and each position one
# source model, matching the order of run_svcca_for_all_branches -- confirm.
transferability = [
0,-0.027949349,-0.004832516,0.000775434,0.029925489,-0.115910142,
0.084846395,0,0.028964833,0.050420863,0.051391465,0.051558979,
-0.011856568,0.003867993,0,-0.005315545,-0.05501885,-0.083033235,
0.156341325,0.05825559,0.117967825,0,0.086251248,0.069640608,
0.097457896,0.096710167,0.096331993,0.065604118,0,0.341597037,
-0.00821286,0.233046817,-0.185002721,-0.015122716,-0.012676524,0
]

In [None]:
print('Visualization of transferability results:')
# Same layout as the net-similarity table: six consecutive chunks, transposed
# so that each chunk becomes one column of the annotated heatmap.
transfer = np.array_split(transferability, 6)
df_transfer = pandas.DataFrame.from_records(transfer).T
sns.heatmap(df_transfer, cmap='Greens', annot=True)

In [None]:
# Correlation between SVCCA mean model correlation (net similarity) and
# transferability (transfer performance increase), usable for all branches.
def rank_pearson_corr(transferability, svcca_corr_all):
    """Print Spearman and Pearson correlation (with p-values) of the two sequences.

    Nothing is returned; the four result lines are written to stdout,
    Spearman first, then Pearson.
    """
    measures = (
        ("Spearman Correlation:", "Spearman p_value:", stats.spearmanr),
        ("Pearson Correlation:", "Pearson p_value:", stats.pearsonr),
    )
    for coefficient_label, pvalue_label, measure in measures:
        coefficient, p_value = measure(transferability, svcca_corr_all)
        print(coefficient_label, coefficient)
        print(pvalue_label, p_value)

In [None]:
# Spearman (rank) and Pearson correlation over all branches, leaving out the
# six self-comparisons (transferability = 0, net similarity = 1).
print('Rank and Pearson Correlation without transferability=0 and net_similarity=1 for all branches')
# In the flat list of 36 values the self-comparisons sit at 0, 7, 14, 21, 28
# and 35, i.e. at the multiples of 7; everything else is kept.
relevant_indices = [idx for idx in range(36) if idx % 7 != 0]
# Both arrays still hold all 36 values; the filtering happens in the call below.
transferability_no_zeros = np.array(transferability)
net_sim_no_ones = np.array(svcca_corr_all)
rank_pearson_corr(
    transferability_no_zeros[relevant_indices],
    net_sim_no_ones[relevant_indices],
)

H0: Correlation = 0

Testing H0: the Spearman correlation is positive (0.20845),

but the p-value is > 0.05, so we cannot reject H0.



# row-wise comparison of correlation, all branches

Try calculating the Correlation by just comparing convolutional layer 4, 5, 6, 7 (first conv row of model)

In [None]:
print('row_1:')
# First row of the model: layers 4 to 7.
conv_row1 = list(range(4, 8))
svcca_row1 = np.array(run_svcca_for_all_branches(conv_row1))
# Skip the self-comparisons, which sit at the multiples of 7 in the flat list.
relevant_indices = [idx for idx in range(36) if idx % 7 != 0]
# From here on `transferability` is an array, so the cells below can index it
# with a list of positions.
transferability = np.array(transferability)
rank_pearson_corr(transferability[relevant_indices], svcca_row1[relevant_indices])

Positive correlation between transferability and net similarity,

but the p-value is > 0.05, so we cannot reject H0.

Correlations for other rows:

In [None]:
# Remaining rows of the model, four consecutive layers each. Relies on
# `transferability` (as an array) and `relevant_indices` from the row_1 cell.
conv_row2 = list(range(8, 12))
conv_row3 = list(range(12, 16))
conv_row4 = list(range(16, 20))
conv_row5 = list(range(20, 24))

print('row_2:')
svcca_row2 = np.array(run_svcca_for_all_branches(conv_row2))
rank_pearson_corr(transferability[relevant_indices], svcca_row2[relevant_indices])

print('row_3:')
svcca_row3 = np.array(run_svcca_for_all_branches(conv_row3))
rank_pearson_corr(transferability[relevant_indices], svcca_row3[relevant_indices])

print('row_4:')
svcca_row4 = np.array(run_svcca_for_all_branches(conv_row4))
rank_pearson_corr(transferability[relevant_indices], svcca_row4[relevant_indices])

print('row_5_pooling:')
svcca_row5 = np.array(run_svcca_for_all_branches(conv_row5))
rank_pearson_corr(transferability[relevant_indices], svcca_row5[relevant_indices])

Row 4:

Spearman Correlation: 0.23381535038932147
Spearman p_value: 0.21365894749258096

p-value > 0.05

# Column-wise comparison of correlation (head-wise)

Try calculating the Correlation by just comparing convolutional layer 4, 8, 12, 16, 20 of the first head (revenue)

In [None]:
# Head 1 (revenue): every fourth layer starting at 4 -> 4, 8, 12, 16, 20.
head1_layers = list(range(4, 24, 4))
svcca_head1 = np.array(run_svcca_for_all_branches(head1_layers))
rank_pearson_corr(
    transferability[relevant_indices],
    svcca_head1[relevant_indices],
)

Try calculating the Correlation by just comparing convolutional layer 5, 9, 13, 17, 21 of the second head (month)

In [None]:
# Head 2 (month): every fourth layer starting at 5 -> 5, 9, 13, 17, 21.
head2_layers = list(range(5, 24, 4))
svcca_head2 = np.array(run_svcca_for_all_branches(head2_layers))
rank_pearson_corr(
    transferability[relevant_indices],
    svcca_head2[relevant_indices],
)

Try calculating the Correlation by just comparing convolutional layer 6, 10, 14, 18, 22 of the third head (weekday)

In [None]:
# Head 3 (weekday): every fourth layer starting at 6 -> 6, 10, 14, 18, 22.
head3_layers = list(range(6, 24, 4))
svcca_head3 = np.array(run_svcca_for_all_branches(head3_layers))
rank_pearson_corr(
    transferability[relevant_indices],
    svcca_head3[relevant_indices],
)

Try calculating the Correlation by just comparing convolutional layer 7, 11, 15, 19, 23 of the fourth head (year)

In [None]:
# Head 4 (year): every fourth layer starting at 7 -> 7, 11, 15, 19, 23.
head4_layers = list(range(7, 24, 4))
svcca_head4 = np.array(run_svcca_for_all_branches(head4_layers))
rank_pearson_corr(
    transferability[relevant_indices],
    svcca_head4[relevant_indices],
)

There is a positive correlation between transferability and net similarity if we look at the distinct heads of the multihead CNN models (revenue, month, weekday and year),

but the p-value is > 0.05, so we cannot reject H0.

# Single layer visualization (layer 4 to 23), Branch 1 as Target

In [None]:
# Per-layer SVCCA correlation (layers 4 to 23) of the branch-1 reference against
# each of the six models. `act1_target` and `act1_list` come from the branch-1
# cell near the top of the notebook.
conv_layers = list(range(4, 24))

# Outer list: one entry per model in act1_list; inner list: one value per layer.
model_corr = [
    [calculate_SVCCA(act1_target, act1, conv) for conv in conv_layers]
    for act1 in act1_list
]

In [None]:
# One row per compared model, one column per layer number.
df = pandas.DataFrame.from_records(data=model_corr, columns=conv_layers)

The following table/df shows the distinct layer SVCCA correlations (from layer 4 to layer 23) 
for target branch 1 compared to branch 1 to 6 (index 0-5)

In [None]:
# Flip the table so that layers run down the rows and models across the
# columns, then shade the cells green in proportion to their value.
layerwise = df.transpose()

cm = sns.light_palette("green", as_cmap=True)
branch1 = layerwise.style.background_gradient(cmap=cm)
branch1

The model of branch 1 is positively correlated with all other 5 branches.

It's more similar to 4, 5, 6 than to 2 and 3.

In layers 7, 11, 15, 19, 23 the model of branch 1 is not correlated with model 2 and model 3; these layers differ markedly in SVCCA correlation.


In [None]:
# Column-wise mean over all layers: the mean model correlation of target
# branch 1 against branch 1 to branch 6 (index 0-5).
layerwise.mean(axis=0)