# SVCCA for weekly forecast with multihead CNN model

In [None]:
import os, sys
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
from matplotlib import pyplot as plt
import cca_core
%matplotlib inline


In [None]:
# Rank Correlation of SVCCA mean model correlation (net similarity) and Transferability (Transfer Performance increase)
def rank_pearson_corr(transferability, svcca_corr_all):
    """Print rank and linear correlation between two equally long sequences.

    For each of Spearman's rho, Kendall's tau and Pearson's r the coefficient
    and its p-value are printed. A blank line separates the three reports.

    Args:
        transferability: Sequence of transferability values.
        svcca_corr_all: Sequence of SVCCA mean model correlations
            (net similarity), paired element-wise with ``transferability``.

    Returns:
        None. The results are only printed.
    """
    # (coefficient label, p-value label, scipy function); the two rank
    # correlations come first, Pearson is reported last for comparison
    reports = (
        ("Spearman Correlation:", "Spearman p_value:", stats.spearmanr),
        ("Kendall Correlation:", "Kendall p_value:", stats.kendalltau),
        ("Pearson Correlation:", "Pearson p_value:", stats.pearsonr),
    )
    final_position = len(reports) - 1
    for position, (coef_label, p_label, measure) in enumerate(reports):
        coefficient, p_value = measure(transferability, svcca_corr_all)
        print(coef_label, coefficient)
        print(p_label, p_value)
        # blank separator between reports, none after the last one
        if position != final_position:
            print("")

In [None]:
# load python file to compare SVCCA results with results saved in csv / sanity check
import SVCCA_weekly

In [None]:
# Load the net similarity matrix that was saved to CSV earlier.
# NOTE(review): read without index_col - presumably the file contains only the
# matrix values; confirm that no index column was written when it was saved.
net_similarities = pd.read_csv('../temp/net_similarity_weekly.csv')

# net_similarities visualization
print("Net Similarity:")
# green colormap; kept in the global `cm` because later cells reuse it
cm = sns.light_palette("green", as_cmap=True)
net_sim_visual = net_similarities.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
net_sim_visual

# Comparison with transferability 

In [None]:
# Transferability (transfer performance increase) as a hard-coded 6x6 matrix.
# The diagonal is 0 for every branch.
# NOTE(review): which axis is the source branch and which the target branch is
# not visible here - confirm it matches the orientation of the net similarity matrix.
transferability = [
    [0,-0.0026408,0.0053692,-0.0118591,0.1293025,0.0212494],
    [0.0657007,0,-0.0159039,-0.0119574,0.1552900,0.0607466],
    [0.0504179,0.0727003,0,-0.0820202,0.0724330,0.0207745],
    [0.0448396,0.0131069,-0.0678191,0,0.0717826,0.0415678],
    [0.0517473,0.0129677,-0.0214962,-0.0590644,0,0.0530523],
    [0.0448200,0.0059016,-0.0314071,-0.0415958,0.2059353,0]
]
transferability = pd.DataFrame(transferability)
# larger fonts for the heatmap annotations and labels
sns.set(font_scale=1.1)

# annotated heatmap of the transferability matrix
f, ax = plt.subplots(figsize=(9, 8))
ax.set_title('Transferability', fontsize=26)
sns.heatmap(transferability, annot=True, fmt="f", linewidths=.5, ax=ax,robust=True, cmap='Greens')  # alternative colormap: RdBu_r
plt.show()

In [None]:
# Flatten both data frames (row-major) into 1-D arrays so that they can be
# passed to the correlation functions.

# net similarity
net_sim_array = net_similarities.values.ravel()

# transferability
transfer_array = transferability.values.ravel()

In [None]:
# Rank (Spearman, Kendall) and Pearson correlation on the off-diagonal entries
# only: the diagonal (transferability=0 and net_similarity=1 for all branches)
# is left out.
print('Rank and Pearson Correlation without transferability=0 and net_similarity=1 for all branches')
print("")

# In the flattened 6x6 matrices the diagonal sits at the multiples of 7
# (0, 7, 14, 21, 28, 35); every other position is kept (30 entries).
relevant_indices = [flat_index for flat_index in range(36) if flat_index % 7 != 0]
rank_pearson_corr(transfer_array[relevant_indices], net_sim_array[relevant_indices])

p-value to test for non-correlation:
 
The two-sided p-value belongs to a hypothesis test whose null hypothesis is that the two sets of data are uncorrelated. It has the same dimension as rho.


p-value ≤ α: The correlation is statistically significant (usually α = 0.05 or 0.01)

If the p-value is less than or equal to the significance level, then you can conclude that the correlation is different from 0.

If the p-value is much larger than the significance level (0.05 or 0.01), H0 cannot be rejected.

In [None]:
# Sanity check of rank_pearson_corr with a simple example:
# b is exactly 2 * a, i.e. a perfectly linear and monotonic relationship.

a = [1,2,3,4,5,6]
b = [2,4,6,8,10,12]

rank_pearson_corr(a,b)

In [None]:
# Second sanity check: c is not monotonic in a (a is the list from the previous cell).
c = [4,1,5,-1,-10,14]
rank_pearson_corr(a,c)

# head-wise comparison of net similarity and transferability

In [None]:
# methods for calculating SVCCA for all branches

def SVCCA(activations1, activations2, layer_number, keep_dims=30):
    """Return the mean SVCCA coefficient of one layer for two activation sets.

    The activations are read from the CSV files
    ``activations1 + str(layer_number) + '.csv'`` and
    ``activations2 + str(layer_number) + '.csv'``. Both are mean-centered,
    reduced to their leading singular directions and compared with
    ``cca_core.get_cca_similarity``.

    Args:
        activations1: Path prefix of the first activation files.
        activations2: Path prefix of the second activation files.
        layer_number: Layer index appended to both prefixes.
        keep_dims: Number of leading singular directions kept before CCA.
            The default of 30 matches the value that used to be hard-coded.
            If an activation matrix has fewer singular values, all of them
            are used.

    Returns:
        The mean of the ``"cca_coef1"`` entry of the CCA result.
    """
    # load activations
    acts1 = np.genfromtxt(activations1 + str(layer_number) + '.csv', delimiter=',')
    acts2 = np.genfromtxt(activations2 + str(layer_number) + '.csv', delimiter=',')

    # Mean subtract activations
    # NOTE(review): centering is done along axis 0; cca_core presumably expects
    # (neurons, datapoints) input - confirm the layout of the CSV files.
    cacts1 = acts1 - np.mean(acts1, axis=0, keepdims=True)
    cacts2 = acts2 - np.mean(acts2, axis=0, keepdims=True)

    # Perform SVD; only the singular values and right singular vectors are used
    _, s1, V1 = np.linalg.svd(cacts1, full_matrices=False)
    _, s2, V2 = np.linalg.svd(cacts2, full_matrices=False)

    # Project onto the leading singular directions. np.diag adapts to the
    # number of available singular values, so layers with fewer than
    # keep_dims of them no longer fail with a broadcasting error.
    svacts1 = np.dot(np.diag(s1[:keep_dims]), V1[:keep_dims])
    svacts2 = np.dot(np.diag(s2[:keep_dims]), V2[:keep_dims])

    svcca_results = cca_core.get_cca_similarity(svacts1, svacts2, epsilon=1e-10, verbose=False)
    return np.mean(svcca_results["cca_coef1"])


def calaculate_mean_model_correlation(activation1, activation2, conv_layers):
    """Average the SVCCA correlation of two models over the given layers.

    Args:
        activation1: Path prefix of the first model's activation files.
        activation2: Path prefix of the second model's activation files.
        conv_layers: Iterable of layer numbers to compare.

    Returns:
        The mean of the per-layer SVCCA correlations.
    """
    # one SVCCA correlation per layer, in the order given by conv_layers
    per_layer_corr = [SVCCA(activation1, activation2, layer) for layer in conv_layers]
    return np.mean(per_layer_corr)


def calculate_SVCCA_for_all_branches(conv_layers):
    """Compute the mean model correlation for every pair of the six branches.

    For each input data set x_d (d = 1..6) the activations of the target
    branch m_d on x_d are compared with the activations of every branch m_k
    (k = 1..6) on the same data x_d.

    Args:
        conv_layers: Layer numbers over which the SVCCA correlation is averaged.

    Returns:
        A 6x6 data frame. After the final transpose the row index is the
        compared branch m_k and the column index is the target branch / data
        set d (both zero-based).
    """
    branch_numbers = range(1, 7)
    # activation folder of branch m<model> evaluated on input data x<data>
    folder = 'activations/weekly/m{}_x{}_weekly/'

    # outer loop: target branch and its data; inner loop: compared branch
    model_correlations = [
        calaculate_mean_model_correlation(
            folder.format(data_no, data_no),
            folder.format(model_no, data_no),
            conv_layers,
        )
        for data_no in branch_numbers
        for model_no in branch_numbers
    ]

    # one chunk of six values per target branch, then one data frame row each
    per_target = np.split(np.array(model_correlations), 6)
    # transpose so that the compared branches become the rows
    return pd.DataFrame(per_target).T


In [None]:
# To check that the right layers of head_1 are selected, load an example CNN
# model and print the names of the layers at indices 4 and 8.
from keras.models import load_model
example_model = load_model('../models/pretrained/branch1_cnn_weekly.h5')
print(example_model.layers[4].name)
print(example_model.layers[8].name)

# head 1 (revenue)

In [None]:
# Calculate net similarity just for the first head of the CNN model (revenue head), which consists of layers 4 and 8
head_1_layers = [4, 8]
head_1_net_sim = calculate_SVCCA_for_all_branches(head_1_layers)
# shade the cells with the green colormap `cm`
head_1_visual = head_1_net_sim.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
head_1_visual

The convolutional layers of head_1 (revenue) are very similar! Mean model correlation > 0.9

In [None]:
# head 1 net similarity as a flat, row-major array
head_1_net_sim_array = head_1_net_sim.values.ravel()

# correlation between head 1 net similarity and transferability on the entries selected by relevant_indices
rank_pearson_corr(head_1_net_sim_array[relevant_indices], transfer_array[relevant_indices])

# head 2 (month)

In [None]:
# Calculate net similarity just for the second head of the CNN model (month head), which consists of layers 5 and 9
head_2_layers = [5, 9]
head_2_net_sim = calculate_SVCCA_for_all_branches(head_2_layers)
# shade the cells with the green colormap `cm`, as for the other net similarity tables
head_2_visual = head_2_net_sim.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
head_2_visual

Head 2 (month) of the models differs and shows symmetric behaviour in the matrix.

In [None]:
# head 2 net similarity as a flat, row-major array
head_2_net_sim_array = head_2_net_sim.values.ravel()

# correlation between head 2 net similarity and transferability on the entries selected by relevant_indices
rank_pearson_corr(head_2_net_sim_array[relevant_indices], transfer_array[relevant_indices])

# head 3 (weekday)

In [None]:
# Calculate net similarity just for the third head of the CNN model (weekday head), which consists of layers 6 and 10
head_3_layers = [6, 10]
head_3_net_sim = calculate_SVCCA_for_all_branches(head_3_layers)
# shade the cells with the green colormap `cm`, as for the other net similarity tables
head_3_visual = head_3_net_sim.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
head_3_visual

The convolutional layers of Head_3 (weekday) have a symmetric relationship in the matrix.

In [None]:
# head 3 net similarity as a flat, row-major array
head_3_net_sim_array = head_3_net_sim.values.ravel()

# correlation between head 3 net similarity and transferability on the entries selected by relevant_indices
rank_pearson_corr(head_3_net_sim_array[relevant_indices], transfer_array[relevant_indices])

# head 4 (year)

In [None]:
# Calculate net similarity just for the fourth head of the CNN model (year head), which consists of layers 7 and 11
head_4_layers = [7, 11]
head_4_net_sim = calculate_SVCCA_for_all_branches(head_4_layers)
# shade the cells with the green colormap `cm`, as for the other net similarity tables
head_4_visual = head_4_net_sim.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
head_4_visual

Net similarity for Head 4 (year) also shows a symmetric relationship in the matrix.

In [None]:
# head 4 net similarity as a flat, row-major array
head_4_net_sim_array = head_4_net_sim.values.ravel()

# correlation between head 4 net similarity and transferability on the entries selected by relevant_indices
rank_pearson_corr(head_4_net_sim_array[relevant_indices], transfer_array[relevant_indices])

Overall, there is no significant correlation between the net similarity of the distinct heads and the transfer performance delta.

# Row-wise comparison of net similarity and transferability

# row 1 (layer 4, 5, 6, 7)

In [None]:
# Calculate net similarity just for the first row, which consists of layers 4, 5, 6 and 7
row_1_layers = [4, 5, 6, 7]
row_1_net_sim = calculate_SVCCA_for_all_branches(row_1_layers)
# shade the cells with the green colormap `cm`, as for the other net similarity tables
row_1_visual = row_1_net_sim.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
row_1_visual

In [None]:
# row 1 net similarity as a flat, row-major array
row_1_net_sim_array = row_1_net_sim.values.ravel()

# correlation between net similarity of convolutional row 1 and transferability on the entries selected by relevant_indices
rank_pearson_corr(row_1_net_sim_array[relevant_indices], transfer_array[relevant_indices])

# row 2 (layers 8, 9, 10, 11)

In [None]:
# Calculate net similarity just for the second row, which consists of layers 8, 9, 10 and 11
row_2_layers = [8, 9, 10, 11]
row_2_net_sim = calculate_SVCCA_for_all_branches(row_2_layers)
# shade the cells with the green colormap `cm`, as for the other net similarity tables
row_2_visual = row_2_net_sim.style.background_gradient(cmap=cm)
# bare expression on the last line so the notebook renders the styled table
row_2_visual

In [None]:
# row 2 net similarity as a flat, row-major array
row_2_net_sim_array = row_2_net_sim.values.ravel()

# correlation between net similarity of convolutional row 2 and transferability on the entries selected by relevant_indices
rank_pearson_corr(row_2_net_sim_array[relevant_indices], transfer_array[relevant_indices])

Overall, there is no significant correlation between the net similarity of the two rows of the CNN models and the transfer performance delta.
