In [1]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import seaborn as sns
from joblib import Parallel, delayed
sys.path.append('../')

from packages import actv_analysis, svm, stats

## plot accuracy heatmap

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Network instance and number of SVM input units whose prediction files are read.
net = 1
num_units = 200

# Axis labels applied to every heatmap (2, 4, ..., 20).
number_labels = np.arange(2, 21, 2)

for relu in range(2, 3):
    # One matrix per training epoch, averaged over the 10 test splits.
    epoch_results = []
    for epoch in np.arange(0, 91, 10):
        exp_results = []
        for exp in range(10):
            test_csv = f'csv/svm_test_set{exp}.csv'
            pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'
            svm_matrix = svm.get_svm_matrix(test_csv, pred_csv)
            exp_results.append(svm_matrix)

        # Stack the per-split matrices along a new leading axis and take the
        # NaN-aware mean over it.
        stacked = np.array([matrix.to_numpy() for matrix in exp_results])
        with np.errstate(invalid='ignore'):
            avg_svm_matrix = np.nanmean(stacked, axis=0)

        # Re-attach the row/column labels of the first split's matrix.
        template = exp_results[0]
        epoch_results.append(
            pd.DataFrame(avg_svm_matrix, index=template.index, columns=template.columns)
        )

    # 5x2 grid: one heatmap per epoch (0, 10, ..., 90).
    fig, axes = plt.subplots(5, 2, figsize=(10, 20))

    for i, ax in enumerate(axes.flat):
        sns.heatmap(epoch_results[i], cmap="rocket", ax=ax, annot=True, cbar_kws={'label': 'accuracy'})
        ax.set_xticklabels(number_labels)
        ax.set_yticklabels(number_labels)
        ax.set_title(f'Epoch {i*10}')
        ax.set_aspect('equal')

    plt.tight_layout()
    plt.show()

## number of units used in SVM vs accuracy

In [None]:
# Configuration read by the accuracy cell below: network instance, ReLU layer,
# training epoch and number of units fed to the SVM.
net, relu, epoch, num_units = 1, 2, 90, 2000

In [None]:
# Mean SVM accuracy per training epoch, averaged over the 10 test splits.
# Reads `net`, `relu` and `num_units` from the configuration cell.

# A test set depends only on the split index, so each one is loaded once here
# instead of being re-read (twice) for every epoch.
test_sets = {}
for exp in range(10):
    test_csv = f'csv/svm_test_set{exp}.csv'
    test = pd.read_csv(test_csv).drop('Unnamed: 0', axis=1)
    # svm.get_y presumably returns the expected label per trial; it is compared
    # element-wise with the stored predictions below.
    test_sets[exp] = (len(test), svm.get_y(test))

accuracy_epochs = []
for epoch in range(0, 91, 10):
    accuracies = []
    for exp in range(10):
        n_trials, ans = test_sets[exp]
        pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'
        pred = pd.read_csv(pred_csv)['0'].to_numpy()

        # Fraction of trials whose prediction matches the expected label.
        equal_elements = np.equal(pred, ans)
        accuracies.append(np.sum(equal_elements) / n_trials)
    accuracy = np.mean(accuracies)
    accuracy_epochs.append(accuracy)

In [None]:
from itertools import product

# Values swept along each axis of the parameter grid.
nets = np.arange(1, 3)
relus = np.arange(5, 6, 1).astype(int)
epochs = np.arange(0, 91, 10).astype(int)
num_units = np.arange(200, 2001, 400).astype(int)

# Cartesian product of the four axes: one tuple per (net, relu, epoch, num_units).
grid_axes = {'net': nets, 'relu': relus, 'epoch': epochs, 'num_units': num_units}
all_combinations = [combo for combo in product(*grid_axes.values())]

# One row per combination; 'accuracy' starts as NaN and is filled in later.
df = pd.DataFrame(all_combinations, columns=list(grid_axes)).assign(accuracy=np.nan)

In [None]:
# Fill df['accuracy'] with the mean SVM accuracy over the 10 test splits for
# every (net, relu, epoch, num_units) row of the grid.

# A test set depends only on the split index, so each one is loaded once here
# instead of being re-read (twice) for every grid row.
test_sets = {}
for exp in range(10):
    test_csv = f'csv/svm_test_set{exp}.csv'
    test = pd.read_csv(test_csv).drop('Unnamed: 0', axis=1)
    # svm.get_y presumably returns the expected label per trial; it is compared
    # element-wise with the stored predictions below.
    test_sets[exp] = (len(test), svm.get_y(test))

for idx, row in df.iterrows():
    # Cast back to int so the values format as plain integers in the file name
    # (the row Series shares one dtype with the NaN-initialised 'accuracy' column).
    net = int(row['net'])
    relu = int(row['relu'])
    epoch = int(row['epoch'])
    num_units = int(row['num_units'])

    accuracies = []
    for exp in range(10):
        n_trials, ans = test_sets[exp]
        pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'
        pred = pd.read_csv(pred_csv)['0'].to_numpy()

        # Fraction of trials whose prediction matches the expected label.
        equal_elements = np.equal(pred, ans)
        accuracies.append(np.sum(equal_elements) / n_trials)

    df.at[idx, 'accuracy'] = np.mean(accuracies)


In [None]:
# Treat the unit count as a categorical hue so each value gets its own line
# and palette entry.
df['num_units'] = df['num_units'].astype('category')

# One cubehelix colour per distinct unit count.
palette = sns.color_palette("cubehelix", len(df['num_units'].unique()))

sns.lineplot(data=df, x='epoch', y='accuracy', hue='num_units', palette=palette)
# One tick per sampled epoch. This has to be a call: assigning an array to
# `plt.yticks` sets no ticks and replaces the pyplot function for the rest of
# the session.
plt.xticks(np.arange(0, 91, 10))
# `relu` is the value left behind by the loop that filled `df`.
plt.title(f'Relu{relu} epoch vs accuracy per number of units used in svm')
plt.tight_layout()
plt.savefig(f'Relu{relu} epoch vs accuracy per number of units used in svm.pdf')
plt.show()

## epoch & number distance vs congruency effect (accuracy of congruent - incongruent)

In [None]:
def get_num_sz_dist(test):
    """Compute signed number/size distances and congruency for each trial.

    Parameters
    ----------
    test : pandas.DataFrame
        Must contain the columns 'num1', 'num2', 'sz1' and 'sz2'.

    Returns
    -------
    pandas.DataFrame
        Indexed 0..len(test)-1 with the columns
        'num_dist'   : (num1 - num2) * 2,
        'sz_dist'    : sz1 - sz2,
        'congruency' : True where both distances have the same sign
                       (two zero distances also count as congruent).
    """
    # Work on raw arrays so rows are matched by position. Assigning Series
    # into a freshly indexed frame aligns on index labels and silently yields
    # NaN when `test` does not carry a default 0..n-1 index.
    num_dist = (test['num1'].to_numpy() - test['num2'].to_numpy()) * 2
    sz_dist = test['sz1'].to_numpy() - test['sz2'].to_numpy()
    return pd.DataFrame(
        {
            'num_dist': num_dist,
            'sz_dist': sz_dist,
            'congruency': np.sign(num_dist) == np.sign(sz_dist),
        },
        index=range(len(test)),
    )

In [None]:
from itertools import product

# Values swept along each axis of the parameter grid.
nets = np.arange(1, 3)
relus = np.arange(5, 6, 1).astype(int)
epochs = np.arange(0, 91, 10).astype(int)
num_units = np.arange(200, 2001, 400).astype(int)

# One row per (net, relu, epoch, num_units) combination, 'accuracy' initialised to NaN.
all_combinations = list(product(nets, relus, epochs, num_units))
df = pd.DataFrame(all_combinations, columns=['net', 'relu', 'epoch', 'num_units'])
df['accuracy'] = np.nan

# NOTE(review): the loop below tags each test trial as correctly predicted or
# not, but nothing is appended to `accuracies` or written back to `df`.
for idx, row in df.iterrows():
    net, relu, epoch, num_units = (
        int(row[key]) for key in ('net', 'relu', 'epoch', 'num_units')
    )

    accuracies = []
    for exp in range(10):
        test_csv = f'csv/svm_test_set{exp}.csv'
        pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'

        test = pd.read_csv(test_csv).drop('Unnamed: 0', axis=1)
        df_num_sz_dist = get_num_sz_dist(test)
        pred = pd.read_csv(pred_csv)['0'].to_numpy()
        ans = svm.get_y(pd.read_csv(test_csv).drop('Unnamed: 0', axis=1))

        # Per-trial flag: does the prediction match the expected label?
        equal_elements = np.equal(pred, ans)
        df_num_sz_dist['correctly_predicted'] = equal_elements

In [None]:
from itertools import product

# Test split evaluated in this cell. Nothing here iterates over splits, so
# `exp` used to be whatever an earlier cell's `for exp in range(10)` loop left
# behind (9). It is pinned so the cell also runs on a fresh kernel.
# NOTE(review): only a single split is evaluated — confirm this is intended.
exp = 9

# Define the ranges
nets = range(1, 2)
relus = range(5, 6)
num_units_values = range(2000, 2001, 200)
epochs = range(0, 91, 10)

# One row per (net, relu, num_units, epoch) combination.
combinations = product(nets, relus, num_units_values, epochs)
df = pd.DataFrame(combinations, columns=['net', 'relu', 'num_units', 'epoch'])
df['cong_effect'] = np.nan

# The test set depends only on `exp`, so it is loaded once up front.
test_csv = f'csv/svm_test_set{exp}.csv'
test = pd.read_csv(test_csv).drop('Unnamed: 0', axis=1)
ans = svm.get_y(pd.read_csv(test_csv).drop('Unnamed: 0', axis=1))

for net, relu, num_units, epoch in product(nets, relus, num_units_values, epochs):
    pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'
    pred = pd.read_csv(pred_csv)['0'].to_numpy()

    # Per-trial distances/congruency plus a flag for correct predictions.
    df_num_sz_dist = get_num_sz_dist(test)
    equal_elements = np.equal(pred, ans)
    df_num_sz_dist['correctly_predicted'] = equal_elements

    for nd in range(2, 19, 2):
        # Congruency effect = accuracy(congruent) - accuracy(incongruent)
        # among trials with absolute number distance `nd`.
        df_numdist = df_num_sz_dist[np.abs(df_num_sz_dist['num_dist']) == nd]
        df_numdist_cong = df_numdist[df_numdist['congruency'] == True]
        accuracy_cong = np.sum(df_numdist_cong['correctly_predicted']) / len(df_numdist_cong)
        df_numdist_incong = df_numdist[df_numdist['congruency'] == False]
        accuracy_incong = np.sum(df_numdist_incong['correctly_predicted']) / len(df_numdist_incong)
        cong_effect = accuracy_cong - accuracy_incong
        # NOTE(review): `cong_effect` is not stored — `df` has no 'num_dist'
        # column to hold one value per distance.

In [None]:
from itertools import product

# Test split evaluated in this cell. Nothing here iterates over splits, so
# `exp` used to be whatever an earlier cell's `for exp in range(10)` loop left
# behind (9). It is pinned so the cell also runs on a fresh kernel.
# NOTE(review): only a single split is evaluated — confirm this is intended.
exp = 9

# Define the ranges
nets = range(1, 3)
relus = range(2, 6)
num_units_values = range(200, 2001, 200)
epochs = range(0, 91, 10)
num_dists = range(2, 19, 2)

# The test set depends only on `exp`, so it is loaded once up front.
test_csv = f'csv/svm_test_set{exp}.csv'
test = pd.read_csv(test_csv).drop('Unnamed: 0', axis=1)
ans = svm.get_y(pd.read_csv(test_csv).drop('Unnamed: 0', axis=1))

# Each prediction file is read once per (net, relu, num_units, epoch) and
# reused for every number distance, instead of once per output row.
records = []
for net, relu, num_units, epoch in product(nets, relus, num_units_values, epochs):
    pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'
    pred = pd.read_csv(pred_csv)['0'].to_numpy()

    # Per-trial distances/congruency plus a flag for correct predictions.
    df_num_sz_dist = get_num_sz_dist(test)
    df_num_sz_dist['correctly_predicted'] = np.equal(pred, ans)
    abs_num_dist = np.abs(df_num_sz_dist['num_dist'])

    for num_dist in num_dists:
        df_numdist = df_num_sz_dist[abs_num_dist == num_dist]
        correct = df_numdist['correctly_predicted']

        # Congruency effect = accuracy(congruent) - accuracy(incongruent).
        # An empty subset gives NaN.
        accuracy_cong = correct[df_numdist['congruency'] == True].mean()
        accuracy_incong = correct[df_numdist['congruency'] == False].mean()
        cong_effect = accuracy_cong - accuracy_incong

        records.append((net, relu, num_units, epoch, num_dist, cong_effect))

# One row per (net, relu, num_units, epoch, num_dist), in product order.
df = pd.DataFrame(records, columns=['net', 'relu', 'num_units', 'epoch', 'num_dist', 'cong_effect'])

### Generate congruency effect heatmap per network and relu

In [None]:
# Shared colour scale so heatmaps are comparable across figures.
min_val = df['cong_effect'].min()
max_val = df['cong_effect'].max()

# The output file name carries an epoch tag even though each heatmap spans
# every epoch. Take it from the table rather than from a loop variable left
# behind by an earlier cell.
# NOTE(review): consider dropping the tag from the file name altogether.
last_epoch = int(df['epoch'].max())

for net in nets:
    for relu in relus:
        for num_unit in num_units_values:
            df_sub = df[(df['net']==net) & (df['relu']==relu) & (df['num_units']==num_unit)]

            # Rows: number distance, columns: epoch.
            pivot_df = df_sub.pivot(index='num_dist', columns='epoch', values='cong_effect')

            fig, ax = plt.subplots(figsize=(10, 8))
            sns.heatmap(pivot_df, annot=True, cmap='rocket', vmin=min_val, vmax=max_val, ax=ax)
            ax.set_title(f'Congruency effect net{net} relu {relu} {num_unit} units')
            fig.savefig(f'heatmap for congruency effect of net{net} relu{relu} epoch{last_epoch} {num_unit} nonzero activity units July2023.pdf')
            # One figure is created per combination; close it once saved so
            # they do not all stay open in memory.
            plt.close(fig)

### Generate congruency effect heatmap per relu (averaged across networks)

In [None]:
# Average every remaining column (including 'cong_effect') over the rows that
# share num_dist, epoch, relu and num_units, i.e. across networks.
df_avg = df.groupby(['num_dist', 'epoch', 'relu', 'num_units']).mean().reset_index()

# Shared colour scale so heatmaps are comparable across figures.
min_val = df_avg['cong_effect'].min()
max_val = df_avg['cong_effect'].max()

for relu in range(3,6):
    for num_unit in num_units_values:
        df_sub = df_avg[(df_avg['relu']==relu) & (df_avg['num_units']==num_unit)]

        # Rows: number distance, columns: epoch.
        pivot_df = df_sub.pivot(index='num_dist', columns='epoch', values='cong_effect')

        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(pivot_df, annot=True, cmap='rocket', vmin=min_val, vmax=max_val, ax=ax)
        ax.set_title(f'Average congruency effect for relu {relu} with {num_unit} units')
        fig.savefig(f'heatmap for average congruency effect of relu{relu} with {num_unit} units July2023.pdf')
        # One figure is created per combination; close it once saved so they
        # do not all stay open in memory.
        plt.close(fig)

In [None]:
import math

# Network-averaged congruency effect per (num_dist, epoch, relu, num_units).
df_avg = df.groupby(['num_dist', 'epoch', 'relu', 'num_units']).mean().reset_index()

# Shared colour scale across every panel and figure.
min_val, max_val = df_avg['cong_effect'].min(), df_avg['cong_effect'].max()

# Grid geometry: two panels per row, as many rows as needed.
num_unit_len = len(num_units_values)
num_cols = 2
num_rows = math.ceil(num_unit_len / num_cols)

for relu in range(2, 6):
    # One figure per relu, one panel per unit count.
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(10 * num_cols, 8 * num_rows))
    axs = axs.ravel()

    relu_rows = df_avg['relu'] == relu
    for panel, num_unit in zip(axs, num_units_values):
        df_sub = df_avg[relu_rows & (df_avg['num_units'] == num_unit)]

        # Rows: number distance, columns: epoch.
        pivot_df = df_sub.pivot(index='num_dist', columns='epoch', values='cong_effect')

        sns.heatmap(pivot_df, annot=True, cmap='rocket', vmin=min_val, vmax=max_val, ax=panel)
        panel.set_title(f'Average congruency effect for relu {relu} with {num_unit} units')

    # Drop the panels left over when the grid has more slots than unit counts.
    for spare in axs[num_unit_len:]:
        fig.delaxes(spare)

    plt.tight_layout()
    plt.savefig(f'heatmaps for average congruency effect of relu {relu} July2023.pdf')
    plt.show()

### Lineplot for epoch vs congruency effect (num_units = 200)

In [None]:
# Number distances present in the table: one panel each.
num_dist_values = df['num_dist'].unique()
num_dist_len = len(num_dist_values)

# Three panels per row, as many rows as needed.
num_cols = 3
num_rows = int(np.ceil(num_dist_len / num_cols))

fig, axs = plt.subplots(num_rows, num_cols, figsize=(10*num_cols, 8*num_rows))
axs = axs.flatten()

# Common y-range so panels can be compared directly.
global_y_min = df['cong_effect'].min()
global_y_max = df['cong_effect'].max()

# Fixed colour per relu value; the palette only has four entries.
relu_values = df['relu'].unique()
assert len(relu_values) <= 4, "There are more 'relu' values than colors specified"
colors = ['red', 'orange', 'green', 'blue']
color_dict = dict(zip(relu_values, colors))

only_200_units = df['num_units'] == 200
for panel, num_dist in zip(axs, num_dist_values):
    df_sub = df[(df['num_dist'] == num_dist) & only_200_units]

    sns.lineplot(data=df_sub, x='epoch', y='cong_effect', hue='relu', palette=color_dict, ax=panel)
    panel.set_title(f'Congruency effect for num_dist {num_dist}', fontsize=20)

    panel.set_ylim(global_y_min, global_y_max)
    panel.set_xticks(range(0, 91, 10))

    # Enlarge tick labels on both axes.
    for axis_name in ('x', 'y'):
        panel.tick_params(axis=axis_name, labelsize=16)

    # Enlarge legend entries.
    leg = panel.legend()
    for entry in leg.texts:
        entry.set_fontsize(16)

    # Enlarge axis labels, then replace the y label text.
    panel.xaxis.label.set_size(16)
    panel.yaxis.label.set_size(16)
    panel.set_ylabel("congruency effect", fontsize=16)

# Drop unused panels when the grid is not completely filled.
if num_dist_len % num_cols != 0:
    for spare in axs[num_dist_len:]:
        fig.delaxes(spare)

plt.tight_layout()
plt.show()

### Lineplot for ReLU vs congruency effect per epoch (one figure per number distance)

In [None]:
import matplotlib.pyplot as plt

# One figure per number distance: congruency effect vs relu, one line per epoch.
for nd in np.arange(2,19,2):

    df_sub = df[(df['num_units']==200) & (df['num_dist']==nd)]

    fig, ax = plt.subplots()
    sns.lineplot(data=df_sub, x='relu', y='cong_effect', hue='epoch', err_style='bars', palette='viridis', ax=ax)

    # Tick every relu value that is actually plotted.
    ax.set_xticks(sorted(df_sub['relu'].unique()))

    # Reference line at zero congruency effect.
    ax.axhline(0, color='red', linestyle='--')

    ax.set_ylabel('Congruency Effect')

    # Shrink the axes by 15% and place the legend to the right of them.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.85, box.height])
    legend = ax.legend(loc='center right', bbox_to_anchor=(1.25, 0.5), ncol=1)

    legend.set_title('epoch')
    ax.set_title(f'number distance:{nd}')
    plt.tight_layout()
    # Name the file after this figure's distance (`nd`). A variable left over
    # from another cell would give every figure the same file name.
    fig.savefig(f'Lineplot for Relu vs Congruency effect per epoch number distance {nd}.pdf')
    plt.show()

### Lineplot for ReLU vs congruency effect per epoch (one subplot per number distance)

In [None]:
import matplotlib.pyplot as plt

# Number distances shown (2, 4, 6) and the grid that holds one panel each.
num_dist_values = np.arange(2,7,2)
num_dist_len = len(num_dist_values)
num_cols = 3
num_rows = int(np.ceil(num_dist_len / num_cols))

fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 5))
axs = axs.flatten()

# Common y-range so panels can be compared directly.
global_y_min = df['cong_effect'].min()
global_y_max = df['cong_effect'].max()

# (handles, labels) of each panel's legend, collected before it is removed.
lines_labels = []

only_200_units = df['num_units'] == 200
for panel, nd in zip(axs, num_dist_values):
    df_sub = df[only_200_units & (df['num_dist'] == nd)]
    sns.lineplot(data=df_sub, x='relu', y='cong_effect', hue='epoch', err_style='bars', palette='viridis', ax=panel)

    panel.set_ylim(global_y_min, global_y_max)
    panel.set_xticks(range(2,6))

    # Reference line at zero congruency effect.
    panel.axhline(0, color='red', linestyle='--')

    panel.set_xlabel('ReLu')
    panel.set_ylabel('Congruency Effect')
    panel.set_title(f'number distance: {nd}')

    # Keep the legend entries, then drop the per-panel legend.
    lines_labels.append(panel.get_legend_handles_labels())
    panel.get_legend().remove()

# Drop unused panels when the grid is not completely filled.
if num_dist_len % num_cols != 0:
    for spare in axs[num_dist_len:]:
        fig.delaxes(spare)

plt.tight_layout()

# Single figure-level legend below the panels, built from the first panel's entries.
lines, labels = lines_labels[0]
fig.legend(lines, labels, title='epoch', loc='lower center', ncol=len(lines), bbox_to_anchor=(0.5, -0.1))
plt.tight_layout()

plt.savefig('Lineplot for Relu vs Congruency effect per epoch for different number distances.pdf',bbox_inches='tight')
plt.show()

## Analysis of how different types of monotonic units (LNLS and LNSS) affect learning

In [None]:
from itertools import product

# Test split evaluated in this cell. Nothing here iterates over splits, so
# `exp` used to be whatever an earlier cell's `for exp in range(10)` loop left
# behind (9). It is pinned so the cell also runs on a fresh kernel.
# NOTE(review): only a single split is evaluated — confirm this is intended.
exp = 9

# Define the ranges
nets = range(1, 3)
relus = range(4, 5)
num_units_values = range(200, 201, 200)
epochs = range(90, 91, 10)
num_dists = range(2, 19, 2)
# Proportions 0.0, 0.1, ..., 1.0. Kept under its own name so the per-iteration
# value below does not overwrite the array.
LNSS_props = np.arange(0,1.1,0.1)

# The test set depends only on `exp`, so it is loaded once up front.
test_csv = f'csv/svm_test_set{exp}.csv'
test = pd.read_csv(test_csv).drop('Unnamed: 0', axis=1)
ans = svm.get_y(pd.read_csv(test_csv).drop('Unnamed: 0', axis=1))

# Each prediction file is read once per configuration and reused for every
# number distance, instead of once per output row.
records = []
for net, relu, num_units, epoch, prop in product(nets, relus, num_units_values, epochs, LNSS_props):
    LNSS_prop = round(prop, 1)
    LNLS_prop = round(1-LNSS_prop, 1)
    # round() before int() so a value such as 29.999999999999996 cannot
    # truncate to the wrong percentage in the file name.
    lnss_pct = int(round(100*LNSS_prop))
    lnls_pct = int(round(100*LNLS_prop))
    pred_csv = f'csv/svm_results/SVM_prediction_of_He_untrained_net{net}_relu{relu}_epoch{epoch}_{lnss_pct}_percent_LNSS_and_{lnls_pct}_percent_LNLS_units_exp{exp}_July2023.csv'
    pred = pd.read_csv(pred_csv)['y_pred'].to_numpy()

    # Per-trial distances/congruency plus a flag for correct predictions.
    df_num_sz_dist = get_num_sz_dist(test)
    df_num_sz_dist['correctly_predicted'] = np.equal(pred, ans)
    abs_num_dist = np.abs(df_num_sz_dist['num_dist'])

    for num_dist in num_dists:
        df_numdist = df_num_sz_dist[abs_num_dist == num_dist]
        correct = df_numdist['correctly_predicted']

        # Congruency effect = accuracy(congruent) - accuracy(incongruent).
        # An empty subset gives NaN.
        accuracy_cong = correct[df_numdist['congruency'] == True].mean()
        accuracy_incong = correct[df_numdist['congruency'] == False].mean()
        cong_effect = accuracy_cong - accuracy_incong

        # Store the unrounded proportion, as produced by np.arange.
        records.append((net, relu, num_units, epoch, prop, num_dist, cong_effect))

# One row per (net, relu, num_units, epoch, LNSS_prop, num_dist), in product order.
df = pd.DataFrame(records, columns=['net', 'relu', 'num_units', 'epoch', 'LNSS_prop', 'num_dist', 'cong_effect'])

In [None]:
import matplotlib.pyplot as plt

# Congruency effect against the LNSS proportion, one line per number distance.
ax = sns.lineplot(data=df, x='LNSS_prop', y='cong_effect', hue='num_dist', palette='tab10')

ax.set_title('Congruency effect by proportion of LNSS units', fontsize=16)
ax.set_xlabel('Proportion of LNSS units', fontsize=14)
ax.set_ylabel('Congruency effect', fontsize=14)
ax.set_xticks(np.arange(0,1.1,0.2))

# Dotted vertical marker at a proportion of 0.5.
ax.axvline(0.5, color='red', linestyle='dotted')

plt.savefig('Congruency effect by proportion of LNSS units.pdf')
plt.show()

In [None]:
# Load the pairwise-distance table, using the file's first column as the index.
# The following cell reads its 'num1', 'num2', 'sz1', 'sz2' and 'pdist' columns.
df_pdist = pd.read_csv('pdist.csv', index_col=0)

In [None]:
# Compute num_dist for each row
df_pdist['num_dist'] = abs(df_pdist['num1'] - df_pdist['num2'])

# Define congruency
df_pdist['is_congruent'] = ((df_pdist['num1'] < df_pdist['num2']) & (df_pdist['sz1'] < df_pdist['sz2'])) | ((df_pdist['num1'] > df_pdist['num2']) & (df_pdist['sz1'] > df_pdist['sz2']))

# Empty DataFrame to store the result
df_new = pd.DataFrame(columns=['num_dist', 'pdist_congruency_effect'])

# Loop over each unique num_dist
for num_dist in df_pdist['num_dist'].unique():
    df_same_num_dist = df_pdist[df_pdist['num_dist'] == num_dist]

    # Mean pdist for congruent and incongruent conditions
    mean_pdist_congruent = df_same_num_dist[df_same_num_dist['is_congruent']]['pdist'].mean()
    mean_pdist_incongruent = df_same_num_dist[~df_same_num_dist['is_congruent']]['pdist'].mean()

    # Compute pdist_congruency_effect
    pdist_congruency_effect = mean_pdist_congruent - mean_pdist_incongruent

    # Append to the result DataFrame
    df_new = pd.concat([df_new, pd.DataFrame([{'num_dist': num_dist, 'pdist_congruency_effect': pdist_congruency_effect}])], ignore_index=True)



## Euclidean distance in MDS vs accuracy

In [None]:
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import MDS

# One long-format frame of pairwise MDS distances per (relu, epoch, net).
df_pdist_list = []

for relu in range(2,6):
    for epoch in range(0,91,10):
        for net in range(1,3):
            print(f'net{net} relu{relu} epoch{epoch}')
            # Cosine similarities for this single network. The result is used
            # arithmetically below, so it is presumably a square similarity
            # matrix despite the name — confirm against stats.cos_similarity.
            cs_dict = stats.cos_similarity(relu=relu, epoch=epoch, nets=range(net, net+1))

            # 2-D MDS embedding of the dissimilarities (1 - similarity).
            mds = MDS(n_components=2, dissimilarity='precomputed', random_state=42)
            mds_results = mds.fit_transform(1 - cs_dict)

            # pdist returns the distances for all i < j in row-major order,
            # which is the order np.triu_indices(n, k=1) enumerates the pairs.
            # That avoids building the square matrix and looping in Python.
            index1, index2 = np.triu_indices(mds_results.shape[0], k=1)

            df_pdist = pd.DataFrame({
                'index1': index1,
                'index2': index2,
                'pdist': pdist(mds_results),
                'relu': relu,
                'epoch': epoch,
                'net': net
            })

            df_pdist_list.append(df_pdist)

# Concatenate all DataFrames
df_final = pd.concat(df_pdist_list, ignore_index=True)

print(df_final)

net1 relu2 epoch0
--- 356.467631816864 seconds ---


In [None]:
# NOTE(review): `net` and `num_units` are not assigned in this cell; the file
# names below use whatever values earlier cells left in the kernel. Confirm
# the intended configuration.

# One long-format frame per (relu, epoch), concatenated at the end.
long_df_list = []

for relu in range(2,3):
    for epoch in np.arange(0,91,10):
        exp_results = []
        for exp in range(10):
            test_csv = f'csv/svm_test_set{exp}.csv'
            pred_csv = f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} nonzero activity units exp{exp} June2023.csv'
            svm_matrix = svm.get_svm_matrix(test_csv, pred_csv)
            exp_results.append(svm_matrix)

        # Stack the per-split matrices and take the NaN-aware mean over splits.
        stacked = np.array([matrix.to_numpy() for matrix in exp_results])
        with np.errstate(invalid='ignore'):
            avg_svm_matrix = np.nanmean(stacked, axis=0)

        # Re-attach the row/column labels of the first split's matrix.
        template = exp_results[0]
        avg_svm_matrix_df = pd.DataFrame(avg_svm_matrix, index=template.index, columns=template.columns)

        # Wide -> long: one row per (row label, column label) cell, with the
        # two label columns renamed to num1 / num2.
        df_long = (
            avg_svm_matrix_df
            .reset_index()
            .melt(id_vars='index', var_name='column', value_name='value')
            .rename(columns={'index': 'num1', 'column': 'num2'})
        )

        # Map the integer labels k to 2*k + 2.
        for label_col in ('num1', 'num2'):
            df_long[label_col] = df_long[label_col].astype(int) * 2 + 2

        # True where num1 < num2 (upper triangle of the matrix).
        df_long['congruent'] = df_long['num1'] < df_long['num2']

        df_long['relu'] = relu
        df_long['epoch'] = epoch
        # NOTE(review): `exp` is the last value of the split loop above, while
        # `value` is the mean over all splits.
        df_long['exp'] = exp

        df_long = df_long[['num1', 'num2', 'congruent', 'relu', 'epoch', 'exp', 'value']]
        long_df_list.append(df_long)

# Concatenate all the long format DataFrames
df_final = pd.concat(long_df_list, ignore_index=True)

print(df_final)

In [None]:
# Reset the index of the DataFrame so that the indices become a column
df_reset = df.reset_index()

# Melt the DataFrame to long format
df_long = df_reset.melt(id_vars='index', var_name='column', value_name='value')

# Rename the columns to more meaningful names
df_long = df_long.rename(columns={'index': 'axis1', 'column': 'axis2'})

In [None]:
# Show the concatenated frame as the cell's output.
df_final