In [None]:
import logging
import os

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import offsetbox
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from statsmodels.stats.multicomp import MultiComparison

import divisivenormalization.analysis as analysis
import divisivenormalization.utils as helpers
from divisivenormalization.data import Dataset, MonkeySubDataset

helpers.config_ipython()

logging.basicConfig(level=logging.INFO)

# Start from seaborn's defaults, then switch to the "ticks" axes style.
sns.set()
sns.set_style("ticks")

# adjust sns paper context rc parameters
# A single font size is shared by every text element of the figures.
font_size = 8
rc_dict = {
    **{
        text_key: font_size
        for text_key in (
            "font.size",
            "axes.titlesize",
            "axes.labelsize",
            "xtick.labelsize",
            "ytick.labelsize",
            "legend.fontsize",
        )
    },
    # Square default figure; helpers.cm2inch presumably converts 8 cm to inches.
    "figure.figsize": (helpers.cm2inch(8), helpers.cm2inch(8)),
    "figure.dpi": 300,
    # Type 42 (TrueType) fonts in PDF output, see matplotlib's rcParams docs.
    "pdf.fonttype": 42,
    "savefig.transparent": True,
    "savefig.bbox_inches": "tight",
}
sns.set_context("paper", rc=rc_dict)


class args:
    """Notebook-wide analysis settings, used as a plain namespace of class attributes."""

    # File names of the cached result tables (CSV).
    fname_best_csv = "df_best.csv"
    fname_val_csv = "df_val.csv"

    # Directories handed to the pickle helpers and to the model loader.
    weights_path = "weights"
    train_logs_path = "train_logs"

    # Number of top-ranked runs to analyse.
    num_best = 10
    # NOTE(review): num_val is not used in the cells visible here - confirm its role.
    num_val = 10

    # Orientation analysis settings.
    orientation_binsize = np.deg2rad(10)  # 10 degrees, expressed in radians
    oriented_threshold = 0.125  # forwarded to analysis.angles_circ_var

    # Stimulus geometry.
    stim_full_size = 140  # full size of stimulus w/o subsampling and cropping
    stim_subsample = 2



 ### Load data

In [None]:
# Table of training-run results; later cells pass it to the model loader.
results_df = pd.read_csv("results.csv")

# Save a simplified version of the csv file, sorted by validation set performance
df_plain = helpers.simplify_df(results_df)
df_plain.to_csv("results_plain.csv")

data_dict = Dataset.get_clean_data()
# Take the subsampling factor from the shared config instead of repeating the
# literal, so the dataset and args.stim_subsample cannot drift apart.
# NOTE(review): seed, train_frac and crop are still hard-coded here - confirm
# they match the settings the models were trained with.
data = MonkeySubDataset(
    data_dict,
    seed=1000,
    train_frac=0.8,
    subsample=args.stim_subsample,
    crop=30,
)



 ### Get and save FEV performance on test set
 Use the 10 best models for analysis.
 Split the csv files accordingly. Also, extract some weights to be used for later analysis and save
 them as pickle files. As this operation requires model loading, we do it only if it was not done before.

In [None]:
try:
    # Fast path: an earlier run already evaluated the models and cached the table.
    df_best = pd.read_csv(args.fname_best_csv)
    logging.info("loaded data from %s", args.fname_best_csv)

except FileNotFoundError:
    # Slow path: take the top rows of df_plain, evaluate every selected run on the
    # test set and dump the quantities that later cells read back via helpers.pkl_load.
    df_best = df_plain[0 : args.num_best].copy()

    fev_lst = []
    # Iterate over the rows that are actually present: the slice above is shorter
    # than args.num_best when df_plain has fewer rows, and indexing up to
    # args.num_best would then raise IndexError after expensive model loading.
    for i in range(len(df_best)):
        run_no = df_best.iloc[i]["run_no"]
        logging.info("load run no %s", run_no)
        model = helpers.load_dn_nonspecific_model(run_no, results_df, data, args.train_logs_path)

        fev = model.evaluate_fev_testset()
        fev_lst.append(fev)

        feve = model.evaluate_fev_testset_per_neuron()
        helpers.pkl_dump(feve, run_no, "feve.pkl", args.weights_path)

        # get weights and normalization input
        (
            features_chanfirst,
            p,
            pooled,
            readout_feat,
            u,
            v,
            dn_exponent,
        ) = helpers.get_weights(model)

        norm_input = analysis.norm_input(pooled, p)

        helpers.pkl_dump(features_chanfirst, run_no, "features_chanfirst.pkl", args.weights_path)
        helpers.pkl_dump(p, run_no, "p.pkl", args.weights_path)
        helpers.pkl_dump(pooled, run_no, "pooled.pkl", args.weights_path)
        helpers.pkl_dump(norm_input, run_no, "norm_input.pkl", args.weights_path)
        helpers.pkl_dump(readout_feat, run_no, "readout_feat_w.pkl", args.weights_path)
        helpers.pkl_dump(u, run_no, "u.pkl", args.weights_path)
        helpers.pkl_dump(v, run_no, "v.pkl", args.weights_path)
        helpers.pkl_dump(dn_exponent, run_no, "dn_exponent.pkl", args.weights_path)

    # One test-set FEV value per selected run, in the same row order as df_best.
    df_best["fev"] = fev_lst
    df_best.to_csv(args.fname_best_csv)

# Later cells index df_best.iloc[i] for i in range(args.num_best); flag a stale
# cache or a short results table here instead of failing further down.
if len(df_best) != args.num_best:
    logging.warning(
        "df_best has %d rows but args.num_best is %d", len(df_best), args.num_best
    )



In [None]:
# Test-set FEV of the selected runs, scaled by 100 (presumably fraction -> percent).
fev = df_best["fev"].values * 100

print("Mean FEV", np.mean(fev))
print("SEM", stats.sem(fev, ddof=1))
print("max FEV", np.max(fev))
# Row 0 is reported as the run with the best validation correlation; this relies
# on df_best keeping the ordering of df_plain.
print("FEV of model with max correlation on validation set", fev[0])



 ### Similarly oriented features contribute more strongly

In [None]:
# Per-run totals of normalization input, split by the similar / dissimilar
# orientation masks returned by analysis.orientation_masks.
sim_input_lst = []
dissim_input_lst = []

for i in range(args.num_best):
    run_no = df_best.iloc[i]["run_no"]
    features = helpers.pkl_load(run_no, "features_chanfirst.pkl", args.weights_path)
    norm_input = helpers.pkl_load(run_no, "norm_input.pkl", args.weights_path)

    # Angles from the features, their differences, then masks over the
    # normalization input (the first mask is not used in this cell).
    angles = analysis.angles_circ_var(features, args.oriented_threshold)
    angles_diff = analysis.angle_diff(angles)
    unor_mask, sim_mask, dissim_mask = analysis.orientation_masks(angles_diff)

    sim_input = np.sum(norm_input[sim_mask])
    sim_input_lst.append(sim_input)

    dissim_input = np.sum(norm_input[dissim_mask])
    dissim_input_lst.append(dissim_input)

# Ratio of similar to dissimilar input, one value per run.
fractions = [
    sim_total / dissim_total
    for sim_total, dissim_total in zip(sim_input_lst, dissim_input_lst)
]
fraction_err = stats.sem(fractions, ddof=0)
mean = np.average(fractions)
conf_int = analysis.compute_confidence_interval(fractions)

print("Similar norm. input divided by dissimilar input", np.round(mean, 2))
print("Confidence interval", np.round(conf_int, 2))
# Distance from the mean to the first entry of the confidence interval.
print("Plus/minus", np.round(mean - conf_int[0], 2))
# Paired test across runs: each run contributes one similar and one dissimilar total.
print(stats.wilcoxon(sim_input_lst, dissim_input_lst))
print("Cohen's d", np.round(analysis.cohens_d(sim_input_lst, dissim_input_lst), 1))
