# Load dataframes with ACF (autocorrelation function) values and calculate tau (timescale)

In [1]:
import pandas as pd
import numpy as np
import sys
from scipy.optimize import curve_fit, OptimizeWarning
from sklearn.metrics import r2_score
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import warnings

### Get the data

In [2]:
# Base folder: get_tau_df reads the ACF pickles relative to it and the cells below
# write their outputs under it.
results_folder = (
    'D:\\projects_q_30_10_2024\\isttc\\results'
    '\\monkey\\fixation_period_1000ms\\'
)
# Same location as results_folder; not referenced by the other cells visible here.
save_folder = (
    'D:\\projects_q_30_10_2024\\isttc\\results'
    '\\monkey\\fixation_period_1000ms\\'
)

In [3]:
def get_tau_df(file_name_):
    """
    Load a pickled ACF dataframe and fit a tau to every row.

    Rows with NaNs in the fitted ACF columns are counted and dropped before fitting.
    Rows whose fit failed (NaN in 'tau_ms') are counted, then every row of the result
    that still contains a NaN is dropped. Both percentages are printed.

    :param file_name_: str, path of the ACF pickle, appended to results_folder
    :return: tau_df, n_rows_with_nans_perc, n_rows_with_nans_tau_perc
    """
    n_lags = 20
    acf_cols = ['acf_' + str(i) for i in range(n_lags)]
    # the last lag column is left out of both the NaN check and the fit
    fit_cols = acf_cols[:-1]

    # NOTE: unpickling can execute arbitrary code - only load files from a trusted source
    acf_df = pd.read_pickle(results_folder + file_name_)

    n_rows_with_nans = acf_df[fit_cols].isnull().any(axis=1).sum()
    n_rows_with_nans_perc = n_rows_with_nans / len(acf_df) * 100
    # Drop exactly the rows counted above. A NaN in a column that is not fitted must not
    # remove a row silently (it would not show up in n_rows_with_nans_perc).
    acf_df.dropna(subset=fit_cols, inplace=True)

    tau_df = calc_tau(acf_df, fit_cols)
    n_rows_with_nans_tau = tau_df['tau_ms'].isnull().sum()
    n_rows_with_nans_tau_perc = n_rows_with_nans_tau / len(tau_df) * 100
    # in place on purpose: callers assign columns on the returned frame afterwards
    tau_df.dropna(inplace=True)

    print('acf n_rows_with_nans_perc {}, tau n_rows_with_nans_tau_perc {}'.format(n_rows_with_nans_perc, n_rows_with_nans_tau_perc))

    return tau_df, n_rows_with_nans_perc, n_rows_with_nans_tau_perc

In [11]:
# Optional: redirect the fit messages to a log file instead of the cell output
# old_stdout = sys.stdout
# sys.stdout = open(results_folder + 'pfdl_acf_trial_tau_cal_log.txt', 'w')

# get_tau_df calls calc_tau, which is defined further down in the notebook
# (section "Calculate tau"), so those cells have to be executed before this one.
file_name_pfdl_isttc = '\\non_binned\\pfp\\acf\\acf_trial_isttc_concat_df_50ms_20lags_df.pkl'
(
    tau_df_proper_isttc,
    n_rows_with_nans_perc_isttc,
    n_rows_with_nans_tau_perc_isttc,
) = get_tau_df(file_name_pfdl_isttc)

# sys.stdout = old_stdout

Calculating taus for (540, 19)
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000.
RuntimeError: Optimal parameters not found: Number of calls to function has re

In [12]:
# Report the share of rows lost to NaNs in the ACF and to failed tau fits
print(
    f'acf n_rows_with_nans_perc_isttc {n_rows_with_nans_perc_isttc}, '
    f'tau n_rows_with_nans_tau_perc_isttc {n_rows_with_nans_tau_perc_isttc}'
)

acf n_rows_with_nans_perc_isttc 0.0, tau n_rows_with_nans_tau_perc_isttc 7.222222222222221


In [13]:
# One row per metric: percentage of rows dropped because of NaNs in the ACF and
# percentage of rows whose tau fit failed.
acf_nans_perc = [n_rows_with_nans_perc_isttc]
tau_nans_perc = [n_rows_with_nans_tau_perc_isttc]
metric_l = ['iSTTC']

# Build the frame column by column so the percentages stay numeric (stacking them
# together with the metric names would turn everything into strings first).
nan_df = pd.DataFrame({
    'acf_nan_perc': acf_nans_perc,
    'tau_nan_perc': tau_nans_perc,
    'metric': metric_l,
})
# The ACFs are loaded from, and all outputs are saved under, the 'pfp' paths, so the
# area label has to be 'pfp' as well (it used to say 'pfdl').
nan_df['area'] = 'pfp'
nan_df['acf_nan_perc'] = nan_df['acf_nan_perc'].astype(float)
nan_df['tau_nan_perc'] = nan_df['tau_nan_perc'].astype(float)
nan_df.to_pickle(results_folder + 'pfp_tau_acf_trial_concat_nan_df_50ms_20lags_df.pkl')
# last expression of the cell so the table is actually displayed
nan_df

In [14]:
# Store unit_id as an integer column, then write the tau dataframe to disk
tau_df_proper_isttc = tau_df_proper_isttc.astype({'unit_id': int})
tau_df_proper_isttc.to_pickle(
    results_folder + '\\non_binned\\pfp\\taus\\tau_acf_trial_isttc_concat_df_50ms_20lags_df.pkl'
)

In [None]:
# Inner-join the two tau tables on unit and trial; columns present in both tables get the
# '_proper' / '_isttc' suffix.
# NOTE(review): `tau_df_proper` is not defined in any cell visible in this notebook, and
# calc_tau has the 'trial_id' column commented out, so tau_df_proper_isttc presumably lacks
# the 'trial_id' merge key - confirm both before running this cell.
tau_df_proper_merged = tau_df_proper.merge(tau_df_proper_isttc, on=['unit_id','trial_id'], how='inner', 
                                           suffixes=('_proper', '_isttc'))
tau_df_proper_merged

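### Calculate tau

Note: the helper functions defined in this section are called by the cells in "Get the data" above, so run this section first.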

In [4]:
def func_single_exp(x, a, b, c):
    """
    Single exponential decay with offset, evaluated as a * (exp(-b * x) + c).

    :param x: 1d array, independent variable
    :param a: float, scale applied to the whole expression
    :param b: float, decay rate
    :param c: float, offset added inside the brackets
    :return: value(s) of the function at x
    """
    # The offset is scaled by `a` as well (form used in the paper); the alternative
    # a * np.exp(-b * x) + c is deliberately not used.
    decay = np.exp(-b * x)
    return a * (decay + c)


def fit_single_exp(ydata_to_fit_, start_idx_=1):
    """
    Fit func_single_exp to the data using non-linear least squares (scipy curve_fit).

    The x values are the sample indices 0 .. len(ydata_to_fit_) - 1.

    todo check that - important point: by default the fit starts at index 1 (acf[0] is
    skipped); it is done like this in the papers, still not sure.

    A failed fit is reported on stdout and signalled by NaN in all four return values.
    Handled failures are RuntimeError (e.g. maxfev reached), ValueError, and
    OptimizeWarning / RuntimeWarning, which are turned into exceptions while fitting.

    :param ydata_to_fit_: 1d array, the dependent data to fit
    :param start_idx_: int, index to start fitting from
    :return: fit_popt, fit_pcov, tau, fit_r_squared. On success fit_popt holds the fitted
        (a, b, c), fit_pcov is the covariance returned by curve_fit, tau is 1 / b (in units
        of the sample index) and fit_r_squared is the R-squared over the fitted samples.
    """
    t = np.arange(len(ydata_to_fit_))
    x_fit = t[start_idx_:]
    y_fit = ydata_to_fit_[start_idx_:]

    # result returned whenever the fit fails
    fit_popt, fit_pcov, tau, fit_r_squared = np.nan, np.nan, np.nan, np.nan

    with warnings.catch_warnings():
        # Escalate only the warning categories handled below. Escalating every category
        # would turn unrelated warnings (deprecations etc.) into uncaught exceptions and
        # abort the caller's loop.
        warnings.simplefilter('error', category=OptimizeWarning)
        warnings.simplefilter('error', category=RuntimeWarning)
        try:
            # I used 5000, 1000000000 it is like in Siegle
            popt, pcov = curve_fit(func_single_exp, x_fit, y_fit, maxfev=5000)
            tau_fit = 1 / popt[1]
            # fit r-squared
            r_squared = r2_score(y_fit, func_single_exp(x_fit, *popt))
            # assign only once everything succeeded, so a late failure leaves all NaNs
            fit_popt, fit_pcov, tau, fit_r_squared = popt, pcov, tau_fit, r_squared
        except RuntimeError as e:
            print('RuntimeError: {}'.format(e))
        except OptimizeWarning as o:
            print('OptimizeWarning: {}'.format(o))
        except RuntimeWarning as re:
            print('RuntimeWarning: {}'.format(re))
        except ValueError as ve:
            print('ValueError: {}'.format(ve))
            print('Possible reason: acf contains NaNs, low spike count')

    return fit_popt, fit_pcov, tau, fit_r_squared

In [5]:
def calc_tau(acf_df_, acf_cols_, bin_size_ms_=50):
    """
    Fit a single exponential to the ACF of every row and collect the fit results.

    Each row is fitted with fit_single_exp starting at index 1. Rows whose fit failed
    get NaN in all fit columns.

    :param acf_df_: pd.DataFrame, must contain 'unit_id' and the columns in acf_cols_
    :param acf_cols_: list of str, ACF columns used for the fit, in lag order
    :param bin_size_ms_: number, factor that converts tau (in units of the lag index) to
        'tau_ms'; defaults to 50, the value that used to be hard-coded
    :return: pd.DataFrame with columns unit_id, fit_a, fit_b, fit_c, tau, r_squared, tau_ms
    """
    # cast to float64 - the original note here reads
    # "RuntimeWarning: overflow encountered in matmul sttc start_idx 2"
    acf_2d = acf_df_[acf_cols_].values.astype(np.float64)
    print('Calculating taus for {}'.format(acf_2d.shape))

    fit_cols = ['fit_a', 'fit_b', 'fit_c', 'tau', 'r_squared']
    fit_rows = []
    for acf_row in acf_2d:
        fit_popt, _, tau, fit_r_squared = fit_single_exp(acf_row, start_idx_=1)
        if isinstance(fit_popt, np.ndarray):
            fit_a, fit_b, fit_c = fit_popt
        else:
            # failed fit: fit_single_exp returned NaN instead of the parameter array
            fit_a, fit_b, fit_c = np.nan, np.nan, np.nan
        fit_rows.append((fit_a, fit_b, fit_c, tau, fit_r_squared))

    tau_df = pd.DataFrame(fit_rows, columns=fit_cols, dtype=np.float64)
    tau_df.insert(0, 'unit_id', acf_df_['unit_id'].values)
    #tau_df.insert(1, 'trial_id', acf_df_['trial_id'].values)
    tau_df['tau_ms'] = tau_df['tau'] * bin_size_ms_

    return tau_df

#### Plots

In [None]:
# Bar plots of the percentage of rows with NaNs in the ACF (left) and in tau (right)
fig, axes = plt.subplots(1, 2, figsize=(6, 3), gridspec_kw={'width_ratios': [2, 2]})
plt.subplots_adjust(hspace=0.4, wspace=0.4)

bar_palette = ['black', 'darkgrey', 'steelblue']
for panel_ax, nan_col in zip(axes, ['acf_nan_perc', 'tau_nan_perc']):
    sns.barplot(ax=panel_ax, x='metric', y=nan_col, data=nan_df, palette=bar_palette, legend=False)

sns.despine()

# if save_fig:
#     fig.savefig(fig_folder + 'sttc_vs_pearson_constrained_dataset_with_empty_0_1000_51padding_full_dataset.png' , bbox_inches='tight')
#     fig.savefig(fig_folder + 'sttc_vs_pearson_constrained_dataset_with_empty_0_1000_51padding_full_dataset.svg' , bbox_inches='tight')

In [None]:
# Row-wise difference in fit quality: iSTTC R-squared minus proper-ACF R-squared
tau_df_proper_merged['diff'] = tau_df_proper_merged['r_squared_isttc'].sub(
    tau_df_proper_merged['r_squared_proper']
)

In [None]:
# R-squared of the iSTTC-based fits vs the proper-ACF (Pearson) fits:
# scatter of both values (middle panel) and histogram of their difference (right panel)

fig, axes = plt.subplots(1,3, figsize=(10,3), gridspec_kw={'width_ratios': [2, 2,2]})
plt.subplots_adjust(hspace=0.4, wspace=0.4)

# axes[0] currently stays empty: the violin plot below is commented out
# sns.violinplot(ax=axes[0], x='area', y='tau_ms_log10', hue='metric', data=tau_df_proper_merged.query('tau_ms >= 0'), 
#                cut=0, density_norm='width', palette=['darkgrey','steelblue'])
# axes[0].legend(frameon=False, loc='upper center')
# axes[0].set_xlabel('Brain area')
# axes[0].set_ylabel('Timescale log10')


sns.scatterplot(ax=axes[1], x=tau_df_proper_merged['r_squared_proper'], y=tau_df_proper_merged['r_squared_isttc'], s=2, color='slategray')
# Identity line (y = x) in data coordinates. A corner-to-corner diagonal in axes
# coordinates only coincides with y = x when the x and y limits are the same, which is
# not guaranteed for autoscaled data.
axes[1].axline((0, 0), slope=1, c='k')
axes[1].set_aspect('equal', adjustable='box')
axes[1].set_xlabel('Pearson R-squared')
axes[1].set_ylabel('STTC R-squared')

sns.histplot(ax=axes[2], x=tau_df_proper_merged['diff'].values, stat='probability', bins=100, kde=False, color='steelblue')
# vertical reference at zero difference
axes[2].axvline(x=0, lw=1, c='k')
axes[2].set_xlabel('STTC R-squared - \nPearson R-squared')
#axes[2].set_title('{}% STTC fits \nhave higher R-squared'.format(np.round(n_sttc_better_perc,2)))


# fig.suptitle('Unit tau STTC vs Pearson, N Pearson: {} units, N STTC: {} units'.format(len(tau_average_trial_subset_df.query('metric == "pearson"')), 
#                                                                               len(tau_average_trial_subset_df.query('metric == "sttc"'))), y=1.05)

sns.despine()

# if save_fig:
#     fig.savefig(fig_folder + 'sttc_vs_pearson_constrained_dataset_with_empty_0_1000_51padding_full_dataset.png' , bbox_inches='tight')
#     fig.savefig(fig_folder + 'sttc_vs_pearson_constrained_dataset_with_empty_0_1000_51padding_full_dataset.svg' , bbox_inches='tight')