In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from analysis_functions import *
from model_functions import *
import matplotlib.patches as patches
import seaborn as sns
import scipy.stats as stats
from scipy.stats import shapiro, chi2, mannwhitneyu, ttest_ind

# Read extracted variables from CSV

Consider importing multiple datasets

In [None]:
# Load the GCaMP statistics table from CSV; the path is relative to the
# notebook's working directory.
closedopenMM_GCaMPvars = pd.read_csv('Saved_data/GCaMP_closedopenMM_all_470_stats_df.csv')
# Disabled alternative input (a different CSV plus a sensor label), kept for reference.
#closedopenMM_GCaMPvars = pd.read_csv('GRAB_MM_extracted_closedopenMM_GCaMPvars.csv')
#sensor = 'GRAB'
# Bare last expression: the cell output is the DataFrame itself.
closedopenMM_GCaMPvars

# Make boxplots to view difference in params for halts and no-halts 

In [None]:
# Select a subset with filter_data (not defined in this notebook; it comes from
# one of the star-imported project modules) using the keys 'was_halt',
# 'session 1' and '0-1s'. Presumably: halt events of session 1 in the window
# 0 to 1 second after the halt -- TODO confirm against filter_data.
halt_data_during_session1 = filter_data(closedopenMM_GCaMPvars, ['was_halt', 'session 1', '0-1s'])
# Summary statistics of the selected rows (cell output).
halt_data_during_session1.describe() 

In [None]:
# Control subset: same selection as the halt cell but with the 'no_halt' key.
# Presumably non-halt events of session 1 in the window 0 to 1 second after the
# event -- TODO confirm against filter_data.
Nohalt_during_session1 = filter_data(closedopenMM_GCaMPvars, ['no_halt', 'session 1', '0-1s'])
# Summary statistics of the selected rows (cell output).
Nohalt_during_session1.describe() 

## Plotting params for individual mice

In [None]:
# Columns of the statistics table to compare
statistics_to_compare = ['peak', 'mean', 'stderr', 'movementX_prior']

# Generate the grid of boxplots for the time range '0-1s'
# (compare_statistics_grid comes from a star-imported project module).
compare_statistics_grid(closedopenMM_GCaMPvars, statistics=statistics_to_compare, time_range='0-1s')



In [None]:
# Columns of the statistics table to compare (same list as for the '0-1s' grid)
statistics_to_compare = ['peak', 'mean', 'stderr', 'movementX_prior']

# Generate the grid of boxplots for the time range '1-2s'
compare_statistics_grid(closedopenMM_GCaMPvars, statistics=statistics_to_compare, time_range='1-2s')

In [None]:
# Per-mouse averages: one row for every combination of mouse, halt flag,
# block type, session and time range; each statistic column holds the mean
# of that statistic within the group.
mouse_means = (
    closedopenMM_GCaMPvars
    .groupby(['mouse_id', 'halt', 'block_type', 'session', 'time_range'])
    .agg(
        peak=('peak', 'mean'),
        mean=('mean', 'mean'),
        stderr=('stderr', 'mean'),
        movementX_prior=('movementX_prior', 'mean'),
        movementY_prior=('movementY_prior', 'mean'),
    )
    .reset_index()  # turn the group keys back into ordinary columns
)

# Show the aggregated table as the cell output
mouse_means


In [None]:
# Columns of the statistics table to compare (now including movementY_prior)
statistics_to_compare = ['peak', 'mean', 'stderr', 'movementX_prior', 'movementY_prior']

# Generate the session-wise grid of boxplots for the time range '0-1s'
# (compare_statistics_grid_sessions comes from a star-imported project module).
compare_statistics_grid_sessions(closedopenMM_GCaMPvars, statistics=statistics_to_compare, time_range='0-1s')


## Plotting all mouse averages together

In [None]:
# Show the per-mouse averages restricted to the '0-1s' time range.
mouse_means.loc[mouse_means.time_range == '0-1s']

In [None]:
# Boxplots of the per-mouse averages: one row of panels per response
# parameter and one column per time window, with halt on the x-axis and
# block type as hue.
response_params = ['peak', 'mean']
time_ranges = ['0-1s', '-1-0s', '1-2s']  # panel order within each row

# squeeze=False always returns a 2-D array of axes, so flatten() is safe for
# any number of parameters / time ranges.
fig, ax = plt.subplots(len(response_params), len(time_ranges), figsize=(15, 8), squeeze=False)
ax = ax.flatten()

i = 0
for response_col in response_params:
    for time_range in time_ranges:
        sns.boxplot(
            x="halt",
            y=response_col,
            data=mouse_means.loc[mouse_means.time_range == time_range],
            hue="block_type",
            ax=ax[i],
        )
        # Name the time window in the title; previously every panel in a row
        # carried the same title and the windows could not be told apart.
        ax[i].set_title(f"{response_col} halt ({time_range})")

        # Keep seaborn's legend on the last panel only. Removing every legend
        # and then calling plt.legend() relies on labelled artists still being
        # attached to the axes, which is not guaranteed across seaborn versions.
        if i < len(ax) - 1:
            panel_legend = ax[i].get_legend()
            if panel_legend is not None:  # no legend is drawn for an empty panel
                panel_legend.remove()

        i += 1

fig.suptitle('average_of_mouseaverages_params')

fig.tight_layout(pad=1.08)
#fig.savefig(f'Figures/_halt_nohalt_boxplot.png', format = 'png')


In [None]:
## Testing the response difference in the halt and control condition

In [None]:
def testing_diff_control(main_df, control_df):
    #testing normality
    shapiro_main = shapiro(main_df)
    shapiro_control= shapiro(control_df)

    print("Shapiro-Wilk test for halt:", shapiro_main)
    print("Shapiro-Wilk test for no halt:", shapiro_control)

    if (shapiro_main.pvalue < 0.05) or (shapiro_control.pvalue < 0.05):
        print('\n Datasets are normally distributed, will perform t-test \n')
        t_stat, p_val = ttest_ind(main_df, control_df, equal_var=False)
        print(f"T-test: t-statistic={t_stat}, p-value={p_val}")
    else:
        print('\n At least one dataset is not normally distributed, will perform Mann–Whitney U test \n')
        u_stat, p_val = mannwhitneyu(main_df, control_df, alternative='two-sided')
        print(f"Mann-Whitney U test: U-statistic={u_stat}, p-value={p_val}")

    mean_diff = np.mean(main_df) - np.mean(control_df)
    pooled_std = np.sqrt((np.std(main_df, ddof=1)**2 + np.std(control_df, ddof=1)**2) / 2)
    cohens_d = mean_diff / pooled_std
    print(f"Cohen's d: {cohens_d}")

In [None]:
# Split the per-mouse averages of the 'mean' statistic by the halt flag.
# NOTE(review): mouse_means is filtered on `halt` only, so rows from every
# block_type / session / time_range group are pooled in each sample --
# confirm this is intended.
halt_mean_response = mouse_means.loc[mouse_means['halt'] == True, 'mean']
no_halt_mean_response = mouse_means.loc[mouse_means['halt'] == False, 'mean']

# Prints the normality checks, the two-sample test and Cohen's d.
testing_diff_control(halt_mean_response, no_halt_mean_response)

In [None]:
# Split the per-mouse averages of the 'peak' statistic by the halt flag.
# NOTE(review): as for the 'mean' comparison, only `halt` is filtered on, so
# all block types, sessions and time ranges are pooled -- confirm intended.
halt_peak_response = mouse_means.loc[mouse_means['halt'] == True, 'peak']
# Holds the no-halt 'peak' values (despite "mean" in the variable name).
no_peak_mean_response = mouse_means.loc[mouse_means['halt'] == False, 'peak']

# Prints the normality checks, the two-sample test and Cohen's d.
testing_diff_control(halt_peak_response, no_peak_mean_response)

## Fitting Linear Mixed-Effects Model
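$$\text{Response} = \beta_0 + \beta_1\,\text{moveX} + \beta_2\,\text{moveY} + \beta_3\,\text{event} + \beta_4\,\text{block\_type} + \beta_5\,(\text{moveX}:\text{block\_type}) + u_{\text{mouse\_id}} + \epsilon$$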

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Fit Linear Mixed-Effects Model
# Response: the 'mean' column. Fixed effects: movementX_prior, halt,
# block_type and the movementX_prior:block_type interaction. Rows are grouped
# by mouse_id.
# NOTE(review): the unfiltered table is passed in, so presumably rows from all
# sessions and time ranges enter the fit -- confirm this is intended.
model = smf.mixedlm(
    "mean ~ movementX_prior + halt + block_type + movementX_prior:block_type",
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"]
)
result = model.fit()
print(result.summary())


In [None]:
# Number of rows per mouse, i.e. the group sizes seen by the mixed models.
print(closedopenMM_GCaMPvars["mouse_id"].value_counts())
# Per-mouse standard deviation of the two movement columns.
print(closedopenMM_GCaMPvars.groupby("mouse_id")[["movementX_prior", "movementY_prior"]].std())


In [None]:
# Fit Linear Mixed-Effects Model
# Response: the 'peak' column. Fixed effects: movementX_prior, halt,
# block_type and the movementX_prior:block_type interaction. Rows are grouped
# by mouse_id. Rebinds the notebook-level names `model` and `result`.
model = smf.mixedlm(
    "peak ~ movementX_prior + halt + block_type + movementX_prior:block_type",
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"]
)
result = model.fit()
print(result.summary())

In [None]:
# Fit Linear Mixed-Effects Model
# Reduced model: 'mean' explained by the halt flag alone, rows grouped by
# mouse_id. Rebinds the notebook-level names `model` and `result`.
model = smf.mixedlm(
    "mean ~ halt",
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"]
)
result = model.fit()
print(result.summary())

## Look at halt data alone - especially if significantly different from no halt data

In [None]:
# Subset selected by the 'was_halt' key only; presumably all halt events across
# block types, sessions and time ranges -- TODO confirm against filter_data.
halt_data = filter_data(closedopenMM_GCaMPvars, ['was_halt'])

In [None]:
# Fit Linear Mixed-Effects Model
# Halt subset only. Response: 'peak'. Fixed effects: movementX_prior,
# block_type and their interaction. Rows are grouped by mouse_id.
# Rebinds the notebook-level names `model` and `result`.
model = smf.mixedlm(
    "peak ~ movementX_prior + block_type + movementX_prior:block_type",
    data=halt_data,
    groups=halt_data["mouse_id"]
)
result = model.fit()
print(result.summary())

In [None]:
# Ordinary least squares on the halt subset (no grouping by mouse): 'mean'
# explained by movementX_prior and the movementX_prior:block_type interaction.
response_running = smf.ols('mean ~ movementX_prior + movementX_prior:block_type', data=halt_data).fit()

print(response_running.summary())
# Disabled t-test call kept from an earlier version.
#print(response_running.t_test('PeakResponse'))
# Residual mean squared error of the fit.
print("MSE=%.3f" % response_running.mse_resid)

In [None]:
# Separate OLS fits of 'mean' against movementX_prior for the closed-loop and
# open-loop rows of the halt subset, selected with the '0-1s' key.
closedloop_data = filter_data(halt_data, ['closedloop', '0-1s'])

cloop_response_running = smf.ols('mean ~ movementX_prior', data=closedloop_data).fit()

print(cloop_response_running.summary())
# Residual MSE of the closed-loop fit itself (previously the MSE of
# `response_running`, a different model fitted in another cell, was printed).
print("MSE=%.3f" % cloop_response_running.mse_resid)

openloop_data = filter_data(halt_data, ['openloop', '0-1s'])

oloop_response_running = smf.ols('mean ~ movementX_prior', data=openloop_data).fit()

print(oloop_response_running.summary())
# Residual MSE of the open-loop fit itself (same correction as above).
print("MSE=%.3f" % oloop_response_running.mse_resid)

In [None]:
# OLS of 'mean' against event_time for the closed-loop subset.
cloop_response_time = smf.ols('mean ~ event_time', data=closedloop_data).fit()

print(cloop_response_time.summary())


# Same regression for the open-loop subset.
oloop_response_time = smf.ols('mean ~ event_time', data=openloop_data).fit()

print(oloop_response_time.summary())

In [None]:
# Diagnostic figure per block type (closed loop, then open loop): regression
# scatter, residual histogram, residual Q-Q plot and actual-vs-predicted.
block_result_dict = {
    'closedloop': [cloop_response_running, closedloop_data],
    'openloop': [oloop_response_running, openloop_data],
}

for block, resultlist in block_result_dict.items():
    ols_fit, block_frame = resultlist

    fig, ax = plt.subplots(2, 2)
    ax = ax.flatten()
    scatter_ax, hist_ax, qq_ax, pred_ax = ax

    residuals = ols_fit.resid

    # Panel 1: raw relationship with a fitted regression line
    sns.regplot(
        x="movementX_prior",
        y="mean",
        data=block_frame,
        line_kws={"color": "red"},
        scatter_kws={"alpha": 0.5},
        ax=scatter_ax,
    )
    scatter_ax.set(title="Relationship between moveX and mean", xlabel="moveX", ylabel="mean")

    # Panel 2: residual distribution with a reference line at zero
    sns.histplot(residuals, kde=True, bins=30, ax=hist_ax)
    hist_ax.set(title="Distribution of Residuals", xlabel="Residuals", ylabel="Frequency")
    hist_ax.axvline(0, color='red', linestyle='--')

    # Panel 3: Q-Q plot of the residuals against a normal distribution
    stats.probplot(residuals, dist="norm", plot=qq_ax)
    qq_ax.set_title("Q-Q Plot of Residuals")

    # Predictions rebuilt by hand from the intercept and slope of the fit
    coefs = ols_fit.params
    predicted = coefs.Intercept + (coefs.movementX_prior * block_frame["movementX_prior"])

    # Panel 4: actual vs. predicted, with the identity line over the predicted range
    sns.scatterplot(x=predicted, y=block_frame["mean"], alpha=0.5, ax=pred_ax)
    identity_span = [min(predicted), max(predicted)]
    pred_ax.plot(identity_span, identity_span, color="red", linestyle="--")
    pred_ax.set(title="Actual vs. Predicted mean", xlabel="Predicted mean", ylabel="Actual mean")

    fig.suptitle(block)

    fig.tight_layout()
    


In [None]:
# Scatter of 'peak' against movementX_prior for the closed-loop rows, halt and
# no-halt subsets overlaid. The trailing comments keep the equivalent
# boolean-mask selections for reference.
halt_closedloop_data = filter_data(closedopenMM_GCaMPvars, ['was_halt', 'closedloop'])##closedopenMM_GCaMPvars[(closedopenMM_GCaMPvars['halt'] == True) & (closedopenMM_GCaMPvars['block_type'] == 'closedloop')]

plt.scatter(halt_closedloop_data['movementX_prior'], halt_closedloop_data['peak'], label = 'halt')

nohalt_closedloop_data = filter_data(closedopenMM_GCaMPvars, ['no_halt', 'closedloop'])#[(closedopenMM_GCaMPvars['halt'] == False) & (closedopenMM_GCaMPvars['block_type'] == 'closedloop')]

# Drawn semi-transparent so the halt points underneath stay visible.
plt.scatter(nohalt_closedloop_data['movementX_prior'], nohalt_closedloop_data['peak'], alpha = 0.5, label = 'no halt')

plt.xlabel('Speed cm/sec')
plt.ylabel('peak response')
plt.legend()

In [None]:
# Fit a mixed-effects model of 'peak' on the full table; this is the "full"
# model used later for the likelihood-ratio test and the residual plots.
mixed_model = smf.mixedlm(
    "peak ~ halt + session + block_type",  # Fixed effects
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars['mouse_id'],  # Random intercept for mouse_id
).fit()

print(mixed_model.summary())


In [None]:
# Same fixed effects as mixed_model, plus a random slope for 'halt' within
# each mouse_id (re_formula="~halt").
random_slopes_model = smf.mixedlm(
    "peak ~ halt +session + block_type",
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"],
    re_formula="~halt"  # Random intercept and random slope for halt within mouse_id
).fit()

print(random_slopes_model.summary())

In [None]:
# Null model for the likelihood-ratio test: identical to mixed_model except
# that the 'halt' fixed effect is left out.
null_model = smf.mixedlm(
    "peak ~ session + block_type",
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"]
).fit()
print(null_model.summary())

In [None]:
# Likelihood-ratio test of the full model (with `halt`) against the null model
# (without `halt`).
#
# mixed_model and null_model were fitted with statsmodels' default REML
# criterion. REML likelihoods are not comparable between models that differ in
# their fixed effects, so both formulas are refitted here with maximum
# likelihood (reml=False) on fresh model objects; the REML fits stay untouched.
ml_full = smf.mixedlm(
    mixed_model.model.formula,
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"]
).fit(reml=False)
ml_null = smf.mixedlm(
    null_model.model.formula,
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"]
).fit(reml=False)

# Calculate likelihood ratio
ll_null = ml_null.llf  # Log-likelihood of the null model (ML fit)
ll_full = ml_full.llf  # Log-likelihood of the full model (ML fit)
lr_stat = -2 * (ll_null - ll_full)

# Degrees of freedom: difference in the number of estimated parameters.
# Counted from `params` rather than `df_modelwc`, which is not a documented
# attribute of MixedLMResults.
df_diff = len(ml_full.params) - len(ml_null.params)

# p-value from the chi-squared survival function
p_value = chi2.sf(lr_stat, df_diff)

print(f"Likelihood Ratio Test Statistic: {lr_stat}")
print(f"Degrees of Freedom: {df_diff}")
print(f"P-Value: {p_value}")

In [None]:
# Residual diagnostics for mixed_model.
# Extract residuals (rebinds the notebook-level name `residuals`)
residuals = mixed_model.resid

# Histogram of residuals with a kernel density overlay
sns.histplot(residuals, kde=True)
plt.title("Histogram of Residuals")
plt.show()

# Residuals vs. fitted values, with a dashed reference line at zero
fitted_values = mixed_model.fittedvalues
plt.scatter(fitted_values, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title("Residuals vs. Fitted Values")
plt.xlabel("Fitted Values")
plt.ylabel("Residuals")
plt.show()

In [None]:
# Function to resample and refit the model
import warnings
#from sklearn.exceptions import ConvergenceWarning

def bootstrap_coefficients(model, data, n_bootstraps=1000, re_formula="~halt", random_state=None):
    """Bootstrap the parameters of a fitted mixed-effects model.

    For each replicate the rows of ``data`` are resampled with replacement
    (same number of rows), the formula of ``model`` is refitted with groups
    taken from the ``mouse_id`` column, and the fitted parameters are kept.

    Parameters
    ----------
    model : fitted results object
        Must expose the original formula as ``model.model.formula``.
    data : pandas.DataFrame
        Table to resample; must contain ``mouse_id`` and every formula column.
    n_bootstraps : int, optional
        Number of bootstrap replicates (default 1000).
    re_formula : str or None, optional
        Random-effects formula used for every refit. Defaults to ``"~halt"``,
        the value that was previously hard-coded.
    random_state : int or None, optional
        Seed for the resampling; ``None`` keeps runs non-deterministic.

    Returns
    -------
    pandas.DataFrame
        One row per replicate, one column per model parameter.

    Notes
    -----
    NOTE(review): rows are resampled individually, not whole mice; confirm
    that this matches the intended bootstrap scheme for grouped data.
    """
    # The name was previously used without any import (the sklearn import in
    # this cell is commented out), which raised NameError on the first call.
    # statsmodels' own warning class is what the refits emit.
    from statsmodels.tools.sm_exceptions import ConvergenceWarning

    rng = np.random.RandomState(random_state)
    bootstrap_results = []

    # Silence convergence warnings only while bootstrapping, instead of
    # changing the warning filters for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        for _ in range(n_bootstraps):
            # Resample with replacement; reset the index so the duplicated
            # row labels cannot interfere with aligning `groups` to `data`.
            sample = data.sample(frac=1, replace=True, random_state=rng).reset_index(drop=True)
            bootstrap_model = smf.mixedlm(
                formula=model.model.formula,
                data=sample,
                groups=sample["mouse_id"],
                re_formula=re_formula
            ).fit(method='lbfgs', maxiter=100)
            bootstrap_results.append(bootstrap_model.params)
    return pd.DataFrame(bootstrap_results)

# Apply bootstrapping
#bootstrap_results = bootstrap_coefficients(mixed_model, combined_closedopenMM_GCaMPvars)
#print(bootstrap_results.describe())

In [None]:

#closedopenMM_GCaMPvars.describe()
#bootstrap_results.describe()

In [None]:
# Mixed model of 'peak' with the halt x block_type interaction
# (`halt * block_type` expands to both main effects plus their interaction),
# a session term, and a random slope for halt within each mouse_id.
interaction_model = smf.mixedlm(
    "peak ~ halt * block_type + session",
    data=closedopenMM_GCaMPvars,
    groups=closedopenMM_GCaMPvars["mouse_id"],
    re_formula="~halt"
).fit()

print(interaction_model.summary())

In [None]:
# Mixed model of 'mean' on the no-halt subset built earlier with the keys
# 'no_halt', 'session 1' and '0-1s'; rows are grouped by mouse_id.
# NOTE(review): if that subset really contains a single session, the `session`
# term is constant and cannot be estimated -- confirm and consider dropping it.
nohalt_model = smf.mixedlm(
    "mean ~ block_type + session",
    data=Nohalt_during_session1 ,
    groups=Nohalt_during_session1["mouse_id"]
).fit()

print(nohalt_model.summary())

In [None]:
# Mixed model of 'mean' on the halt subset built earlier with the keys
# 'was_halt', 'session 1' and '0-1s'; rows are grouped by mouse_id.
# NOTE(review): if that subset really contains a single session, the `session`
# term is constant and cannot be estimated -- confirm and consider dropping it.
halt_model = smf.mixedlm(
    "mean ~ block_type + session",
    data=halt_data_during_session1,
    groups=halt_data_during_session1["mouse_id"]
).fit()

print(halt_model.summary())