In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from analysis_functions import *
from model_functions import *
import matplotlib.patches as patches
import seaborn as sns
import scipy.stats as stats
from scipy.stats import shapiro, chi2, mannwhitneyu, ttest_ind

# Read extracted variables from CSV

In [None]:
# Load the per-event extracted response variables for the chosen sensor.
# NOTE(review): the file name spells "GCaAMP" while the sensor label is "GCaMP" — confirm it matches the file on disk.
Vars = pd.read_csv('GCaAMP_MM_extracted_vars.csv')
sensor = 'GCaMP'  # label used in the figure titles and output file names below
# Alternative dataset: swap the two active lines above for the two below to analyse GRAB instead.
#Vars = pd.read_csv('GRAB_MM_extracted_vars.csv')
#sensor = 'GRAB'
Vars  # last expression of the cell: displays the loaded frame

# Make boxplots to view difference in params for halts and no-halts 

In [None]:
# Keep only the rows recorded for halt events, then summarise them.
halt_data = Vars[Vars["event"] == "halt"]
halt_data.describe()

In [None]:
# Keep only the rows recorded for no-halt (control) events, then summarise them.
Nohalt_data = Vars[Vars["event"] == "no halt"]
Nohalt_data.describe()

## Plotting params for individual mice

In [None]:
# Select a single animal for the per-mouse plots; the last line lists the
# available IDs so another mouse can be picked.
mouse = 'B3M2'
mousedata = Vars[Vars["MouseID"] == mouse]
Vars["MouseID"].unique()

In [None]:
# Per-mouse boxplots on a 2x3 grid: one panel per response parameter, comparing
# halt vs. no-halt events (x-axis) split by SessionBlock (hue).
fig, ax = plt.subplots(2, 3, figsize=(15, 8))
ax = ax.flatten()

# One entry per panel: (column in mousedata, panel title, keep the hue legend?).
# Every response column appears exactly once; PeakResponse_after occupies the
# third panel so MeanResponse_after is not drawn twice.
# The legend is kept only on the right-most panel of each row.
panel_specs = [
    ("PeakResponse", "Peak Response during halt", False),
    ("MeanResponse", "Mean Response during halt", False),
    ("PeakResponse_after", "Peak Response post halt", True),
    ("MinResponse", "Minimum Response during halt", False),
    ("MeanResponse_after", "Mean Response after halt", False),
    ("MinResponse_after", "Minimum Response post halt", True),
]

for panel, (column, title, keep_legend) in zip(ax, panel_specs):
    sns.boxplot(x="event", y=column, data=mousedata, hue="SessionBlock", ax=panel)
    panel.set_title(title)
    if not keep_legend:
        panel.get_legend().remove()

fig.suptitle(f'{mouse} param distributions')

fig.tight_layout(pad=1.08)
# NOTE: the Figures/ directory must already exist; savefig does not create it.
fig.savefig(f'Figures/{sensor}_{mouse}_halt_nohalt_boxplot.png', format = 'png')

In [None]:
# Average every response parameter within each
# (mouse, event, session block, session number) combination, so that each
# combination contributes a single row to the group-level plots and tests.
mouse_means = (
    Vars.groupby(['MouseID', 'event', 'SessionBlock', 'SessionNumber'])
        .agg(dict.fromkeys(
            [
                'PeakResponse',
                'PeakResponse_after',
                'MeanResponse',
                'MeanResponse_after',
                'MinResponse_after',
                'MinResponse',
            ],
            'mean',
        ))
        .reset_index()
)

mouse_means.head()

## Plotting all mouse averages together

In [None]:
# Group-level boxplots: one panel per response column of mouse_means, comparing
# halt vs. no-halt (x-axis) split by SessionBlock (hue).
fig, ax = plt.subplots(2, 3, figsize=(15, 8))
ax = ax.flatten()

# Pair each response column with an axis up front. zip() stops at the shorter
# sequence, so extra columns can never index past the 2x3 grid.
response_cols = [col for col in mouse_means.columns if 'Response' in col]
panels = list(zip(ax, response_cols))

for i, (panel, response_col) in enumerate(panels):
    sns.boxplot(x="event", y=response_col, data=mouse_means, hue="SessionBlock", ax=panel)
    panel.set_title(f"{response_col} halt")

    # Keep the hue legend on the last populated panel only. This avoids removing
    # every legend and then relying on plt.legend() and pyplot's implicit
    # "current axes" to bring one back.
    legend = panel.get_legend()
    if legend is not None and i < len(panels) - 1:
        legend.remove()

fig.suptitle(f'{sensor}_average_of_mouseaverages_params')

fig.tight_layout(pad=1.08)
# NOTE: the Figures/ directory must already exist; savefig does not create it.
fig.savefig(f'Figures/{sensor}_halt_nohalt_boxplot.png', format = 'png')


In [None]:
## Testing the response difference in the halt and control condition

In [None]:
def testing_diff_control(main_df, control_df):
    #testing normality
    shapiro_main = shapiro(main_df)
    shapiro_control= shapiro(control_df)

    print("Shapiro-Wilk test for halt:", shapiro_main)
    print("Shapiro-Wilk test for no halt:", shapiro_control)

    if (shapiro_main.pvalue < 0.05) or (shapiro_control.pvalue < 0.05):
        print('\n Datasets are normally distributed, will perform t-test \n')
        t_stat, p_val = ttest_ind(main_df, control_df, equal_var=False)
        print(f"T-test: t-statistic={t_stat}, p-value={p_val}")
    else:
        print('\n At least one dataset is not normally distributed, will perform Mann–Whitney U test \n')
        u_stat, p_val = mannwhitneyu(main_df, control_df, alternative='two-sided')
        print(f"Mann-Whitney U test: U-statistic={u_stat}, p-value={p_val}")

    mean_diff = np.mean(main_df) - np.mean(control_df)
    pooled_std = np.sqrt((np.std(main_df, ddof=1)**2 + np.std(control_df, ddof=1)**2) / 2)
    cohens_d = mean_diff / pooled_std
    print(f"Cohen's d: {cohens_d}")

In [None]:
# Halt vs. no-halt comparison of the per-mouse averaged MeanResponse.
halt_mean_response = mouse_means.query("event == 'halt'")["MeanResponse"]
no_halt_mean_response = mouse_means.query("event == 'no halt'")["MeanResponse"]

testing_diff_control(halt_mean_response, no_halt_mean_response)

In [None]:
# Halt vs. no-halt comparison of the per-mouse averaged PeakResponse.
halt_peak_response = mouse_means.query("event == 'halt'")["PeakResponse"]
no_peak_mean_response = mouse_means.query("event == 'no halt'")["PeakResponse"]

testing_diff_control(halt_peak_response, no_peak_mean_response)

## Fitting Linear Mixed-Effects Model
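$$\text{Response} = \beta_0 + \beta_1\,\text{moveX} + \beta_2\,\text{moveY} + \beta_3\,\text{event} + \beta_4\,\text{SessionBlock} + \beta_5\,(\text{moveX} \times \text{SessionBlock}) + u_{\text{MouseID}} + \epsilon$$

Here $u_{\text{MouseID}}$ is a random intercept per mouse and $\epsilon$ is the residual error. Note: the `mixedlm` formulas fitted below do not include the `moveY` term.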

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Linear mixed-effects model for MeanResponse: fixed effects for running
# (moveX), event type, session block and the moveX x block interaction.
# The rows are grouped by MouseID.
model = smf.mixedlm(
    formula="MeanResponse ~ moveX + event + SessionBlock + moveX:SessionBlock",
    groups=Vars.MouseID,
    data=Vars,
)
result = model.fit()
print(result.summary())


In [None]:
# Per-mouse diagnostics: how many rows each MouseID contributes, and the
# within-mouse standard deviation of the moveX and moveY columns.
print(Vars["MouseID"].value_counts())
print(Vars.groupby("MouseID")[["moveX", "moveY"]].std())


In [None]:
# Same fixed-effects structure as the MeanResponse model, with PeakResponse
# as the outcome; the rows are again grouped by MouseID.
model = smf.mixedlm(
    formula="PeakResponse ~ moveX + event + SessionBlock + moveX:SessionBlock",
    groups=Vars.MouseID,
    data=Vars,
)
result = model.fit()
print(result.summary())

In [None]:
# Minimal mixed model: MeanResponse explained by event type alone, with rows
# grouped by MouseID.
model = smf.mixedlm(
    formula="MeanResponse ~ event",
    groups=Vars.MouseID,
    data=Vars,
)
result = model.fit()
print(result.summary())

## Look at halt data alone - especially if significantly different from no halt data

In [None]:
# Restrict the analysis to halt events only.
halt_data = Vars[Vars.event == 'halt']

In [None]:
# Mixed model on halt events only: PeakResponse vs. running (moveX), session
# block and their interaction, with rows grouped by MouseID.
model = smf.mixedlm(
    formula="PeakResponse ~ moveX + SessionBlock + moveX:SessionBlock",
    groups=halt_data.MouseID,
    data=halt_data,
)
result = model.fit()
print(result.summary())

In [None]:
# Ordinary least squares on halt events, pooled over mice: MeanResponse
# against moveX plus the moveX x SessionBlock interaction.
response_running = smf.ols(
    formula='MeanResponse ~ moveX + moveX:SessionBlock',
    data=halt_data,
).fit()

print(response_running.summary())
# Residual mean squared error of the fit
print("MSE=%.3f" % (response_running.mse_resid,))

In [None]:
# Fit MeanResponse ~ moveX separately for each session block (halt events only)
# and report each model's own residual MSE.

# Closed-loop block
closedloop_data = halt_data.loc[halt_data.SessionBlock == 'closedloop']
cloop_response_running = smf.ols('MeanResponse ~ moveX', data=closedloop_data).fit()

print(cloop_response_running.summary())
# The MSE must come from the closed-loop fit, not the pooled `response_running` model.
print("MSE=%.3f" % cloop_response_running.mse_resid)

# Open-loop block
openloop_data = halt_data.loc[halt_data.SessionBlock == 'openloop']
oloop_response_running = smf.ols('MeanResponse ~ moveX', data=openloop_data).fit()

print(oloop_response_running.summary())
# The MSE must come from the open-loop fit, not the pooled `response_running` model.
print("MSE=%.3f" % oloop_response_running.mse_resid)

In [None]:
# Quick inspection: print the [fitted model, data] pair stored for each block.
for key, item in dict(
    closedloop=[cloop_response_running, closedloop_data],
    openloop=[oloop_response_running, openloop_data],
).items():
    print(item)

In [None]:
# One 2x2 diagnostic figure per session block (closed loop and open loop):
# regression scatter, residual histogram, Q-Q plot, and actual vs. predicted.
block_result_dict = {
    'closedloop': [cloop_response_running, closedloop_data],
    'openloop': [oloop_response_running, openloop_data],
}

for block, (ols_fit, block_df) in block_result_dict.items():

    fig, ax = plt.subplots(2, 2)
    ax = ax.flatten()

    residuals = ols_fit.resid

    # Panel 0: raw data with seaborn's regression line
    sns.regplot(
        x="moveX",
        y="MeanResponse",
        data=block_df,
        ax=ax[0],
        scatter_kws={"alpha": 0.5},
        line_kws={"color": "red"},
    )
    ax[0].set(
        title="Relationship between moveX and MeanResponse",
        xlabel="moveX",
        ylabel="MeanResponse",
    )

    # Panel 1: residual distribution, with a reference line at zero
    sns.histplot(residuals, kde=True, bins=30, ax=ax[1])
    ax[1].set(
        title="Distribution of Residuals",
        xlabel="Residuals",
        ylabel="Frequency",
    )
    ax[1].axvline(0, color='red', linestyle='--')

    # Panel 2: Q-Q plot of the residuals against a normal distribution
    stats.probplot(residuals, dist="norm", plot=ax[2])
    ax[2].set_title("Q-Q Plot of Residuals")

    # Predictions rebuilt by hand from the fitted intercept and moveX slope
    predicted = ols_fit.params.Intercept + (ols_fit.params.moveX * block_df["moveX"])

    # Panel 3: actual vs. predicted, with the identity line over the predicted range
    sns.scatterplot(x=predicted, y=block_df["MeanResponse"], alpha=0.5, ax=ax[3])
    identity_span = [min(predicted), max(predicted)]
    ax[3].plot(identity_span, identity_span, color="red", linestyle="--")
    ax[3].set(
        title="Actual vs. Predicted MeanResponse",
        xlabel="Predicted MeanResponse",
        ylabel="Actual MeanResponse",
    )

    fig.suptitle(block)

    fig.tight_layout()
    


In [None]:
# Closed-loop block only: PeakResponse against moveX, with halt events drawn
# first and no-halt events overlaid semi-transparently.
halt_closedloop_data = Vars.loc[(Vars.event == 'halt') & (Vars.SessionBlock == 'closedloop')]
nohalt_closedloop_data = Vars.loc[(Vars.event == 'no halt') & (Vars.SessionBlock == 'closedloop')]

plt.scatter(halt_closedloop_data.moveX, halt_closedloop_data.PeakResponse)
plt.scatter(nohalt_closedloop_data.moveX, nohalt_closedloop_data.PeakResponse, alpha=0.5)

plt.xlabel('Speed cm/sec')
plt.ylabel('peak response')

In [None]:
# Random-intercept model: PeakResponse explained by event, session number and
# session block (fixed effects), with one intercept per MouseID.
mixed_model = smf.mixedlm(
    formula="PeakResponse ~ event + SessionNumber + SessionBlock",
    groups=Vars.MouseID,
    data=Vars,
).fit()

print(mixed_model.summary())


In [None]:
# Same fixed effects as the random-intercept model, but each mouse also gets
# its own event effect (random slope) through re_formula.
random_slopes_model = smf.mixedlm(
    formula="PeakResponse ~ event + SessionNumber + SessionBlock",
    re_formula="~event",
    groups=Vars.MouseID,
    data=Vars,
).fit()

print(random_slopes_model.summary())

In [None]:
# Reference model without the event term, used as the baseline when testing
# whether event improves the fit.
null_model = smf.mixedlm(
    formula="PeakResponse ~ SessionNumber + SessionBlock",
    groups=Vars.MouseID,
    data=Vars,
).fit()
print(null_model.summary())

In [None]:


# Likelihood-ratio test for the fixed effect of `event`
# (mixed_model vs. null_model).
#
# statsmodels fits MixedLM with REML by default, and REML log-likelihoods are
# not comparable between models whose fixed effects differ. Both models are
# therefore refit here with maximum likelihood (reml=False) before comparing.
null_ml = null_model.model.fit(reml=False)
full_ml = mixed_model.model.fit(reml=False)

# Calculate likelihood ratio
ll_null = null_ml.llf  # Log-likelihood of the null model (ML)
ll_full = full_ml.llf  # Log-likelihood of the full model (ML)
lr_stat = -2 * (ll_null - ll_full)

# Degrees of freedom: difference in the number of estimated parameters
df_diff = full_ml.df_modelwc - null_ml.df_modelwc

# p-value from the chi-square survival function
p_value = chi2.sf(lr_stat, df_diff)

print(f"Likelihood Ratio Test Statistic: {lr_stat}")
print(f"Degrees of Freedom: {df_diff}")
print(f"P-Value: {p_value}")

In [None]:
# Residual diagnostics for the random-intercept model.
residuals = mixed_model.resid
fitted_values = mixed_model.fittedvalues

# Figure 1: distribution of the residuals
sns.histplot(residuals, kde=True)
plt.title("Histogram of Residuals")
plt.show()

# Figure 2: residuals against fitted values, with a reference line at zero
plt.scatter(fitted_values, residuals)
plt.axhline(y=0, color='red', linestyle='--')
plt.title("Residuals vs. Fitted Values")
plt.xlabel("Fitted Values")
plt.ylabel("Residuals")
plt.show()

In [None]:
# Function to resample and refit the model
import warnings
from sklearn.exceptions import ConvergenceWarning

def bootstrap_coefficients(model, data, n_bootstraps=1000, re_formula="~event", random_state=None):
    """Bootstrap the coefficients of a mixed model by resampling rows.

    For each replicate the rows of ``data`` are resampled with replacement
    (same size as ``data``). A mixed model with the same formula as ``model``
    is refit on the resample, grouped by ``MouseID``.

    Parameters
    ----------
    model : fitted statsmodels results object
        Must expose ``model.model.formula`` (i.e. it was built from a formula).
    data : pandas.DataFrame
        Data to resample; must contain a ``MouseID`` column plus every column
        used by the formula and by ``re_formula``.
    n_bootstraps : int, default 1000
        Number of bootstrap replicates.
    re_formula : str or None, default "~event"
        Random-effects formula used for every refit. The default keeps the
        previous hard-coded behaviour; pass ``None`` for a random-intercept-only
        refit.
    random_state : int or None, default None
        Seed for the resampling; ``None`` gives a different draw on every call.

    Returns
    -------
    pandas.DataFrame
        One row per replicate, holding the fitted ``params`` of that refit.
    """
    # statsmodels emits its own ConvergenceWarning class; the sklearn class of
    # the same name never matches it, so import the statsmodels one here.
    from statsmodels.tools.sm_exceptions import ConvergenceWarning as SMConvergenceWarning

    rng = np.random.RandomState(random_state)
    formula = model.model.formula
    bootstrap_results = []

    # Scope the filter to this call instead of permanently changing the
    # process-wide warning filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=SMConvergenceWarning)
        for _ in range(n_bootstraps):
            # Resample rows with replacement; reset the index so the refit
            # never sees duplicated index labels.
            sample = data.sample(frac=1, replace=True, random_state=rng).reset_index(drop=True)
            bootstrap_model = smf.mixedlm(
                formula=formula,
                data=sample,
                groups=sample["MouseID"],
                re_formula=re_formula
            ).fit(method='lbfgs', maxiter=100)
            bootstrap_results.append(bootstrap_model.params)

    return pd.DataFrame(bootstrap_results)

# Apply bootstrapping
#bootstrap_results = bootstrap_coefficients(mixed_model, combined_vars)
#print(bootstrap_results.describe())

In [None]:
#Vars.describe()
#bootstrap_results.describe()

In [None]:
# Event x session-block interaction model for PeakResponse, with a per-mouse
# random intercept and a per-mouse random slope for event.
interaction_model = smf.mixedlm(
    formula="PeakResponse ~ event * SessionBlock + SessionNumber",
    re_formula="~event",
    groups=Vars.MouseID,
    data=Vars,
).fit()

print(interaction_model.summary())

In [None]:
# No-halt events only: MeanResponse vs. session block and session number, with
# rows grouped by MouseID.
nohalt_model = smf.mixedlm(
    formula="MeanResponse ~ SessionBlock + SessionNumber",
    groups=Nohalt_data.MouseID,
    data=Nohalt_data,
).fit()

print(nohalt_model.summary())

In [None]:
# Halt events only: MeanResponse vs. session block and session number, with
# rows grouped by MouseID.
halt_model = smf.mixedlm(
    formula="MeanResponse ~ SessionBlock + SessionNumber",
    groups=halt_data.MouseID,
    data=halt_data,
).fit()

print(halt_model.summary())