# Social Familiarity and Reinforcement Value
This notebook replicates the analyses for the research on social familiarity and reward value in rats. The original analyses were conducted in R, and this document translates them into a Python workflow.

Two distinct analytical approaches are presented:

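1.  A **frequentist, individual-level analysis** using `lmfit` to fit the Zero-Bounded Exponential (ZBEn) demand model separately to each subject pair, with a distinct pair of parameters (log α and Q0) for every familiarity × social-duration condition.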
2.  A **Bayesian multilevel analysis** using `PyMC` to fit a custom nonlinear model, accounting for the repeated-measures data structure.

In [None]:
import sys
!{sys.executable} -m pip install pandas numpy scipy lmfit PyMC

In [74]:
import pandas as pd
import numpy as np
from lmfit import Model, Parameters
import pymc as pm
import arviz as az
import pytensor.tensor as pt

# Set the display format for floating-point numbers to 3 decimal places
pd.set_option('display.float_format', '{:.3f}'.format)

# --- Helper Functions (translated from R) ---
def lhs(x):
    """Log10-scaled inverse-hyperbolic-sine-style transform.

    Computes ``log10(x/2 + sqrt(x**2/4 + 1))``, which is mathematically
    ``asinh(x/2) / ln(10)``. Unlike a plain logarithm it is defined at
    ``x == 0`` (where it returns 0).

    Works element-wise on anything NumPy can broadcast over
    (scalars, arrays, pandas Series).
    """
    half_x = 0.5 * x
    radical = np.sqrt(0.25 * (x**2) + 1)
    return np.log10(half_x + radical)

# --- Load and Process Data ---
raw_dat = pd.read_csv("Code/dat.csv")
print("Data loaded successfully.")

# Derive the analysis columns on a new frame; raw_dat itself is left untouched.
# Each lambda sees the columns assigned before it, so the order below matters.
dat = raw_dat.assign(
    # Two successive divisions by 60 (presumably seconds -> hours; confirm units).
    active_ses_time=lambda d: (d['active_ses_time'] / 60) / 60,
    # Interactions per unit of (rescaled) active session time.
    interact_rt=lambda d: d['interact'] / d['active_ses_time'],
    # Log-like transform of the interaction rate (see lhs).
    lq=lambda d: lhs(d['interact_rt']),
    fmlr=lambda d: np.where(d['familiarity'] == 1, "Cagemate", "Non-cagemate"),
    cond=lambda d: d['cond'].replace({"10sec": "10 Sec", "30sec": "30 Sec", "60sec": "60 Sec"}),
)

# One row per pair x familiarity x duration x fixed-ratio value (mean over rows).
group_keys = ['pair', 'fmlr', 'cond', 'fr']
dat_agg = dat.groupby(group_keys, as_index=False).agg(
    interact_rt=('interact_rt', 'mean'),
    lq=('lq', 'mean'),
)

# Create the specific interaction-like dummy variables required by the models:
# f1/f3/f6 flag Cagemate rows at 10/30/60 Sec, u1/u3/u6 flag Non-cagemate rows.
dat_processed = dat_agg.copy()
familiarity_codes = (('f', 'Cagemate'), ('u', 'Non-cagemate'))
duration_codes = (('1', '10 Sec'), ('3', '30 Sec'), ('6', '60 Sec'))
for fam_prefix, fam_label in familiarity_codes:
    for dur_suffix, dur_label in duration_codes:
        in_cell = (dat_processed['fmlr'] == fam_label) & (dat_processed['cond'] == dur_label)
        dat_processed[fam_prefix + dur_suffix] = in_cell.astype(int)

Data loaded successfully.


## Part 1: Frequentist Individual-Level Analysis

This section fits the ZBEn demand model to each subject's data and performs linear contrasts on the estimated parameters.

In [75]:
# --- Define the complex ZBEn model for lmfit ---
def zbe_model_full(fr, f1, f3, f6, u1, u3, u6,
                   af1, af3, af6, au1, au3, au6,
                   qf1, qf3, qf6, qu1, qu3, qu6):
    """Predict the lhs-transformed response with condition-specific parameters.

    Parameters
    ----------
    fr : array-like
        Independent variable multiplied into the exponent of the demand curve.
    f1, f3, f6, u1, u3, u6 : array-like
        Condition dummy variables that weight the coefficients below.
    af1 ... au6 : float
        Coefficients combined linearly with the dummies and then
        exponentiated, i.e. they act on the log scale of the rate term.
    qf1 ... qu6 : float
        Coefficients combined linearly with the dummies to give Q0
        on its natural scale.

    Returns
    -------
    array-like
        ``lhs(Q0) * exp(-(alpha / lhs(Q0)) * Q0 * fr)``.
    """
    # Linear predictors: with one-hot dummies each row picks out one coefficient.
    log_alpha = af1*f1 + af3*f3 + af6*f6 + au1*u1 + au3*u3 + au6*u6
    q0 = qf1*f1 + qf3*f3 + qf6*f6 + qu1*u1 + qu3*u3 + qu6*u6

    alpha = np.exp(log_alpha)
    lhs_q0 = lhs(q0)

    # Exponential decay from lhs(Q0) as fr increases.
    decay_rate = -alpha / lhs_q0
    exponent = decay_rate * q0 * fr
    return lhs_q0 * np.exp(exponent)

zbe_lmfit_model = Model(zbe_model_full, independent_vars=['fr', 'f1', 'f3', 'f6', 'u1', 'u3', 'u6'])

# Columns of dat_processed that are passed to the model as independent variables.
INDEPENDENT_COLS = ['fr', 'f1', 'f3', 'f6', 'u1', 'u3', 'u6']

# Cagemate-minus-non-cagemate contrast weights, summed over the three durations.
# The "a" parameters are exponentiated inside zbe_model_full, so the alpha
# contrast is on the log(alpha) scale.
ALPHA_FAM_WEIGHTS = {'af1': 1, 'af3': 1, 'af6': 1, 'au1': -1, 'au3': -1, 'au6': -1}
Q0_FAM_WEIGHTS = {'qf1': 1, 'qf3': 1, 'qf6': 1, 'qu1': -1, 'qu3': -1, 'qu6': -1}


def linear_contrast(result, weights):
    """Return ``(estimate, standard_error)`` for a linear combination of fitted parameters.

    Parameters
    ----------
    result : lmfit ModelResult
        A completed fit.
    weights : dict[str, float]
        Maps parameter name -> contrast weight; parameters not listed get weight 0.

    Returns
    -------
    tuple[float, float]
        ``estimate = sum(w_i * theta_i)`` and ``se = sqrt(w' * Cov * w)``.
        ``se`` is NaN when lmfit could not estimate a covariance matrix
        (``result.covar`` is None) or the quadratic form is negative.

    Notes
    -----
    ``ModelResult.eval_uncertainty`` is NOT suitable here: it returns the
    uncertainty band of the model *prediction* at each data point, and extra
    keyword arguments named after parameters override the fitted values.
    """
    estimate = sum(w * result.params[name].value for name, w in weights.items())

    covar = getattr(result, 'covar', None)
    if covar is None:
        return estimate, np.nan

    # result.covar is ordered like result.var_names (the varying parameters).
    weight_vec = np.array([weights.get(name, 0.0) for name in result.var_names], dtype=float)
    variance = float(weight_vec @ np.asarray(covar, dtype=float) @ weight_vec)
    std_err = np.sqrt(variance) if variance >= 0 else np.nan
    return estimate, std_err


# --- Fit the model for each subject ---
param_list = []
contrast_list = []
for subj_pair in dat_processed['pair'].unique():
    subj_data = dat_processed[dat_processed['pair'] == subj_pair]
    params = zbe_lmfit_model.make_params(af1=-6, af3=-6, af6=-6, au1=-6, au3=-6, au6=-6,
                                         qf1=50, qf3=50, qf6=50, qu1=50, qu3=50, qu6=50)
    # Q0 enters the model on its natural scale, so keep it non-negative.
    for p in params:
        if p.startswith('q'):
            params[p].set(min=0)

    # Plain NumPy arrays avoid any pandas index alignment inside lmfit.
    independent_vars_dict = {col: subj_data[col].to_numpy() for col in INDEPENDENT_COLS}
    result = zbe_lmfit_model.fit(subj_data['lq'].to_numpy(), params, **independent_vars_dict)

    fit_params = pd.DataFrame.from_dict(result.params.valuesdict(), orient='index', columns=['estimate'])
    fit_params['pair'] = subj_pair
    param_list.append(fit_params)

    # Each contrast is stored as an (estimate, standard error) pair.
    alpha_fam_contrast = linear_contrast(result, ALPHA_FAM_WEIGHTS)
    q0_fam_contrast = linear_contrast(result, Q0_FAM_WEIGHTS)

    contrast_list.append({'pair': subj_pair, 'parameter': 'alpha', 'contrast': 'Cagemate vs. Non-cagemate', 'estimate': alpha_fam_contrast})
    contrast_list.append({'pair': subj_pair, 'parameter': 'Q0', 'contrast': 'Cagemate vs. Non-cagemate', 'estimate': q0_fam_contrast})

# --- Reformat and Display Results ---
final_params = pd.concat(param_list).reset_index().rename(columns={'index': 'term'})
# Term names look like "af1" / "qu6": first letter = parameter, second = familiarity,
# last digit = social duration.
final_params['parameter'] = np.where(final_params['term'].str.startswith('a'), 'alpha', 'Q0')
final_params['Familiarity'] = np.where(final_params['term'].str.contains('f'), 'Cagemate', 'Non-cagemate')

# np.select needs a string default so the result column has a consistent dtype.
final_params['Social Duration'] = np.select(
    [final_params['term'].str.endswith('1'), final_params['term'].str.endswith('3'), final_params['term'].str.endswith('6')],
    ['10 Sec', '30 Sec', '60 Sec'],
    default='N/A'
)

final_params = final_params[['pair', 'Familiarity', 'Social Duration', 'parameter', 'estimate']]
final_contrasts = pd.DataFrame(contrast_list)
print("--- Estimated ZBEn Parameters (Individual Level) ---")
print(final_params.to_string())
# The table reports estimates and standard errors, not p-values.
print("\n--- Linear Contrast Results (estimate, SE) ---")
final_contrasts_to_print = final_contrasts.copy()
final_contrasts_to_print['estimate'] = final_contrasts_to_print['estimate'].apply(
    lambda x: f"est={x[0]:.3f}, se={x[1]:.3f}"
)
print(final_contrasts_to_print)

--- Estimated ZBEn Parameters (Individual Level) ---
    pair   Familiarity Social Duration parameter  estimate
0      1      Cagemate          10 Sec     alpha    -4.448
1      1      Cagemate          30 Sec     alpha    -5.357
2      1      Cagemate          60 Sec     alpha    -5.091
3      1  Non-cagemate          10 Sec     alpha    -5.869
4      1  Non-cagemate          30 Sec     alpha    -4.194
5      1  Non-cagemate          60 Sec     alpha    -5.133
6      1      Cagemate          10 Sec        Q0    47.955
7      1      Cagemate          30 Sec        Q0    55.097
8      1      Cagemate          60 Sec        Q0    57.832
9      1  Non-cagemate          10 Sec        Q0    46.337
10     1  Non-cagemate          30 Sec        Q0    53.984
11     1  Non-cagemate          60 Sec        Q0    35.177
12     2      Cagemate          10 Sec     alpha    -6.058
13     2      Cagemate          30 Sec     alpha    -5.722
14     2      Cagemate          60 Sec     alpha    -6.287
15 

## Part 2: Bayesian Nonlinear Multilevel Analysis

This section uses `PyMC` to fit a single nonlinear multilevel model to the entire dataset.

In [76]:
print("--- Building and Fitting Bayesian Model with PyMC (this will take several minutes) ---")

# Fixed seed so a fresh "Restart & Run All" reproduces the same posterior draws.
RANDOM_SEED = 20240101

# Create numeric indices for pair, fmlr, and cond.
# pd.Categorical sorts its categories, so code k refers to the k-th sorted label.
dat_agg['pair_idx'] = pd.Categorical(dat_agg['pair']).codes
dat_agg['fmlr_cond_idx'] = pd.Categorical(dat_agg['fmlr'] + "_" + dat_agg['cond']).codes

# Get dimensions
n_pairs = dat_agg['pair_idx'].nunique()
n_fmlr_conds = dat_agg['fmlr_cond_idx'].nunique()

# Plain arrays for indexing / arithmetic inside the model graph.
pair_idx_values = dat_agg['pair_idx'].values
fmlr_cond_idx_values = dat_agg['fmlr_cond_idx'].values
fr_values = dat_agg['fr'].values
lq_values = dat_agg['lq'].values

with pm.Model() as pymc_model:
    # Scale parameters must be positive, hence HalfCauchy rather than Cauchy.
    # An unbounded prior lets the sampler propose negative values: invalid for
    # the likelihood's sigma, and sign-unidentified for the random-effect SDs.
    sd_a = pm.HalfCauchy("sd_a", beta=2.5)
    sd_b = pm.HalfCauchy("sd_b", beta=2.5)

    # Population-level coefficients, one per familiarity x duration cell.
    # "a" is log(alpha); "b" is log(Q0) (both are exponentiated below).
    a_coeffs = pm.Cauchy("a_coeffs", -6, 2.5, shape=n_fmlr_conds)
    b_coeffs = pm.Cauchy("b_coeffs", 4, 2.5, shape=n_fmlr_conds)

    # Non-centred pair-level random effects: offset ~ N(0, 1) scaled by the SD.
    a_pair_offset = pm.Normal("a_pair_offset", 0, 1, shape=n_pairs)
    a_pair = pm.Deterministic("a_pair", a_pair_offset * sd_a)

    b_pair_offset = pm.Normal("b_pair_offset", 0, 1, shape=n_pairs)
    b_pair = pm.Deterministic("b_pair", b_pair_offset * sd_b)

    a_est = a_coeffs[fmlr_cond_idx_values] + a_pair[pair_idx_values]
    b_est = b_coeffs[fmlr_cond_idx_values] + b_pair[pair_idx_values]

    # ZBEn mean function: lhs(Q0) * exp(-(alpha / lhs(Q0)) * Q0 * fr),
    # with lhs(Q0) computed once instead of being spelled out twice.
    q0 = pt.exp(b_est)
    lhs_q0 = pt.log10(0.5 * q0 + pt.sqrt(0.25 * (q0**2) + 1))
    mu = lhs_q0 * pt.exp((-pt.exp(a_est) / lhs_q0) * q0 * fr_values)

    sigma = pm.HalfCauchy("sigma", beta=2.5)
    lq_obs = pm.Normal("lq_obs", mu=mu, sigma=sigma, observed=lq_values)

    idata = pm.sample(draws=2000, tune=2000, chains=4, cores=4, target_accept=0.95,
                      random_seed=RANDOM_SEED, progressbar=False)

# --- Analyze Posterior ---
# Create a mapping from fmlr_cond_idx back to descriptive names
idx_to_name = dat_agg[['fmlr_cond_idx', 'fmlr', 'cond']].drop_duplicates().sort_values('fmlr_cond_idx')
fmlr_cond_names = (idx_to_name['fmlr'] + "_" + idx_to_name['cond']).str.replace(" ", "_").tolist()

# Extract the posterior samples into DataFrames (rows = draws, columns = cells)
a_samples = idata.posterior['a_coeffs'].stack(sample=("chain", "draw")).values.T
b_samples = idata.posterior['b_coeffs'].stack(sample=("chain", "draw")).values.T

a_draws = pd.DataFrame(a_samples, columns=fmlr_cond_names)
b_draws = pd.DataFrame(b_samples, columns=fmlr_cond_names)


def _columns_for_familiarity(columns, familiarity):
    """Return the columns whose name starts with ``'<familiarity>_'``."""
    prefix = familiarity + "_"
    return [col for col in columns if col.startswith(prefix)]


def _column_for_duration(columns, duration):
    """Return the single column ending in ``duration``; raise if not exactly one."""
    matches = [col for col in columns if col.endswith(duration)]
    if len(matches) != 1:
        raise ValueError(f"expected exactly one column ending in {duration!r}, found {matches}")
    return matches[0]


# Prefix/suffix matching avoids relying on the letter case of "Cagemate"
# to tell it apart from "Non-cagemate".
cagemate_cols_a = _columns_for_familiarity(a_draws.columns, 'Cagemate')
non_cagemate_cols_a = _columns_for_familiarity(a_draws.columns, 'Non-cagemate')
cagemate_10_a = _column_for_duration(cagemate_cols_a, '10_Sec')
cagemate_60_a = _column_for_duration(cagemate_cols_a, '60_Sec')
non_cagemate_10_a = _column_for_duration(non_cagemate_cols_a, '10_Sec')
non_cagemate_60_a = _column_for_duration(non_cagemate_cols_a, '60_Sec')

cagemate_cols_b = _columns_for_familiarity(b_draws.columns, 'Cagemate')
non_cagemate_cols_b = _columns_for_familiarity(b_draws.columns, 'Non-cagemate')
cagemate_10_b = _column_for_duration(cagemate_cols_b, '10_Sec')
cagemate_60_b = _column_for_duration(cagemate_cols_b, '60_Sec')
non_cagemate_10_b = _column_for_duration(non_cagemate_cols_b, '10_Sec')
non_cagemate_60_b = _column_for_duration(non_cagemate_cols_b, '60_Sec')

# Perform calculations using the retrieved column names.
# familiarity: sum of Cagemate cells minus sum of Non-cagemate cells.
# duration_linear: (60 Sec - 10 Sec) summed across both familiarity levels.
alpha_summary = pd.DataFrame({
    'familiarity': (a_draws[cagemate_cols_a].sum(axis=1) - a_draws[non_cagemate_cols_a].sum(axis=1)),
    'duration_linear': (a_draws[cagemate_60_a] - a_draws[cagemate_10_a]) +
                       (a_draws[non_cagemate_60_a] - a_draws[non_cagemate_10_a])
})

q0_summary = pd.DataFrame({
    'familiarity': (b_draws[cagemate_cols_b].sum(axis=1) - b_draws[non_cagemate_cols_b].sum(axis=1)),
    'duration_linear': (b_draws[cagemate_60_b] - b_draws[cagemate_10_b]) +
                       (b_draws[non_cagemate_60_b] - b_draws[non_cagemate_10_b])
})

# Posterior median with a 95% equal-tailed interval for each contrast.
quantile_labels = {0.5: 'median', 0.025: 'lower', 0.975: 'upper'}

print("\n--- Posterior Summary for Contrasts on Alpha ---")
print(alpha_summary.quantile([0.025, 0.5, 0.975]).T.rename(columns=quantile_labels))

print("\n--- Posterior Summary for Contrasts on Q0 ---")
print(q0_summary.quantile([0.025, 0.5, 0.975]).T.rename(columns=quantile_labels))

--- Building and Fitting Bayesian Model with PyMC (this will take several minutes) ---


Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_a, sd_b, a_coeffs, b_coeffs, a_pair_offset, b_pair_offset, sigma]
Sampling 4 chains for 2_000 tune and 2_000 draw iterations (8_000 + 8_000 draws total) took 22 seconds.



--- Posterior Summary for Contrasts on Alpha ---
                 lower  median  upper
familiarity      0.745   1.441  2.134
duration_linear -0.132   0.427  0.995

--- Posterior Summary for Contrasts on Q0 ---
                 lower  median  upper
familiarity     -2.553  -1.376 -0.209
duration_linear -1.256  -0.354  0.560
