In [1]:
import pandas as pd
from itertools import product

#Sample Index	Study Bird #	Legband #	Sex
#Time point (hr)	Meloxicam Dose (mg/kg)
#[DBS] (ug/mL)


df = pd.read_csv('finch_dbs_df.txt', sep = '\t', dtype=str)

# Container for final result
all_melted = []

# Group by both dose and sex
for (dose, sex), group_df in df.groupby(['Meloxicam Dose (mg/kg)', 'Sex']):
    # Group animals by time_point
    grouped = group_df.groupby('Time point (hr)')['Sample Index'].apply(list)
    
    # Skip if any time point is missing animals
    if grouped.apply(len).min() == 0:
        continue

    # Cartesian product of animals across time points
    perms = list(product(*grouped))

    # Convert to wide DataFrame
    perms_df = pd.DataFrame(perms, columns=grouped.index)
    perms_df['permutation_id'] = [f"{dose}_{sex}_{i}" for i in range(len(perms))]
    perms_df['sex'] = sex
    perms_df['dose'] = dose

    # Melt to long format
    melted = perms_df.melt(
        id_vars=['permutation_id', 'sex', 'dose'],
        var_name='time_point',
        value_name='Sample Index'
    )

    # Collect
    all_melted.append(melted)

# Concatenate all groups
result = pd.concat(all_melted, ignore_index=True)
result = result.sort_values(['dose', 'sex', 'permutation_id', 'time_point']).reset_index(drop=True)

result = pd.merge(result, df, on ='Sample Index', how ='left')

# Show result
print(result)

result.to_csv('perm_finch_dbs_df.txt', sep='\t', index=False)

       permutation_id sex dose time_point Sample Index Study Bird # Legband #  \
0              13_F_0   F   13        0.5           85           29    Gray79   
1              13_F_0   F   13          1           51           37    Gray54   
2              13_F_0   F   13          2          112           45    Pink73   
3              13_F_0   F   13         24          123           45    Pink73   
4              13_F_0   F   13          4           90           29    Gray79   
...               ...  ..  ...        ...          ...          ...       ...   
374995      15_M_9999   M   15          1           31            9   Green88   
374996      15_M_9999   M   15          2           15          20A  Orange78   
374997      15_M_9999   M   15         24           25          20A  Orange78   
374998      15_M_9999   M   15          4           84           4A   Green84   
374999      15_M_9999   M   15          8           40          12A     Tan29   

       Sex Date sample coll

In [2]:
import pandas as pd

df = pd.read_csv('perm_finch_dbs_df.txt', sep = '\t', dtype=str)

# Step 2: Select 1000 unique permutation_ids per sex and dose group
unique_ids = (
    df[['permutation_id', 'sex', 'dose']]
    .drop_duplicates()
    .groupby(['sex', 'dose'], group_keys=False)
    .apply(lambda x: x.sample(n=min(1000, len(x)), random_state=1))
)['permutation_id']

# Step 3: Filter the original dataframe for all rows with those permutation_ids
filtered_df = df[df['permutation_id'].isin(unique_ids)]

filtered_df.to_csv("perm_finch_dbs_df_subset.txt", sep = '\t', index=False)

  .apply(lambda x: x.sample(n=min(1000, len(x)), random_state=1))


In [3]:
import pandas as pd
from scipy.stats import ttest_ind

# Load your data
df = pd.read_csv('finch_pk_df.txt', sep = '\t')  # example loading step

# === Step 1a: Summary stats by Dose × Sex × Parameter ===
summary = (
    df.groupby(['Dose', 'Sex', 'Parameter'])['Value']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)
summary['mean_std'] = summary.apply(
    lambda row: f"{row['mean']:.2f} ± {row['std']:.2f}", axis=1
)

# === Step 1b: Summary stats for combined M+F per Dose × Parameter ===
combined_summary = (
    df.groupby(['Dose', 'Parameter'])['Value']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)
combined_summary['Sex'] = 'All'
combined_summary['mean_std'] = combined_summary.apply(
    lambda row: f"{row['mean']:.2f} ± {row['std']:.2f}", axis=1
)

# Reorder columns to match
combined_summary = combined_summary[['Dose', 'Sex', 'Parameter', 'mean', 'std', 'count', 'mean_std']]

# Combine both summaries
summary_full = pd.concat([summary, combined_summary], ignore_index=True)
summary_full = summary_full.sort_values(by=['Parameter', 'Dose', 'Sex'])

# export summary stats df
summary_full.to_csv("summary_full_results.txt", sep='\t', index=False)

# === Step 2a: T-test M vs F within Dose × Parameter ===
sex_results = []
for (dose, param), group in df.groupby(['Dose', 'Parameter']):
    m = group[group['Sex'] == 'M']['Value'].dropna()
    f = group[group['Sex'] == 'F']['Value'].dropna()
    t_stat, p_val = (ttest_ind(m, f, equal_var=False) if len(m) > 1 and len(f) > 1 else (None, None))
    sex_results.append({
        'Parameter': param,
        'Group': f'Dose={dose}',
        'Comparison': 'M vs F',
        't-stat': t_stat,
        'p-value': p_val
    })

# === Step 2b: T-test Low vs High within each Sex × Parameter ===
dose_by_sex = []
for (sex, param), group in df.groupby(['Sex', 'Parameter']):
    doses = group['Dose'].unique()
    if len(doses) == 2:
        d1, d2 = doses
        v1 = group[group['Dose'] == d1]['Value'].dropna()
        v2 = group[group['Dose'] == d2]['Value'].dropna()
        t_stat, p_val = (ttest_ind(v1, v2, equal_var=False) if len(v1) > 1 and len(v2) > 1 else (None, None))
        dose_by_sex.append({
            'Parameter': param,
            'Group': f'Sex={sex}',
            'Comparison': f'{d1} vs {d2}',
            't-stat': t_stat,
            'p-value': p_val
        })

# === Step 2c: T-test Low vs High combining sexes (All) per Parameter ===
dose_combined = []
for param, group in df.groupby('Parameter'):
    doses = group['Dose'].unique()
    if len(doses) == 2:
        d1, d2 = doses
        v1 = group[group['Dose'] == d1]['Value'].dropna()
        v2 = group[group['Dose'] == d2]['Value'].dropna()
        t_stat, p_val = (ttest_ind(v1, v2, equal_var=False) if len(v1) > 1 and len(v2) > 1 else (None, None))
        dose_combined.append({
            'Parameter': param,
            'Group': 'All',
            'Comparison': f'{d1} vs {d2}',
            't-stat': t_stat,
            'p-value': p_val
        })

# === Combine all t-test results ===
ttest_df = pd.DataFrame(sex_results + dose_by_sex + dose_combined)
ttest_df = ttest_df.sort_values(by=['Parameter', 'Group'])

# === Display results and export df ===
# print("Summary statistics:")
# print(summary_full)

# print("\nT-test results:")
# print(ttest_df)

ttest_df.to_csv("t_test_results.txt", sep='\t', index=False)



  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
