In [12]:
#!/usr/bin/python
# updated DBR 02/2025 #

%matplotlib inline  

import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st


In [17]:
#ecology metrics from past work, one table used for comparing TCRs among different cohorts
outdf = pd.read_csv('data/clean/ecometrics_wprovirusrecalc.csv')

#extra tables used when fitting metrics, read in this order:
#TCR fold-change rho, IPDA rates, expansion/contraction rates
TCRrho, IPDAdf, lamdf = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/clean/init_foldchg_rho.csv',
        'data/clean/IPDArates.csv',
        'data/clean/expcontr.csv',
    )
)

#show which columns are available in the ecology table
outdf.columns

Index(['Unnamed: 0', 'pid', 'Age', 'Male', 'CMVstatus', 'mo_post_ART',
       'study_months', 'CD4', 'Nadir CD4', 'mo_pre_ART', 'race', 'cohort',
       'cohort_num', 'HIVstatus', 'initial_N', 'resample_size_TCR',
       'resample_size_HIV', 'color', 'd0i', 'd1i', 'd2i', 'ali', 'd0_nH',
       'd1_nH', 'd2_nH', 'd0pst', 'd1pst', 'd2pst', 'alpst', 'd0pst_nH',
       'd1pst_nH', 'd2pst_nH', 'Npro', 'd0pro', 'd1pro', 'd2pro', 'alpro'],
      dtype='object')

In [34]:
#full data frame with fitting metrics
#
#Builds one row per metric (name, data_mean, data_std) from the tables loaded above
#(outdf, lamdf, IPDAdf, TCRrho) and writes the result to data_metrics.csv.
#Spread is np.std with numpy defaults (ddof=0), except the IPDA rows, which carry the
#'se' column of IPDAdf.
#NOTE(review): that mixes standard errors and standard deviations in data_std -- confirm intended.

def _mean_std(vals):
    """Return (np.mean(vals), np.std(vals)) using numpy defaults (ddof=0 for the std)."""
    return np.mean(vals), np.std(vals)

#CD4 levels
mCD4, sCD4 = _mean_std(outdf['CD4'].dropna())

#TCR rank abundance power law (over time)
#time bins on mo_post_ART: t0 is <12, t1 is between 12 and 100, t2 is >100
#NOTE(review): the inequalities are strict, so a row with mo_post_ART exactly 12 or 100
#lands in no bin -- confirm whether those rows should be included somewhere.
tdf0 = outdf[outdf['mo_post_ART']<12]
tdf1 = outdf[(outdf['mo_post_ART']>12) & (outdf['mo_post_ART']<100)]
tdf2 = outdf[outdf['mo_post_ART']>100]

mTCRal0, sTCRal0 = _mean_std(tdf0['ali'].dropna().values)
mTCRal1, sTCRal1 = _mean_std(tdf1['ali'].dropna().values)
mTCRal2, sTCRal2 = _mean_std(tdf2['ali'].dropna().values)

#TCR ecology over time
#Each label is appended in the same pass as its value. Previously the labels were generated
#in a separate loop with the nesting reversed (time outer, diversity inner) while the values
#were diversity outer, time inner, so most ecology rows were paired with the wrong name.
#The value order (d0: t0,t1,t2; d1: t0,t1,t2; d2: ...) is unchanged.
mdl=[]; sdl=[]; enamez=[]
for di,dcol in enumerate(['d0i','d1i','d2i']):
    for ti,tdf in enumerate([tdf0,tdf1,tdf2]):
        m,s = _mean_std(tdf[dcol].dropna().values)
        mdl.append(m)
        sdl.append(s)
        enamez.append('TCR d'+str(di)+' t'+str(ti))

#HIV power law over time
#NOTE(review): split is at 30 months here, not the 12/100 bins used above, although both are
#labelled t1/t2; a row with mo_post_ART exactly 30 is in neither group -- confirm intended.
mHIVal1, sHIVal1 = _mean_std(outdf[outdf['mo_post_ART']<30]['alpro'].dropna().values)
mHIVal2, sHIVal2 = _mean_std(outdf[outdf['mo_post_ART']>30]['alpro'].dropna().values)

#HIV ecology over time, just t1 and t2 (same tdf1/tdf2 bins as the TCR metrics)
#labels are again built alongside the values (d0: t1,t2; d1: t1,t2; d2: t1,t2)
mdlH=[]; sdlH=[]
for di,dcol in enumerate(['d0pro','d1pro','d2pro']):
    for ti,tdf in zip([1,2],[tdf1,tdf2]):
        m,s = _mean_std(tdf[dcol].dropna().values)
        mdlH.append(m)
        sdlH.append(s)
        enamez.append('HIV d'+str(di)+' t'+str(ti))

#expansion/contraction distributions
mlame, slame = _mean_std(lamdf['lam_expand'])
mlamc, slamc = _mean_std(lamdf['lam_contract'])

#read in IPDA rates
#presumably row 0 of IPDAdf is the intact rate and row 1 the defective rate -- verify against IPDArates.csv
mInt = IPDAdf['mean'].iloc[0]
sInt = IPDAdf['se'].iloc[0]
mDef = IPDAdf['mean'].iloc[1]
sDef = IPDAdf['se'].iloc[1]

#TCR fold changes
mrho, srho = _mean_std(TCRrho['init_foldchg_rho'])

#enamez holds the TCR labels followed by the HIV labels, matching mdl+mdlH below
metric_list=['CD4 count',
             'TCR alpha t0','TCR alpha t1','TCR alpha t2',
             'HIV alpha t1','HIV alpha t2',
             'TCR lam expand','TCR lam contract',
             'Intact decay','Defective decay',
             'TCR correlation']+enamez

mean_list = [mCD4,mTCRal0,mTCRal1,mTCRal2,mHIVal1,mHIVal2,mlame,mlamc,mInt,mDef,mrho]+mdl+mdlH
std_list = [sCD4,sTCRal0,sTCRal1,sTCRal2,sHIVal1,sHIVal2,slame,slamc,sInt,sDef,srho]+sdl+sdlH

#every label needs exactly one mean and one spread
assert len(metric_list)==len(mean_list)==len(std_list), 'metric names and values are out of step'

dfM_data = pd.DataFrame({'name':metric_list,'data_mean':mean_list,'data_std':std_list})

#written with the default index column, as before
dfM_data.to_csv('data_metrics.csv')

In [36]:
#display the metric table (columns: name, data_mean, data_std)
dfM_data

Unnamed: 0,name,data_mean,data_std
0,CD4 count,590.954545,199.302985
1,TCR alpha t0,0.473809,0.126596
2,TCR alpha t1,0.433132,0.098152
3,TCR alpha t2,0.506257,0.171211
4,HIV alpha t1,0.737379,0.38963
5,HIV alpha t2,1.273764,0.284946
6,TCR lam expand,-0.47775,0.115612
7,TCR lam contract,-0.451298,0.141255
8,Intact decay,-0.017419,0.00554
9,Defective decay,-0.007199,0.005339


In [32]:
#number of entries collected in mean_list
len(mean_list)

24