In [1]:
import pandas as pd
import datetime as dt
from dateutil.relativedelta import relativedelta
from datetime import date

import warnings 
warnings.filterwarnings("ignore")

In [2]:
data_source = "Active on ART Patients Linelist_Jun-2024.csv"

df = pd.read_csv(data_source, 
                 usecols=['CCC No', 'Sex', 'Age at reporting','Art Start Date','Last VL Result','Last VL Date', 'Active in PMTCT', 'Self Visit Date','Next Appointment Date'], 
                 parse_dates=['Art Start Date','Last VL Date','Self Visit Date','Next Appointment Date'])  

df.columns = (df.columns.str.replace(" ","_").str.lower())
df.rename(columns = {"age_at_reporting":"age","self_visit_date":"last_self_visit"}, inplace=True)

df.art_start_date = pd.to_datetime(df.art_start_date)
df.last_vl_date = pd.to_datetime(df.last_vl_date)
df.last_self_visit = pd.to_datetime(df.last_self_visit)
df.next_appointment_date = pd.to_datetime(df.next_appointment_date)

In [3]:
#elligible for VL - on ART for more than 3 months
elligible_df = df[~(df.art_start_date > pd.to_datetime(date.today() + relativedelta(months=-3)))]
elligible_df

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
0,1403300035,F,39,2014-02-28,LDL,2023-10-26,No,2024-04-26,2024-10-25
1,1403300018,F,36,2014-02-27,LDL,2024-01-26,No,2024-04-23,2024-10-22
2,1403300031,M,41,2014-02-20,LDL,2024-04-17,No,2024-04-17,2024-10-16
3,1403300032,M,91,2016-09-14,LDL,2023-10-30,No,2024-11-04,2024-09-10
4,1403300030,F,43,2014-02-20,LDL,2023-10-23,No,2024-01-23,2024-07-22
...,...,...,...,...,...,...,...,...,...
1080,1347605910,F,26,2016-04-19,LDL,2024-05-22,No,2024-06-14,2024-10-12
1083,1287604611,F,32,2019-11-09,LDL,2024-05-21,No,2024-05-21,2024-08-19
1084,1412005513,F,55,2011-08-22,LDL,2024-05-17,No,2024-06-17,2024-09-16
1085,1336900114,M,50,2020-04-24,432,2024-05-23,No,2024-06-06,2024-05-07


In [4]:
#vl not in the last year
pmtct_art = 'Yes'
elligible_df[elligible_df.last_vl_date < pd.to_datetime(date.today() + relativedelta(months=-12))].query('age > 24').query('active_in_pmtct != @pmtct_art').to_csv('valid1.csv')
#exclude pmctc yes

In [5]:
#vl not in the last 6 months age below 25 not on pmtct
elligible_df[elligible_df.last_vl_date < pd.to_datetime(date.today() + relativedelta(months=-6))].query('age<25').query('active_in_pmtct != @pmtct_art').to_csv('valid.csv')

In [6]:
#vl not in the last 6 months pmtct
elligible_df[elligible_df.last_vl_date < pd.to_datetime(date.today() + relativedelta(months=-6))].query('active_in_pmtct == @pmtct_art').to_csv('validpmtct.csv')

In [7]:
#elligible but with no vl history
elligible_df[elligible_df.last_vl_result.isnull()]

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
1061,1403301340,F,35,2024-02-13,,NaT,No,2024-02-22,2024-05-07
1066,1403301343,F,42,2024-02-22,,NaT,No,2024-05-20,2024-06-20
1073,1403301345,F,30,2024-03-15,,NaT,No,2024-04-16,2024-09-07


In [8]:
df.query('age <19 and age >0').shape[0]

84

In [9]:
elligible_df[elligible_df.active_in_pmtct.str.contains('Yes')]

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
55,1403300345,F,37,2014-03-27,LDL,2024-04-29,Yes,2024-06-28,2024-07-26
108,1403300284,F,40,2016-07-29,LDL,2024-04-23,Yes,2024-04-23,2024-07-22
115,1403300069,F,30,2014-04-29,62,2024-05-13,Yes,2024-05-13,2024-07-15
125,1403300399,F,41,2014-09-04,LDL,2024-01-30,Yes,2024-10-06,2024-09-12
132,1403300404,F,32,2014-08-28,LDL,2024-05-17,Yes,2024-06-21,2024-07-26
...,...,...,...,...,...,...,...,...,...
1049,1403301332,F,30,2023-11-16,LDL,2024-02-23,Yes,2024-06-21,2024-07-26
1055,1409100039,F,19,2013-04-15,LDL,2024-04-01,Yes,2024-07-06,2024-08-07
1056,1403301337,F,31,2024-01-22,LDL,2024-08-04,Yes,2024-06-06,2024-07-22
1062,1583408256,F,44,2009-06-07,LDL,2024-02-13,Yes,2024-06-14,2024-07-29


In [10]:
#assigning dc status
def dc(val):
    if val in {'Standard Care'}:
        return 'Not On DC'
    return 'On DC'

#(active_art_df.assign(dc_status=df.differentiated_care_model.apply(dc)))

In [11]:
#assigning age groups
def age_group(val):
    if val >=0 and val <10:
        return 'paeds'
    if val >=10 and val <=19:
        return 'adolescents'
    return 'adult'

(active_art_df.assign(age_category=df.age.apply(age_group)))

NameError: name 'active_art_df' is not defined

In [None]:
#bins = [0,9,14,19,24,100]
#labels = ['pead','teen','adolescenet','young adult','adult']
#active_art_df['age_category'] = pd.cut(active_art_df['age'],bins, labels = labels)
#active_art_df[active_art_df.age_category.isnull()]

In [None]:
print(f'TX Current : {active_art_df.shape[0]}')

In [None]:
print(f'Date : {date.today().strftime("%d/%m/%Y")}')