In [2]:
import pandas as pd
import datetime as dt
from dateutil.relativedelta import relativedelta
from datetime import date

import warnings 
warnings.filterwarnings("ignore")

In [3]:
# Location of the line-list CSV export read by this notebook.
df_source = "Active on ART Patients Linelist_Jun-2024.csv"

# Columns kept from the export; everything else in the file is skipped.
LINELIST_COLUMNS = [
    'CCC No',
    'Sex',
    'Age at reporting',
    'Art Start Date',
    'Last VL Result',
    'Last VL Date',
    'Active in PMTCT',
    'Self Visit Date',
    'Next Appointment Date',
]

# Subset of the kept columns that pandas should parse as dates while reading.
LINELIST_DATE_COLUMNS = [
    'Art Start Date',
    'Last VL Date',
    'Self Visit Date',
    'Next Appointment Date',
]

df = pd.read_csv(
    df_source,
    usecols=LINELIST_COLUMNS,
    parse_dates=LINELIST_DATE_COLUMNS,
)


In [4]:
# Rewrite every header in lower case with spaces replaced by underscores,
# e.g. 'Art Start Date' -> 'art_start_date'.
df.columns = df.columns.map(lambda label: label.lower().replace(" ", "_"))

def prep_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame with short column names and datetime date columns.

    ``age_at_reporting`` and ``self_visit_date`` are renamed to ``age`` and
    ``last_self_visit``, and the four date columns are passed through
    ``pd.to_datetime``.

    The rename happens first and silently skips labels that are absent, and
    the date conversion is applied under the final column names. A frame
    that has already been prepared can therefore be passed in again (for
    example when the notebook cell ``df = prep_df(df)`` is re-run) and is
    returned unchanged, instead of failing on the renamed
    ``self_visit_date`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Line list whose column names are already lower-case with
        underscores.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If one of the date columns is missing from ``df``.
    """
    renamed = df.rename(columns={"age_at_reporting": "age",
                                 "self_visit_date": "last_self_visit"})
    date_columns = ("art_start_date", "last_vl_date",
                    "last_self_visit", "next_appointment_date")
    # assign() overwrites existing columns where they stand, so the column
    # order of the input is preserved.
    return renamed.assign(**{name: pd.to_datetime(renamed[name]) for name in date_columns})
    
# Rebind `df` to the prepared frame returned by prep_df.
df = df.pipe(prep_df)

In [5]:
# Rows flagged 'Yes' in active_in_pmtct whose last_vl_date falls before the
# date six months back from today. Rows with no last_vl_date never satisfy
# the comparison, so they are not listed here.
# NOTE(review): the cutoff is derived from date.today(), so the rows shown
# depend on the day the cell is executed.
six_months_ago = pd.to_datetime(date.today() - relativedelta(months=6))
pmtct_df = df.loc[df.active_in_pmtct == 'Yes']
pmtct_df.loc[pmtct_df['last_vl_date'] < six_months_ago]

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
377,1403300535,F,24,2015-05-07,LDL,2024-01-16,Yes,2024-06-28,2024-07-19
395,1403300572,F,32,2015-05-20,162,2023-12-12,Yes,2024-05-07,2024-07-16
399,1403300581,F,35,2015-04-29,51,2024-01-04,Yes,2024-06-07,2024-07-08
449,1403300672,F,35,2015-08-12,LDL,2023-09-25,Yes,2024-06-24,2024-07-22
490,1403300744,F,24,2016-08-18,LDL,2023-10-19,Yes,2024-06-10,2024-09-06
509,1403300775,F,45,2016-01-14,LDL,2023-11-21,Yes,2024-06-18,2024-08-16
566,1839601531,F,32,2016-09-01,LDL,2024-01-16,Yes,2024-06-18,2024-07-19
622,1376000820,F,25,2017-03-20,LDL,2024-01-16,Yes,2024-06-21,2024-07-26
647,1403300968,F,37,2017-06-29,LDL,2024-01-16,Yes,2024-04-15,2024-07-12
881,1403301227,F,29,2020-10-13,LDL,2023-11-13,Yes,2024-05-20,2024-07-19


In [6]:
# Rows flagged 'No' in active_in_pmtct, aged 24 or below, whose last_vl_date
# falls before the date six months back from today.
# NOTE(review): the cutoff is derived from date.today(), so the rows shown
# depend on the day the cell is executed.
no_pmtct_df = df.loc[df.active_in_pmtct == 'No']
six_months_ago = pd.to_datetime(date.today() - relativedelta(months=6))
vl_is_old = no_pmtct_df['last_vl_date'] < six_months_ago
is_24_or_under = no_pmtct_df['age'] <= 24
no_pmtct_df.loc[vl_is_old & is_24_or_under]

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
447,1403300670,M,23,2015-11-11,LDL,2023-09-01,No,2023-09-01,2024-06-10
756,1403301104,F,15,2018-09-27,LDL,2023-05-02,No,2024-01-05,2024-08-23
857,1403301208,F,23,2020-05-26,LDL,2023-07-22,No,2024-01-10,2024-07-05
873,1403301220,M,20,2020-08-19,42,2023-11-27,No,2023-11-27,2024-07-24
880,1403301226,F,23,2020-10-07,LDL,2023-07-22,No,2024-06-14,2024-12-10
883,1304706827,M,16,2009-05-15,LDL,2023-11-07,No,2023-11-20,2024-07-15
1038,1403301327,M,23,2023-07-24,LDL,2023-11-07,No,2024-05-08,2024-11-05
1039,1371900944,F,24,2022-01-17,LDL,2023-07-18,No,2024-01-17,2024-07-18


In [7]:
# Rows of no_pmtct_df aged 25 or above whose last_vl_date falls before the
# date twelve months back from today.
# NOTE(review): the cutoff is derived from date.today(), so the rows shown
# depend on the day the cell is executed.
twelve_months_ago = pd.to_datetime(date.today() - relativedelta(months=12))
vl_is_old = no_pmtct_df['last_vl_date'] < twelve_months_ago
is_25_or_over = no_pmtct_df['age'] >= 25
no_pmtct_df.loc[vl_is_old & is_25_or_over]

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
8,1403300028,F,34,2014-02-24,LDL,2023-06-08,No,2024-01-12,2024-07-15
15,1403300090,F,66,2014-03-03,LDL,2023-03-22,No,2024-04-23,2024-07-23
22,1403300067,F,36,2014-07-22,LDL,2023-07-13,No,2024-04-16,2024-10-15
253,1403300285,M,46,2014-06-23,LDL,2016-09-15,No,2024-06-07,2024-07-04
271,1403300374,M,50,2014-04-03,77,2023-05-26,No,2024-04-11,2024-10-09
279,1403300051,M,45,2014-03-05,LDL,2023-07-17,No,2024-04-22,2024-10-18
287,1403300381,F,42,2014-04-10,LDL,2023-05-05,No,2024-01-09,2024-07-05
300,1403300469,M,44,2014-10-22,25,2023-06-07,No,2024-04-11,2024-07-10
409,1403300605,F,47,2015-05-25,LDL,2023-05-08,No,2024-06-05,2024-07-05
438,1403300649,F,53,2015-09-04,LDL,2023-06-13,No,2023-10-13,2024-07-25


In [9]:
# Rows that have no value recorded in last_vl_result.
has_no_vl_result = df['last_vl_result'].isna()
df.loc[has_no_vl_result]

Unnamed: 0,ccc_no,sex,age,art_start_date,last_vl_result,last_vl_date,active_in_pmtct,last_self_visit,next_appointment_date
1054,1403301351,M,32,2024-06-28,,NaT,No,2024-06-28,2024-07-12
1061,1403301340,F,35,2024-02-13,,NaT,No,2024-02-22,2024-07-05
1066,1403301343,F,42,2024-02-22,,NaT,No,2024-05-20,2024-06-20
1073,1403301345,F,30,2024-03-15,,NaT,No,2024-04-16,2024-07-09
1078,1403301346,F,36,2024-04-05,,NaT,No,2024-06-17,2024-07-15
1079,1403301347,M,30,2024-04-08,,NaT,No,2024-06-26,2024-07-24
1081,1403301348,F,34,2024-04-29,,NaT,No,2024-06-27,2024-07-25
1082,1403301349,M,49,2024-04-29,,NaT,No,2024-05-29,2024-07-25
1086,1403301350,F,28,2024-05-29,,NaT,No,2024-06-17,2024-07-16
