   i.      CAB-LA Patient IDI cohort: Per Table 1 (below), we will systematically select ~n=30 patients per each region (South West Uganda and Kenya) (total for both countries 40 < ~n < 72; due to time and resource limitations, a smaller sample may be collected during this study), with samples within region to be balanced proportional to overall intervention enrolment by:

    Original trials (OPD, ANC/PNC, VHT);
    Gender; and
    Community.
Please note, we would to interview some of those who opted for the injection but then after receiving one or two injections have decided not to continue with them. However, we would not want to interview more than a total of 10 - 15 of those no longer taking CAB-LA. Please indicate those persons in the list (perhaps with an *).

Table 1 (below)
·  N= 12-20 outpatient clinic clients (n=3-5 per 4 communities)

 

·  N= 16-20 Antenatal and postnatal clients (n= 2-3 per antenatal and postnatal group per 4 communities)

 

·  N= 12-20 men and women recruited from VHT clients (n= 3-5 per 4 communities, gender-balanced)


In [38]:
# import the required libraries
import pandas as pd
import numpy as np
import pyodbc
import json
import sqlalchemy
from sqlalchemy import create_engine
import datetime;
import os

In [113]:
# Create DB connection
def connect_to_mysql():
    # Load connection details from JSON file
    with open('connection_details.json', 'r') as file:
        connection_details = json.load(file)
        mysql_credentials=connection_details['mysql']
    
    # Establish connection to MySQL database
    try:
        connection = create_engine("mysql+pymysql://{user}:{pw}@{host}/{db}"
                .format(host=mysql_credentials['host'],
                user=mysql_credentials['user'],
                pw=mysql_credentials['password'],
                db=mysql_credentials['cab_database']))
        
        print("Successfully connected to MySQL database")
        return connection        
        
        # Close the database connection
        connection.close()
        print("Connection closed")
        return df_partcipant
    except:
        print("Failed to connect to MySQL database: {}")
        return false

In [149]:
#Get connection to the db
conn = connect_to_mysql()

# Pull data
sql = """
    SELECT
        *
    FROM        
        d_participant;
        """
        
df = pd.read_sql(sql, conn)
df = df[['subjid', 'participant_id', 'trial', 'clinic',
       'country', 'study_arm', 'screened', 'screened_met', 'consented',
       'enrolled', 'baseline_visit', 'cab_screen_date', 'cab_enr_date', 'cab_baseline_date', 'age',
       'age_group', 'sex']]



# get List of participants who have had a cab injection
sql = """
    SELECT
        subjid, vdate,cab_vweek, screen_cab_met, injection_received
    FROM 
        schedc_cab_baseline
    WHERE
       screen_cab_met = 1 AND injection_received = 1; 
    """
df_cab = pd.read_sql(sql, conn)

sql = """
    SELECT
        subjid, vdate,cab_vweek, screen_cab_met, injection_received, stop_cab
    FROM 
        schedc_cab_followup; 
    """
df_cab_fu = pd.read_sql(sql, conn)
df_cab_fu['vdate'] = pd.to_datetime(df_cab_fu['vdate'])
df_cab_fu.sort_values('vdate', inplace=True, ascending=False)
df_cab_fu.loc[df_cab_fu['screen_cab_met'] == 0, 'stop_cab'] = 1

df_latest_cab_status = df_cab_fu.groupby('subjid').first().reset_index()

# get those on who ever started cab only
df_int = df[(df['study_arm'] == 'Int') & (df['enrolled'] == 1)]
df_int = df_int.merge(df_cab, on = 'subjid', how = 'inner')
df_int = df_int.merge(df_latest_cab_status, on = 'subjid', how = 'left')
print(df_int.shape)

# get individual trials
df_anc = df_int[(df_int['trial'] == 'ANC')]
df_opd = df_int[(df_int['trial'] == 'OPD')]
df_vht = df_int[(df_int['trial'] == 'VHT/CHV')]

# Get Sample some of those who opted for the injection but then after receiving one or two injections have decided not to continue with them.
# Get those who are wk 24 since start of cab injection and have stopped injection

# get max injections given
summary_data = df_cab_fu.groupby(['subjid']).agg({'injection_received':[
    ('injections', lambda x: np.where(x == 1, 1,0).sum()),
    ('cab_vweek',lambda x: np.where(df_cab_fu['cab_vweek'] == 99, 0,df_cab_fu['cab_vweek']).max()),
    ('stop_cab_vweek',lambda x: np.where(df_cab_fu['stop_cab'] == 1.0, df_cab_fu['cab_vweek'],99).min())
]})
# Reset the index to make  regular columns
summary_data = summary_data.reset_index()

# Rename the aggregated column for clarity
summary_data.columns = ['subjid', 'n_injections', 'max_vweek','stop_cab_vweek']


# add the baseline injection
summary_data['n_injections_inc_baseline'] = summary_data['n_injections'] + 1
# expected injections as at wk 24
summary_data['expected_injections'] = 5
df_stopped = df_cab_fu[df_cab_fu['stop_cab'] == 1]
df_stopped_cab = summary_data[(summary_data['n_injections_inc_baseline']<=2) & (summary_data['subjid'].isin(df_stopped['subjid']))]

df_stopped_cab = df_stopped_cab.merge(df_int[['subjid','trial','sex', 'clinic']], on = 'subjid', how = 'inner')

# Get the sample
# Define the stratification variables for OPD and VHT
strata_columns = ['trial', 'clinic', 'sex']

# Define the desired sample size
sample_size = 1  # Adjust this according to your requirements

# Perform stratified sampling
stratified_sample = df_stopped_cab.groupby(strata_columns, as_index=False, group_keys=False).apply(lambda x: x.sample(n=sample_size, random_state=42))
stratified_sample['ever_stopped_cab'] = 1
# drop anyone who had stopped cab
df_anc = df_anc[~df_anc['subjid'].isin(df_stopped_cab['subjid'])]
df_opd = df_opd[~df_opd['subjid'].isin(df_stopped_cab['subjid'])]
df_vht = df_vht[~df_vht['subjid'].isin(df_stopped_cab['subjid'])]

# Define the desired sample size
sample_size = 5  # Adjust this according to your requirements

# Perform stratified sampling
#stratified_sample = df_int.groupby(strata_columns, as_index=False, group_keys=False).apply(lambda x: x.sample(n=sample_size, random_state=42))

sample_size = 4
stratified_sample_anc = df_anc.groupby(['clinic'], as_index=False, group_keys=False).apply(lambda x: x.sample(n=sample_size, random_state=42))

sample_size = 2

stratified_sample_opd = df_opd.groupby(['clinic', 'sex'], as_index=False, group_keys=False).apply(lambda x: x.sample(n=sample_size, random_state=42))
stratified_sample_vht = df_vht.groupby(['clinic', 'sex'], as_index=False, group_keys=False).apply(lambda x: x.sample(n=sample_size, random_state=42))

stratified_sample_anc['ever_stopped_cab'] = 0
stratified_sample_opd['ever_stopped_cab'] = 0
stratified_sample_vht['ever_stopped_cab'] = 0


include_vars = ['subjid','trial','clinic','sex','ever_stopped_cab']

# Concatenate the DataFrames vertically (bind rows)
df_out = pd.concat([stratified_sample_anc[include_vars], stratified_sample_opd[include_vars], stratified_sample_vht[include_vars], stratified_sample[include_vars]])


# Reset the index
df_out = df_out.reset_index(drop=True)

df_out.sort_values('clinic', inplace=True, ascending=False)
df_out

# check if in ANC trial the the 3 participants were included
#Postnatal vs antenatal (Anyone who signed pregnancy consent will be classified as antenatal)
# There are only 3 Pregnancies reported in ANC Trial
anc_antenatal = ['SP11005006', 'SP11007006', 'SP11004005']
print('ANC LIST')
df_out[df_out['subjid'].isin(anc_antenatal)]

# print line list
df_out.to_csv('qual_IDI_list.csv', encoding='utf-8', index=False)


Successfully connected to MySQL database
(265, 26)
ANC LIST


In [161]:
# Generate list of provider for IDI
#Get connection to the db
conn = connect_to_mysql()
# Sample Providers
sql = """
    SELECT
        *
    FROM 
        prvdr_mm_survey p
    left outer Join meta m on m.run_uuid = p.run_uuid; 
    """
df_prvdr = pd.read_sql(sql, conn)
df_prvdr['subjid'] = df_prvdr['subjid'].astype(int)
df = df_prvdr.copy()

# get list of providers from excel
df_prv_list = pd.read_excel("CAB LA Provider ids- to ew 3rd-7-23.xlsx", sheet_name='Sheet1')
df_prv_list['subjid'] = df_prv_list['Intervierwe ID']

# Change IDs from to avoid duplicates for specific IDs - cloud db
df_prvdr.loc[(df_prvdr['subjid'] == 4) & (df_prvdr['tablet'] == 'Tablet15'), 'subjid'] = 104 #uganda
df_prvdr.loc[(df_prvdr['subjid'] == 7) & (df_prvdr['tablet'] == '532'), 'subjid'] = 107 #Kenya
df_prvdr.loc[(df_prvdr['subjid'] == 12) & (df_prvdr['tablet'] == '501'), 'subjid'] = 112 #Kenya
df_prvdr.loc[(df_prvdr['subjid'] == 17) & (df_prvdr['tablet'] == '525'), 'subjid'] = 117 #Kenya
#df_prvdr.loc[(df_prvdr['subjid'] == 76) & (df_prvdr['tablet'] == 'Tablet15'), 'subjid'] = 176 #Uganda


# Change IDs from to avoid duplicates for specific IDs - excel list
df_prv_list.loc[(df_prv_list['subjid'] == 4) & (df_prv_list['clinic_name'] == 'BUSHENYI'), 'subjid'] = 104 #uganda
df_prv_list.loc[(df_prv_list['subjid'] == 7) & (df_prv_list['clinic_name'] == 'Sena'), 'subjid'] = 107 #Kenya
df_prv_list.loc[(df_prv_list['subjid'] == 12) & (df_prv_list['clinic_name'] == 'Magunga'), 'subjid'] = 112 #Kenya
df_prv_list.loc[(df_prv_list['subjid'] == 17) & (df_prv_list['clinic_name'] == 'Oyani'), 'subjid'] = 117 #Kenya
#df_prv_list.loc[(df_prv_list['subjid'] == 76) & (df_prv_list['clinic_name'] == 'Tablet15'), 'subjid'] = 176 #Uganda


df_prvdr = df_prvdr.merge(df_prv_list, on = 'subjid', how = 'left')

# drop missing or not linked IDs
df_missing_id  = df_prvdr[df_prvdr['Study'].isna()]['subjid']
df_prvdr = df_prvdr[~df_prvdr['subjid'].isin(df_missing_id)]

df_out_provider = df_prvdr[['subjid', 'Study', 'study_visit', 'Country', 'Name', 'Gender', 'Conduct CAB-LA Procedures?', 'clinic_name']]

df_out_prvdr = df_out_provider.groupby('subjid').first().reset_index()
#drop youth and and HTN Linkage
df_out_prvdr = df_out_prvdr[~df_out_prvdr['Study'].isin(['Youth', 'HTN Linkage'])]


# drop RA
df_out_prvdr_oth = df_out_prvdr[~df_out_prvdr['subjid'].isin([107,41, 13])]

df_out_prvdr_ogongo_sibuoche = df_out_prvdr_oth[df_out_prvdr_oth['clinic_name'].isin(['Sibuoche', 'Ogongo', 'ITOJO'])]
df_out_prvdr_oth = df_out_prvdr_oth[~df_out_prvdr_oth['subjid'].isin(df_out_prvdr_ogongo_sibuoche['subjid'])]

sample_size = 2
stratified_sample_prvdr = df_out_prvdr_oth.groupby(['clinic_name'], as_index=False, group_keys=False).apply(lambda x: x.sample(n=sample_size, random_state=150))
# Add Ogongo and Sibouche VHT Providers
stratified_sample_prvdr = pd.concat([stratified_sample_prvdr, df_out_prvdr_ogongo_sibuoche])

print(stratified_sample_prvdr)
stratified_sample_prvdr.to_csv('qual_IDI_list_providers.csv', encoding='utf-8', index=False)

Successfully connected to MySQL database
    subjid    Study  study_visit Country                    Name  Gender  \
19     104   ANC/FP            1  Uganda            Bithire Jane  Female   
3        7   ANC/FP            1  Uganda            Muhindo Bobb    Male   
2        3   ANC/FP            1  Uganda        Muhumuza Deudant    Male   
0        1   ANC/FP            0  Uganda          Nyangoma Betty  Female   
5       12      OPD            0  Uganda          Mwesigye Annet  Female   
4       11      OPD            1  Uganda        Akankwasa Martin    Male   
21     112      OPD            0   Kenya            Joyce Awinja  Female   
9       20      OPD            0   Kenya     Brian Otieno Chodha    Male   
17      70  VHT/CHV            0  Uganda        Kiiza Christiana  Female   
18      71  VHT/CHV            1  Uganda             Guma Darius    Male   
22     117      OPD            0   Kenya            Franck Ogega    Male   
8       19      OPD            1   Kenya    Cav

In [146]:
df_out_prvdr.groupby(['Study', 'clinic_name'], as_index=False, group_keys=False).count()
df_out_prvdr[['subjid','Study', 'clinic_name','Name']]

Unnamed: 0,subjid,Study,clinic_name,Name
0,1,ANC/FP,Bwizibweza,Nyangoma Betty
1,2,ANC/FP,Sena,Araka Sarah
2,3,ANC/FP,Bwizibweza,Muhumuza Deudant
3,7,ANC/FP,BUSHENYI,Muhindo Bobb
4,11,OPD,KITAGATA,Akankwasa Martin
5,12,OPD,KITAGATA,Mwesigye Annet
6,13,OPD,ITOJO,Kamugisha Brian
7,14,OPD,ITOJO,Edna Ntamuhira
8,19,OPD,Oyani,Cavine Charles Adika
9,20,OPD,Magunga,Brian Otieno Chodha


In [159]:
df_out_prvdr['Study'].unique()
df[df['subjid'].isin([12,4,7,12, 17,76])].sort_values('subjid')
df_out_prvdr_ogongo_sibuoche

Unnamed: 0,subjid,Study,study_visit,Country,Name,Gender,Conduct CAB-LA Procedures?,clinic_name
