In [119]:
import os

import pandas as pd
from redcap import Project

# REDCap API configuration.
# The API token is a credential and must not be stored in the notebook, so it
# is read from the environment. The token that used to be hardcoded here has
# been exposed and should be revoked and regenerated in REDCap.
api_url = os.environ.get('REDCAP_API_URL', 'https://redcap.mrc.gm:8443/redcap/api/')
api_key = os.environ.get('REDCAP_API_KEY')
if not api_key:
    raise RuntimeError(
        "REDCAP_API_KEY is not set; export your REDCap API token in the "
        "environment before running this notebook."
    )

# Client used by every export cell below.
project = Project(api_url, api_key)

In [120]:
# Export every record of the household registration instrument from REDCap.
HHRegistration_data = project.export_records(
    forms=['household_registration_form'],
)

# Load the exported records into a DataFrame and list the columns available.
HHold_data = pd.DataFrame(HHRegistration_data)
print(HHold_data.columns)

Index(['compoundno', 'redcap_event_name', 'redcap_repeat_instrument',
       'redcap_repeat_instance', 'hrf_direction', 'hrf_compnumconsent',
       'hrf_vname', 'hrf_hhname', 'hrf_telephone_hhh', 'hrf_hhsize',
       'hrf_women1617', 'hrf_women1839', 'hrf_date', 'hrf_staffid',
       'household_registration_form_complete'],
      dtype='object')


In [121]:
# Keep only the household-registration fields needed for the per-compound summary.
# .copy() yields an independent frame, so the column assignment below never
# writes into a slice of HHold_data.
HH_data = HHold_data[
    ['compoundno', 'redcap_repeat_instrument', 'redcap_repeat_instance',
     'hrf_women1839', 'hrf_date', 'hrf_staffid']
].copy()

# Parse the registration date; blank or unparseable values become NaT.
HH_data['hrf_date'] = pd.to_datetime(HH_data['hrf_date'], errors='coerce')

# Compound ids look like '70-001'. Compare the leading number numerically:
# a plain string comparison (>= '70') is lexicographic, so it would wrongly
# drop '100-001' and wrongly keep '9-001'. Ids without a numeric prefix
# become NaN and are excluded.
hh_compound_prefix = pd.to_numeric(
    HH_data['compoundno'].astype(str).str.split('-').str[0],
    errors='coerce',
)

# Keep dated registrations for compounds numbered 70 and above.
HH_data = HH_data[
    (HH_data['hrf_date'].notnull()) &
    (hh_compound_prefix >= 70)
]

print(HH_data)

     compoundno     redcap_repeat_instrument redcap_repeat_instance  \
2135     70-001  household_registration_form                      1   
2136     70-002  household_registration_form                      1   
2137     70-003  household_registration_form                      1   
2138     70-004  household_registration_form                      1   
2140     70-005  household_registration_form                      1   
...         ...                          ...                    ...   
3020     85-011  household_registration_form                      2   
3021     85-012  household_registration_form                      1   
3022     85-013  household_registration_form                      1   
3023     85-014  household_registration_form                      1   
3024     85-015  household_registration_form                      1   

     hrf_women1839   hrf_date hrf_staffid  
2135             1 2024-01-23       lcham  
2136             1 2024-01-23       lcham  
2137           

In [122]:
# Export every record of the enumeration and sensitisation instrument from REDCap.
Enumeration_data = project.export_records(
    forms=['enumeration_and_sensitisation'],
)

# Load the exported records into a DataFrame and list the columns available.
EnumData = pd.DataFrame(Enumeration_data)
print(EnumData.columns)

Index(['compoundno', 'redcap_event_name', 'redcap_repeat_instrument',
       'redcap_repeat_instance', 'enu_village', 'enu_comp', 'enu_household',
       'wk_ckno', 'enu_date', 'enu_name', 'enu_dob', 'enu_mname', 'enu_fname',
       'enu_age', 'enu_dob_estimated', 'enu_dobsource', 'enu_sensitisation',
       'sen_date', 'sen_visitstatus', 'sen_comment', 'sen_ppregnant',
       'sen_explained', 'sen_icd_given', 'sen_contact1', 'sen_contact2',
       'sen_contact3', 'sen_contact4', 'sen_outcome', 'sen_staffid',
       'enumeration_and_sensitisation_complete'],
      dtype='object')


In [123]:
# Keep only the enumeration fields needed for the sensitisation list.
# .copy() yields an independent frame, so the column assignment below never
# writes into a slice of EnumData.
Enum_data = EnumData[
    ['compoundno', 'redcap_repeat_instrument', 'redcap_repeat_instance',
     'enu_name', 'enu_dob', 'enu_age', 'enu_mname', 'wk_ckno', 'enu_fname',
     'enu_village', 'enu_comp', 'enu_household']
].copy()

# Parse the date of birth; blank or unparseable values become NaT.
Enum_data['enu_dob'] = pd.to_datetime(Enum_data['enu_dob'], errors='coerce')

# Compare the leading number of the compound id numerically. A plain string
# comparison (>= '70') is lexicographic, so it would wrongly drop '100-001'
# and wrongly keep '9-001'. Ids without a numeric prefix become NaN and are
# excluded.
enum_compound_prefix = pd.to_numeric(
    Enum_data['compoundno'].astype(str).str.split('-').str[0],
    errors='coerce',
)

# Keep compounds numbered 70 and above. Rows with a missing date of birth or
# a blank name are not filtered out here.
Enum_data = Enum_data[enum_compound_prefix >= 70]

# Repeat-instance number of each row as text. str(Series) would give the
# printed repr of the whole Series, so convert element-wise instead.
inst_number = Enum_data['redcap_repeat_instance'].astype(str)

## List of potentials: selected enumeration fields under report-friendly headers.
Sensitized_List = Enum_data[
    ['wk_ckno', 'enu_name', 'enu_dob', 'enu_age', 'enu_mname', 'enu_fname',
     'enu_village', 'enu_comp', 'enu_household']
].rename(columns={
    'wk_ckno': 'EKNo.',
    'enu_name': 'Name of Potential',
    'enu_dob': 'DoB',
    'enu_age': 'Age',
    'enu_mname': 'Mother Name',
    'enu_fname': 'Father Name',
    'enu_village': 'Village',
    'enu_comp': 'Compound',
    'enu_household': 'Household',
})

# Save the list to a CSV file (row index omitted).
Sensitized_List.to_csv('Sensitized_East.csv', index=False)


In [124]:
## Sum of potentials in each compound
# Rebind HH_data with 'hrf_women1839' converted to a numeric column so it can be summed.
HH_data = HH_data.assign(
    hrf_women1839=pd.to_numeric(HH_data['hrf_women1839'])
)

# Total 'hrf_women1839' per compound, returned as a two-column frame
# ('compoundno', 'sum_1839').
sum_of_potentials = (
    HH_data
    .groupby('compoundno')['hrf_women1839']
    .sum()
    .rename('sum_1839')
    .reset_index()
)

print(sum_of_potentials)


    compoundno  sum_1839
0       70-001       1.0
1       70-002       1.0
2       70-003       1.0
3       70-004       3.0
4       70-005       1.0
..         ...       ...
525     85-011       1.0
526     85-012       1.0
527     85-013       1.0
528     85-014       0.0
529     85-015       2.0

[530 rows x 2 columns]


In [125]:
## Potentials sensitized in each compound
Enum_data = pd.DataFrame(Enum_data)

# Number of enumeration rows per compound, as a two-column frame
# ('compoundno', 'sum_of_compno').
sensitized_potentials = (
    Enum_data
    .groupby('compoundno')
    .size()
    .rename('sum_of_compno')
    .reset_index()
)

print(sensitized_potentials)


    compoundno  sum_of_compno
0       70-001              2
1       70-002              2
2       70-003              1
3       70-004              4
4       70-005              1
..         ...            ...
592     85-012              1
593     85-013              2
594     85-014              1
595     85-015              3
596     85-016              1

[597 rows x 2 columns]


In [126]:

# Join the registered totals with the enumerated counts, one row per compound.
# NOTE(review): this is the default inner join, so a compound present in only
# one of the two frames is dropped from the comparison. Confirm that is
# intended; how='outer' would keep such compounds.
merged_df = sum_of_potentials.merge(sensitized_potentials, on='compoundno')

# Registered women (18-39) minus enumerated rows; negative means more rows
# were enumerated than were registered.
merged_df['1839-potential'] = merged_df['sum_1839'].sub(merged_df['sum_of_compno'])

# Columns carried into the final report, in display order.
east_df = merged_df.loc[
    :, ['compoundno', 'sum_1839', 'sum_of_compno', '1839-potential']
]

print(east_df)


    compoundno  sum_1839  sum_of_compno  1839-potential
0       70-001       1.0              2            -1.0
1       70-002       1.0              2            -1.0
2       70-003       1.0              1             0.0
3       70-004       3.0              4            -1.0
4       70-005       1.0              1             0.0
..         ...       ...            ...             ...
525     85-011       1.0              2            -1.0
526     85-012       1.0              1             0.0
527     85-013       1.0              2            -1.0
528     85-014       0.0              1            -1.0
529     85-015       2.0              3            -1.0

[530 rows x 4 columns]


In [127]:
# Build the final per-compound report with human-readable column headers.
# Staff ID is not included: east_df does not carry 'hrf_staffid'.
East_kiang = east_df[
    ['compoundno', 'sum_1839', 'sum_of_compno', '1839-potential']
].rename(columns={
    'compoundno': 'Compound No.',
    'sum_1839': 'Potentials',
    'sum_of_compno': 'Sensitized',
    '1839-potential': 'Differencies',
})

# Write the report to a CSV file (row index omitted).
East_kiang.to_csv('East_Kiang.csv', index=False)
