In [1]:
import pandas as pd
import json

# Open the workbook once; the next cell reads several sheets from this same handle.
dashboard = pd.ExcelFile("data.xlsx")

In [2]:
# Read the three worksheets used by this notebook from the already-open workbook,
# in the order Statistics -> AEs -> Referrals.
data, aes, referrals = (
    pd.read_excel(dashboard, sheet_name)
    for sheet_name in ("Statistics", "AEs", "Referrals")
)

# Column-name prefixes whose missing values are replaced with 0.
_ZERO_FILL_PREFIXES = (
    'shangring',
    'sgDisposable',
    'sgReusable',
    'total',
    'hiv',
    'fu',
    'referralToOtherServices',
)

# Columns that are deliberately left untouched by the numeric conversion.
_KEEP_AS_IS_COLUMNS = frozenset({'ReferrelRecordingYear', 'AERecordingYear', 'year'})


def convert_numeric_text_columns(df):
    """Zero-fill count columns and convert digit-only text columns to numbers.

    For every column of ``df``:

    * if the column name starts with one of ``_ZERO_FILL_PREFIXES`` (the match
      is case-sensitive), its NaN values are replaced with 0;
    * otherwise, unless the column is listed in ``_KEEP_AS_IS_COLUMNS``, it is
      passed through ``pd.to_numeric`` when *every* value, rendered as a
      string, consists only of numeric characters. A column containing a
      missing value, a decimal point or a minus sign is therefore left as is.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified IN PLACE.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (not a copy), so the caller's
        original variable sees every change as well.
    """
    for col in df.columns:
        # Column labels are not guaranteed to be strings (a sheet can have
        # numeric headers), so compare against their string form instead of
        # calling .startswith on the raw label.
        if str(col).startswith(_ZERO_FILL_PREFIXES):
            df[col] = df[col].fillna(0)
        elif col not in _KEEP_AS_IS_COLUMNS:
            # Only convert when the whole column is digit-only text; a single
            # non-numeric value keeps the column unchanged.
            if df[col].astype(str).str.isnumeric().all():
                df[col] = pd.to_numeric(df[col])
    return df

# Clean the 'Statistics' frame. convert_numeric_text_columns assigns into the frame
# it receives and returns that same frame, so `converted_data` and `data` refer to
# one object: `data` itself is changed by this call.
converted_data = convert_numeric_text_columns(data)

# Display the converted data
converted_data


Unnamed: 0,AERecordingMonth,AERecordingSite,AERecordingYear,District,ReferrelRecordingMonth,ReferrelRecordingSite,ReferrelRecordingYear,TotalMCsBYMethod,aeComments,ae_classification,...,total_surgicalReusable.4,total_surgicalReusable.5,total_surgicalReusable.6,total_surgicalReusable.7,total_surgicalReusable.8,totalhivNegative,totalhivNegativeNC,uncircumcisedClientsForHTS,vmmc_number,year
0,December,Gwanyika Rural Health Centre,2024.0,GOKWE SOUTH,December,Gwanyika Rural Health Centre,2024.0,37,Client is healing,Severe,...,OK,0,OK,OK,0,37,0,0,345534.0,2024
1,,,,GOKWE SOUTH,,,,25,,,...,0,0,OK,0,0,25,0,0,,2024
2,November,Katema RDC,2024.0,GOKWE SOUTH,,,,94,Client is healing,Moderate,...,OK,0,OK,OK,0,13,0,0,234556.0,2024
3,November,Jahana Clinic,2024.0,GOKWE SOUTH,,,,80,"Sature elavated by lignocaine,\nclient healing",Severe,...,OK,OK,OK,OK,OK,80,1,1,234455.0,2024


In [3]:
# Show the frame that was read from the 'AEs' sheet.
aes

Unnamed: 0,AERecordingMonth,AERecordingSite,AERecordingYear,District,cancelthisAEReport,aeComments,ae_classification,ae_type_code,circumcising_cadre,client_age,date_ae_identified,mcMethod,vmmc_number
0,November,Krima Clinic,2024,GOKWE SOUTH,,Client healing,Severe,COA,Nurse,25,2024-11-10,Surgical,467233
1,November,Krima Clinic,2024,GOKWE SOUTH,,Client healing,Moderate,BL,Nurse,18,2024-11-08,Surgical,245555
2,December,Gwanyika Rural Health Centre,2024,GOKWE SOUTH,,Client is healing,Severe,BL,Nurse,24,2024-12-12,Surgical,345534
3,November,Katema RDC,2024,GOKWE SOUTH,,Client is healing,Moderate,BL,Nurse,25,2024-11-14,Surgical,234556
4,November,Jahana Clinic,2024,GOKWE SOUTH,,"Sature elavated by lignocaine,\nclient healing",Severe,COA,Nurse,24,2024-11-10,Surgical,234455


In [4]:
def _matching_records(frame, site_col, month_col, year_col, row):
    """Return the rows of ``frame`` recorded for ``row``'s facility, month and year.

    Parameters
    ----------
    frame : pandas.DataFrame
        Detail frame to search (``aes`` or ``referrals``).
    site_col, month_col, year_col : str
        Names of the columns in ``frame`` holding the site, month and year.
    row : pandas.Series
        One row of ``data``; its 'facilityName', 'recordingMonth' and 'year'
        values are the keys being looked up.

    Returns
    -------
    list of dict
        One dict per matching row (empty list when nothing matches). Missing
        values are replaced with "" so that the serialised output never
        contains a bare ``NaN`` token, which is not valid JSON, and so nested
        records use the same blank marker as the top-level rows.
    """
    matches = frame[
        (frame[site_col] == row['facilityName']) &
        (frame[month_col] == row['recordingMonth']) &
        (frame[year_col] == row['year'])
    ]
    return matches.fillna("").to_dict(orient='records')


# Cast the year and month columns to strings so the three frames can be compared
# on them with plain equality.
# NOTE(review): a year column that was read as float (e.g. because of blank cells)
# becomes '2024.0' here and will not equal '2024' -- confirm the sheets' dtypes.
data['year'] = data['year'].astype(str)
aes['AERecordingYear'] = aes['AERecordingYear'].astype(str)
referrals['ReferrelRecordingYear'] = referrals['ReferrelRecordingYear'].astype(str)

data['recordingMonth'] = data['recordingMonth'].astype(str)
aes['AERecordingMonth'] = aes['AERecordingMonth'].astype(str)
referrals['ReferrelRecordingMonth'] = referrals['ReferrelRecordingMonth'].astype(str)

# Replace the remaining missing values in the statistics frame with "".
data = data.fillna("")

# Build one dict per statistics row, each carrying the list of AE records and the
# list of referral records for the same facility, month and year.
json_data = []
for _, row in data.iterrows():
    row_data = row.to_dict()

    # Only look up AE records when the row reports at least one AE.
    if row['totalAES'] >= 1:
        row_data['matchingAES'] = _matching_records(
            aes, 'AERecordingSite', 'AERecordingMonth', 'AERecordingYear', row
        )
    else:
        row_data['matchingAES'] = []

    # Only look up referral records when the row reports at least one referral.
    if row['referralToOtherServices'] >= 1:
        row_data['otherReferrals'] = _matching_records(
            referrals, 'ReferrelRecordingSite', 'ReferrelRecordingMonth', 'ReferrelRecordingYear', row
        )
    else:
        row_data['otherReferrals'] = []

    json_data.append(row_data)

# Serialise once and reuse the text for both the file and the printed output.
result_json = json.dumps(json_data, indent=4)

output_file = "data.json"

# Save the JSON data to the file
with open(output_file, "w", encoding="utf-8") as file:
    file.write(result_json)

# Print the JSON
print(result_json)

[
    {
        "AERecordingMonth": "December",
        "AERecordingSite": "Gwanyika Rural Health Centre",
        "AERecordingYear": 2024.0,
        "District": "GOKWE SOUTH",
        "ReferrelRecordingMonth": "December",
        "ReferrelRecordingSite": "Gwanyika Rural Health Centre",
        "ReferrelRecordingYear": 2024.0,
        "TotalMCsBYMethod": 37,
        "aeComments": "Client is healing",
        "ae_classification": "Severe",
        "ae_type_code": "BL",
        "checkMonthValidation": 1.0,
        "checkYearValidation": 1.0,
        "checkifFacilityHasBeenSaved": 0,
        "circumcising_cadre": "Nurse",
        "client_age": 24.0,
        "date_ae_identified": "2024-12-12",
        "facilityName": "Gwanyika Rural Health Centre",
        "followUpTotal": "",
        "fu15-19": 25,
        "fu20-24": 12.0,
        "fu25-29": 0.0,
        "fu30-34": 0,
        "fu35-39": 0.0,
        "fu40-44": 0.0,
        "fu45-49": 0.0,
        "fu50": 0.0,
        "hivNegative15-19": 2