In [1]:
"""
    This script processes the validation response JSON file generated by validate_response.py.
    As a reminder, the generated JSON file is a list of dictionaries, where each dictionary is a response from a verbal autopsy record generated by a language model.
    Each record is run 10 times, and the results are aggregated into a single dictionary.
    This code will loads the JSON file, then groupby rowid, aggregate the number of times a similar ICD10 code and CGHR10 code is returned, 
    compile all results into a dataframe, and then export the results to CSV files.
"""


import pandas as pd

df = pd.read_csv('resopnse_validation_parsed_20240226_1614.csv')

In [2]:
"""
    Objective:
    Simplify ICDs by dropping subsequent decimals and grouping by rowid.
"""

# remove any ICDs with decimals
df[['cause1_icd10', 'cause2_icd10', 'cause3_icd10', 'cause4_icd10', 'cause5_icd10']] = df[['cause1_icd10', 'cause2_icd10', 'cause3_icd10', 'cause4_icd10', 'cause5_icd10']].map(lambda x: x.split('.')[0] if pd.notnull(x) else x)



In [3]:
df.sample(5)

Unnamed: 0.1,Unnamed: 0,rowid,cause1_icd10,cause1_icd10_prob,cause2_icd10,cause2_icd10_prob,cause3_icd10,cause3_icd10_prob,cause4_icd10,cause4_icd10_prob,cause5_icd10,cause5_icd10_prob,output_created,output_model,output_system_prompt,output_user_prompt,output_usage_completion_tokens,output_usage_prompt_tokens,output_msg,output_probs
751,14004044_7,14004044,P74,0.691812,,,,,,,,,2024-02-26T15:33:27.169984-05:00,gpt-3.5-turbo-0125,You are a physician with expertise in determin...,"With the highest certainty, determine the unde...",4,297,P74.9,"[{'icd': 'P74.9', 'icd_linprob_mean': 0.691811..."
632,14002498_6,14002498,I64,0.760461,,,,,,,,,2024-02-26T15:30:14.090943-05:00,gpt-3.5-turbo-0125,You are a physician with expertise in determin...,"With the highest certainty, determine the unde...",2,391,I64,"[{'icd': 'I64', 'icd_linprob_mean': 0.76046096..."
981,24000608_9,24000608,J45,0.754466,,,,,,,,,2024-02-26T15:35:24.310105-05:00,gpt-3.5-turbo-0125,You are a physician with expertise in determin...,"With the highest certainty, determine the unde...",4,512,J45.9,"[{'icd': 'J45.9', 'icd_linprob_mean': 0.754466..."
54,14001816_0,14001816,A41,0.566662,,,,,,,,,2024-02-26T15:15:20.932913-05:00,gpt-3.5-turbo-0125,You are a physician with expertise in determin...,"With the highest certainty, determine the unde...",2,354,A41,"[{'icd': 'A41', 'icd_linprob_mean': 0.56666243..."
278,24001023_2,24001023,A09,0.689796,,,,,,,,,2024-02-26T15:27:19.158391-05:00,gpt-3.5-turbo-0125,You are a physician with expertise in determin...,"With the highest certainty, determine the unde...",2,520,A09,"[{'icd': 'A09', 'icd_linprob_mean': 0.68979565..."


In [4]:
"""
    Objective: Group by rowid and count the number of times a cause1_icd10 is repeated for each rowid. This tells us how many times an ICD10 is repeatedly given by the model
"""
grouped_df = df.groupby('rowid')

same_cause_count_df = pd.DataFrame(grouped_df['cause1_icd10'].value_counts())
same_cause_count_df = pd.get_dummies(same_cause_count_df['count']).astype(int).groupby('rowid').sum().rename(columns=lambda x: f'same_cause1_icd10_{x}x')


display(same_cause_count_df)

print("same_cause_count_df.sum() binarized:")
print(same_cause_count_df.sum(), "\n")

# reduce all non-zero values to 1 and sum
print("same_cause_count_df.sum() non-binarized:")
print(same_cause_count_df[['same_cause1_icd10_1x', 'same_cause1_icd10_2x', 'same_cause1_icd10_3x', 'same_cause1_icd10_4x', 'same_cause1_icd10_5x', 
                        'same_cause1_icd10_6x', 'same_cause1_icd10_7x', 'same_cause1_icd10_8x', 'same_cause1_icd10_9x', 'same_cause1_icd10_10x']].apply(lambda x: x.astype(bool)).sum()
)

Unnamed: 0_level_0,same_cause1_icd10_1x,same_cause1_icd10_2x,same_cause1_icd10_3x,same_cause1_icd10_4x,same_cause1_icd10_5x,same_cause1_icd10_6x,same_cause1_icd10_7x,same_cause1_icd10_8x,same_cause1_icd10_9x,same_cause1_icd10_10x
rowid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
14000252,0,0,0,0,0,0,0,0,0,1
14000286,0,0,0,0,0,0,0,0,0,1
14000296,0,0,0,0,0,0,0,0,0,1
14000405,1,0,0,0,0,0,0,0,1,0
14000435,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
24002738,0,0,0,0,0,0,0,0,0,1
24002795,1,1,0,0,0,0,1,0,0,0
24002976,0,0,0,0,0,0,0,0,0,1
24003163,0,0,0,0,0,0,0,0,0,1


same_cause_count_df.sum() binarized:
same_cause1_icd10_1x     12
same_cause1_icd10_2x      6
same_cause1_icd10_3x      8
same_cause1_icd10_4x      5
same_cause1_icd10_5x      4
same_cause1_icd10_6x      7
same_cause1_icd10_7x      8
same_cause1_icd10_8x      3
same_cause1_icd10_9x     10
same_cause1_icd10_10x    70
dtype: int64 

same_cause_count_df.sum() non-binarized:
same_cause1_icd10_1x     12
same_cause1_icd10_2x      5
same_cause1_icd10_3x      8
same_cause1_icd10_4x      5
same_cause1_icd10_5x      2
same_cause1_icd10_6x      7
same_cause1_icd10_7x      8
same_cause1_icd10_8x      3
same_cause1_icd10_9x     10
same_cause1_icd10_10x    70
dtype: int64


In [5]:
"""
    Objective: Add the aggregated cause1_icd10 to the end of the dataframe. This completes the ICD10 processing portion. 
    The final dataframe have the following columns:
    - same_cause1_icd10_1x      (binarized, indiciating if the cause1_cghr10 is repeated 1 times)
    - same_cause1_icd10_2x
    - same_cause1_icd10_3x
    - same_cause1_icd10_4x
    - same_cause1_icd10_5x
    - same_cause1_icd10_6x
    - same_cause1_icd10_7x
    - same_cause1_icd10_8x
    - same_cause1_icd10_9x
    - same_cause1_icd10_10x     (binarized, indicating if the cause1_cghr10 is repeated 10 times)
    - cause1_icd10              (dictionary, {ICD10_1: count, ICD10_2: count, ...})
"""
aggregated_cause1_icd10_rows = []

for name, group in grouped_df:
    # print(f"Row ID: {name}", group['cause1_icd10'].value_counts().to_dict())
    aggregated_cause1_icd10_rows.append([name, group['cause1_icd10'].value_counts().to_dict()])
    
combined_icd10_df = pd.DataFrame(aggregated_cause1_icd10_rows, columns=['rowid', 'cause1_icd10']).set_index('rowid')

# pd.set_option('display.max_rows', None)

# combined_df

same_cause_count_df.merge(combined_icd10_df, left_index=True, right_index=True)

Unnamed: 0_level_0,same_cause1_icd10_1x,same_cause1_icd10_2x,same_cause1_icd10_3x,same_cause1_icd10_4x,same_cause1_icd10_5x,same_cause1_icd10_6x,same_cause1_icd10_7x,same_cause1_icd10_8x,same_cause1_icd10_9x,same_cause1_icd10_10x,cause1_icd10
rowid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
14000252,0,0,0,0,0,0,0,0,0,1,{'A09': 10}
14000286,0,0,0,0,0,0,0,0,0,1,{'G83': 10}
14000296,0,0,0,0,0,0,0,0,0,1,{'K35': 10}
14000405,1,0,0,0,0,0,0,0,1,0,"{'J18': 9, 'E11': 1}"
14000435,0,0,0,0,0,0,0,0,0,1,{'B54': 10}
...,...,...,...,...,...,...,...,...,...,...,...
24002738,0,0,0,0,0,0,0,0,0,1,{'B54': 10}
24002795,1,1,0,0,0,0,1,0,0,0,"{'B50': 7, 'A83': 2, 'P22': 1}"
24002976,0,0,0,0,0,0,0,0,0,1,{'G83': 10}
24003163,0,0,0,0,0,0,0,0,0,1,{'G40': 10}


In [7]:
"""
    Objective: calculate the number of times a cause1_icd10 is repeated for each rowid using CGHR10.    
"""

# load the mapping
icd10_to_cghr_mapping = pd.read_csv('../data_202402/icd10_cghr10_v1.csv')

icd10_to_cghr_mapping.sample(5)

Unnamed: 0,cghr10_age,cghr10_title,icd10_code,icd10_range
3500,child,"Epilepsy, leukaemia, and other noncommunicable...",P81,P80-P83
2646,child,Other,F18,F03-F72
4433,neo,Other,B55,B55-B60
233,adult,Cancers,C41,C30-C45
3810,child,Injuries,T08,T00-T98


In [8]:
"""
    Objective: Load a minimal set of columns from the original datasets of all age groups and rounds.
"""

# Get age group and round once again and merge everything together into one dataframe
path_prefix = "../data_202402/"
merged_all_df = pd.DataFrame()

rounds = ['rd1', 'rd2']
age_groups = ['adult', 'child', 'neo']

for r in rounds:
    for a in age_groups:
        
        questionnaire_df =  pd.read_csv(f"{path_prefix}healsl_{r}_{a}_v1.csv")
        age_df =            pd.read_csv(f"{path_prefix}healsl_{r}_{a}_age_v1.csv")
        narrative_df =      pd.read_csv(f"{path_prefix}healsl_{r}_{a}_narrative_v1.csv")

        narrative_df = narrative_df.rename(columns={'summary': 'open_narrative'})
        
        # Merge the dataframes
        narrative_only = narrative_df[['rowid','open_narrative']]
        sex_only = questionnaire_df[['rowid','sex_cod']]
        age_only = age_df[['rowid','age_value_death','age_unit_death']]
        
        merged_df = narrative_only.merge(sex_only, on='rowid').merge(age_only, on='rowid')

        # Fill in missing values with empty string
        merged_df['sex_cod'] = merged_df['sex_cod'].fillna('')
        
        merged_df['age_group'] = f"{a}"
        merged_df['rd_group'] = f"{r}"

        assert not merged_df.isnull().values.any(), "Execution halted: NaN values found in merged_df"

        print(f"round: {r.ljust(10)} age group: {a.ljust(10)} len: {str(merged_df.shape[0]).ljust(10)}")
        # print(f"Sample of merged_df {merged_df.shape}:")
        # display(merged_df.sample(5))
        
        merged_all_df = pd.concat([merged_all_df, merged_df])
        


  questionnaire_df =  pd.read_csv(f"{path_prefix}healsl_{r}_{a}_v1.csv")
  questionnaire_df =  pd.read_csv(f"{path_prefix}healsl_{r}_{a}_v1.csv")


round: rd1        age group: adult      len: 4987      
round: rd1        age group: child      len: 2998      
round: rd1        age group: neo        len: 585       
round: rd2        age group: adult      len: 2025      
round: rd2        age group: child      len: 1059      
round: rd2        age group: neo        len: 233       


In [9]:
"""
    Objective: Map an ICD10 code to its their CGHR10 code. 
    Similar ICD10 codes can have different conversions depending on age groups. Therefore we need to determine the age group before converting.
"""

# Same ICD10 codes can have different conversions depending on age groups. Therefore we need to determine the age group before converting.
df['age_group'] = df['rowid'].map(merged_all_df.set_index('rowid')['age_group'])

# convert cause1_icd10 to cghr10_title, match icd10 and age group
df['cause1_cghr10']= df.apply(lambda x: icd10_to_cghr_mapping[(icd10_to_cghr_mapping['cghr10_age'] == x['age_group']) & (icd10_to_cghr_mapping['icd10_code'] == x['cause1_icd10'])]['cghr10_title'].values[0], axis=1)

df.rename(columns={'Unnamed: 0': 'u_rowid'}, inplace=True)

In [10]:
# increase the number of rows to display
pd.set_option('display.max_rows', 25)

df[['u_rowid', 'rowid', 'cause1_icd10', 'cause1_cghr10', 'age_group']].head(10)

Unnamed: 0,u_rowid,rowid,cause1_icd10,cause1_cghr10,age_group
0,14006015_0,14006015,J39,Chronic respiratory diseases,adult
1,14003152_0,14003152,A15,Tuberculosis,adult
2,14004789_0,14004789,V01,Road and transport injuries,adult
3,14008356_0,14008356,N12,Other noncommunicable diseases,adult
4,14004298_0,14004298,K25,Other noncommunicable diseases,adult
5,14005850_0,14005850,I10,Other cardiovascular diseases,adult
6,14005861_0,14005861,B54,Malaria,adult
7,14003875_0,14003875,T75,Other injuries,adult
8,14009118_0,14009118,J18,Acute respiratory infections,adult
9,14006898_0,14006898,B54,Malaria,adult


In [11]:
"""
    Objective: Aggregate the number of times CGHR10 code is repeated and indicate in their respective columns as 1x...10x using binarization.
"""

same_cause_cghr10_count_df = pd.DataFrame(grouped_df['cause1_cghr10'].value_counts())
same_cause_cghr10_count_df = pd.get_dummies(same_cause_cghr10_count_df['count']).astype(int).groupby('rowid').sum().rename(columns=lambda x: f'same_cause1_cghr10_{x}x')

display(same_cause_cghr10_count_df)

# same_cghr_cause_count_df.sum()

# Alternative view: reduce all non-zero values to 1
same_cause_cghr10_count_df[['same_cause1_cghr10_1x', 'same_cause1_cghr10_2x', 'same_cause1_cghr10_3x', 'same_cause1_cghr10_4x', 'same_cause1_cghr10_5x',
                        'same_cause1_cghr10_6x', 'same_cause1_cghr10_7x', 'same_cause1_cghr10_8x', 'same_cause1_cghr10_9x', 'same_cause1_cghr10_10x']].apply(lambda x: x.astype(bool)).sum()

Unnamed: 0_level_0,same_cause1_cghr10_1x,same_cause1_cghr10_2x,same_cause1_cghr10_3x,same_cause1_cghr10_4x,same_cause1_cghr10_5x,same_cause1_cghr10_6x,same_cause1_cghr10_7x,same_cause1_cghr10_8x,same_cause1_cghr10_9x,same_cause1_cghr10_10x
rowid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
14000252,0,0,0,0,0,0,0,0,0,1
14000286,0,0,0,0,0,0,0,0,0,1
14000296,0,0,0,0,0,0,0,0,0,1
14000405,1,0,0,0,0,0,0,0,1,0
14000435,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
24002738,0,0,0,0,0,0,0,0,0,1
24002795,1,1,0,0,0,0,1,0,0,0
24002976,0,0,0,0,0,0,0,0,0,1
24003163,0,0,0,0,0,0,0,0,0,1


same_cause1_cghr10_1x      6
same_cause1_cghr10_2x      4
same_cause1_cghr10_3x      2
same_cause1_cghr10_4x      5
same_cause1_cghr10_5x      1
same_cause1_cghr10_6x      5
same_cause1_cghr10_7x      3
same_cause1_cghr10_8x      3
same_cause1_cghr10_9x      5
same_cause1_cghr10_10x    83
dtype: int64

In [12]:
"""
    Objective: Add the aggregated cause1_cghr10 to the end of the dataframe. This completes the CGHR10 processing portion.
    The final dataframe have the following columns:
    - same_cause1_cghr10_1x     (binarized, indiciating if the cause1_cghr10 is repeated 1 times)
    - same_cause1_cghr10_2x
    - same_cause1_cghr10_3x
    - same_cause1_cghr10_4x
    - same_cause1_cghr10_5x
    - same_cause1_cghr10_6x
    - same_cause1_cghr10_7x
    - same_cause1_cghr10_8x
    - same_cause1_cghr10_9x
    - same_cause1_cghr10_10x    (binarized, indiciating if the cause1_cghr10 is repeated 10 times)
    - cause1_cghr10             (dictionary, {CGHR10_1: count, CGHR10_2: count, ...})
"""

aggregated_cghr10_rows = []

for name, group in grouped_df:
    # print(f"Row ID: {name}", group['cause1_icd10'].value_counts().to_dict())
    aggregated_cghr10_rows.append([name, group['cause1_cghr10'].value_counts().to_dict()])
    
combined_cghr10_df = pd.DataFrame(aggregated_cghr10_rows, columns=['rowid', 'cause1_cghr10']).set_index('rowid')

same_cause_cghr10_count_df.merge(combined_cghr10_df, left_index=True, right_index=True)

Unnamed: 0_level_0,same_cause1_cghr10_1x,same_cause1_cghr10_2x,same_cause1_cghr10_3x,same_cause1_cghr10_4x,same_cause1_cghr10_5x,same_cause1_cghr10_6x,same_cause1_cghr10_7x,same_cause1_cghr10_8x,same_cause1_cghr10_9x,same_cause1_cghr10_10x,cause1_cghr10
rowid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
14000252,0,0,0,0,0,0,0,0,0,1,{'Diarrhoeal diseases': 10}
14000286,0,0,0,0,0,0,0,0,0,1,{'Stroke': 10}
14000296,0,0,0,0,0,0,0,0,0,1,{'Other noncommunicable diseases': 10}
14000405,1,0,0,0,0,0,0,0,1,0,"{'Acute respiratory infections': 9, 'Diabetes ..."
14000435,0,0,0,0,0,0,0,0,0,1,{'Malaria': 10}
...,...,...,...,...,...,...,...,...,...,...,...
24002738,0,0,0,0,0,0,0,0,0,1,{'Malaria': 10}
24002795,1,1,0,0,0,0,1,0,0,0,"{'Malaria': 7, 'Other infections': 2, 'Other': 1}"
24002976,0,0,0,0,0,0,0,0,0,1,{'Stroke': 10}
24003163,0,0,0,0,0,0,0,0,0,1,{'Other noncommunicable diseases': 10}


In [13]:
display(same_cause_count_df.merge(same_cause_cghr10_count_df, left_index=True, right_index=True)[['same_cause1_icd10_10x', 'same_cause1_cghr10_10x']])

print(f"Non-matching rows: ", end='')
print((same_cause_count_df.merge(same_cause_cghr10_count_df, left_index=True, right_index=True)[['same_cause1_icd10_10x', 'same_cause1_cghr10_10x']].sum(axis=1) != 2).sum())

Unnamed: 0_level_0,same_cause1_icd10_10x,same_cause1_cghr10_10x
rowid,Unnamed: 1_level_1,Unnamed: 2_level_1
14000252,1,1
14000286,1,1
14000296,1,1
14000405,0,0
14000435,1,1
...,...,...
24002738,1,1
24002795,0,0
24002976,1,1
24003163,1,1


Non-matching rows: 30


In [14]:
"""
    Objective: Merge the previously processed ICD10 and CGHR10 dataframes into one, and add the age_group and round columns to the end of the dataframe.
    The final dataframe have the following columns:
    - {all columns from processed ICD10 and CGHR10}
    - age_group     (adult, child, neo)
    - round         (integer 1 or 2)
"""

# combine the two dataframes

# temp_df_2 = same_cause_count_df.merge(combined_df, left_index=True, right_index=True)

print("Merging previously processed ICD10 and CGHR10 dataframes into one, and add the age_group and round columns to the end of the dataframe.")
temp_df_1 = same_cause_count_df.merge(combined_icd10_df, left_index=True, right_index=True)
temp_df_2 = same_cause_cghr10_count_df.merge(combined_cghr10_df, left_index=True, right_index=True)
temp_df_3 = merged_all_df.set_index('rowid')[['age_group', 'rd_group']].rename(columns={'rd_group': 'round'})
temp_df_3['round'] = temp_df_3['round'].str.replace(r'\D', '', regex=True).astype(int)

pd.options.display.max_columns = None

output_df = temp_df_1.join([temp_df_2, temp_df_3], how='inner').rename(columns={'cause1_icd10': 'cause_icd10', 'cause1_cghr10': 'cause_cghr10'})
output_df.sample(5)

# temp_df_1

Unnamed: 0_level_0,same_cause1_icd10_1x,same_cause1_icd10_2x,same_cause1_icd10_3x,same_cause1_icd10_4x,same_cause1_icd10_5x,same_cause1_icd10_6x,same_cause1_icd10_7x,same_cause1_icd10_8x,same_cause1_icd10_9x,same_cause1_icd10_10x,cause_icd10,same_cause1_cghr10_1x,same_cause1_cghr10_2x,same_cause1_cghr10_3x,same_cause1_cghr10_4x,same_cause1_cghr10_5x,same_cause1_cghr10_6x,same_cause1_cghr10_7x,same_cause1_cghr10_8x,same_cause1_cghr10_9x,same_cause1_cghr10_10x,cause_cghr10,age_group,round
rowid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
14000645,0,0,0,0,0,0,0,0,0,1,{'B50': 10},0,0,0,0,0,0,0,0,0,1,{'Malaria': 10},child,1
14005510,0,0,0,0,0,0,0,0,0,1,{'I64': 10},0,0,0,0,0,0,0,0,0,1,{'Stroke': 10},adult,1
14004885,0,0,0,0,0,0,0,0,0,1,{'A97': 10},0,0,0,0,0,0,0,0,0,1,{'Unspecified infections': 10},adult,1
14001749,1,0,0,0,0,0,0,0,1,0,"{'G04': 9, 'G12': 1}",1,0,0,0,0,0,0,0,1,0,"{'Unspecified infections': 9, 'Other noncommun...",adult,1
14002600,0,0,1,0,0,0,1,0,0,0,"{'B50': 7, 'B54': 3}",0,0,0,0,0,0,0,0,0,1,{'Malaria': 10},child,1


In [15]:
"""
    Objective: Export the results to CSV files.
"""

try:
    print("Exporting to CSV files...")
    output_df[output_df['round'] == 1].to_csv('healsl_rd1_rapid_gpt3_10xreruns_v2a.csv')
    output_df[output_df['round'] == 2].to_csv('healsl_rd2_rapid_gpt3_10xreruns_v2a.csv')
except Exception as e:
    print(e)

In [16]:
merged_all_df.set_index('rowid').loc[24002795]

open_narrative     According to the respondent, the deceased was ...
sex_cod                                                       Female
age_value_death                                                    4
age_unit_death                                                 Years
age_group                                                      child
rd_group                                                         rd2
Name: 24002795, dtype: object