In [None]:
import pandas as pd
import numpy as np

In [26]:
def _latest_per_country(frame, value_columns):
    """Return the most recent complete observation for each country code.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with "Code" and "Year" columns plus every column listed in
        ``value_columns``.
    value_columns : list of str
        Measurement columns that must all be non-null for a row to be
        eligible as the "latest" observation.

    Returns
    -------
    pandas.DataFrame
        One row per "Code", ordered by "Code", containing only "Code" and
        ``value_columns``. The original index labels are kept.
    """
    # Keep only rows whose "Code" is exactly three characters long; a missing
    # code compares as False here and is therefore dropped as well.
    has_iso_code = frame["Code"].str.len() == 3
    return (
        frame[has_iso_code]
        # Drop incomplete rows *before* choosing the latest year, so the
        # selected observation never carries a NaN measurement.
        .dropna(subset=value_columns)
        # Sorting by (Code, Year) and keeping the last duplicate of each Code
        # yields the latest year per country, already ordered by Code.
        .sort_values(by=["Code", "Year"])
        # De-duplicating on "Code" (the merge key) guarantees key uniqueness.
        .drop_duplicates(subset="Code", keep="last")
        # Keep only the key column and the measurement columns.
        .loc[:, ["Code", *value_columns]]
    )


# Load each source table and reduce it to one complete, most recent row per country code.
df_bmi_women = _latest_per_country(
    pd.read_csv("mean-body-mass-index-bmi-in-adult-women.csv"),
    ["Mean BMI (female)"],
)
df_bmi_men = _latest_per_country(
    pd.read_csv("mean-body-mass-index-bmi-in-adult-males.csv"),
    ["Mean BMI (male)"],
)
df_height = _latest_per_country(
    pd.read_csv("mean-height-males-vs-females.csv"),
    ["Mean male height (cm)", "Mean female height (cm)"],
)

# Merge on the country code. `validate` documents (and enforces) that every
# table holds at most one row per code, so the inner joins cannot fan out.
df = df_bmi_men.merge(df_bmi_women, on="Code", how="inner", validate="one_to_one")
df = df.merge(df_height, on="Code", how="inner", validate="one_to_one")

# Derive mean weights from BMI = weight / height**2, with height converted
# from centimetres to metres.
df["women weight"] = df["Mean BMI (female)"] * (df["Mean female height (cm)"] / 100) ** 2
df["men weight"] = df["Mean BMI (male)"] * (df["Mean male height (cm)"] / 100) ** 2

print(df)


    Code  Mean BMI (male)  Mean BMI (female)  Mean male height (cm)  \
0    AFG        22.682456          24.234135             165.255861   
1    AGO        22.436538          24.376501             167.310727   
2    ALB        27.174471          26.507512             173.388856   
3    AND        27.478395          26.271661             176.060167   
4    ARE        28.188004          29.638162             170.455430   
..   ...              ...                ...                    ...   
195  WSM        30.629908          34.339825             174.377060   
196  YEM        23.726697          24.197709             159.886941   
197  ZAF        25.105225          29.591921             166.678099   
198  ZMB        21.448027          23.833982             166.517144   
199  ZWE        22.301353          25.431487             168.586005   

     Mean female height (cm)  women weight  men weight  
0                 155.075323     58.279109   61.944652  
1                 157.305210     

In [None]:
# Persist the merged per-country table as a CSV file; the positional row
# index is not written.
df.to_csv(
    path_or_buf="../data/weightsOWID.csv",
    index=False,
)