# Nutrition Paradox: A Global View on Obesity and Malnutrition

### Step 1: Dataset Collection & Combination

In [7]:
import pandas as pd
import requests

In [9]:
# GHO API endpoints, keyed by the dataset each one feeds
urls = dict([
    ("adult_obesity", "https://ghoapi.azureedge.net/api/NCD_BMI_30C"),
    ("child_obesity", "https://ghoapi.azureedge.net/api/NCD_BMI_PLUS2C"),
    ("adult_malnutrition", "https://ghoapi.azureedge.net/api/NCD_BMI_18C"),
    ("child_malnutrition", "https://ghoapi.azureedge.net/api/NCD_BMI_MINUS2C"),
])

In [11]:
def fetch_data(url, timeout=30):
    """Download one API endpoint and return its records as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to query. The JSON payload must hold its records under a
        top-level "value" key.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per record, with nested fields flattened by
        ``pd.json_normalize``.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts, or a 4xx/5xx response.
    KeyError
        If the payload has no "value" key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error body as data.
    response.raise_for_status()
    records = response.json()["value"]
    return pd.json_normalize(records)

In [13]:
# Pull each indicator from the API into its own DataFrame (same fetch order as the keys below)
df_adult_obesity, df_child_obesity, df_adult_malnutrition, df_child_malnutrition = [
    fetch_data(urls[dataset])
    for dataset in ("adult_obesity", "child_obesity", "adult_malnutrition", "child_malnutrition")
]

In [15]:
# Tag every record with the population its source dataset describes
for frame in (df_adult_obesity, df_adult_malnutrition):
    frame["age_group"] = "Adult"
for frame in (df_child_obesity, df_child_malnutrition):
    frame["age_group"] = "Child/Adolescent"

In [17]:
# Stack the adult and child records into one frame per condition, with a fresh 0..n-1 index
obesity_parts = [df_adult_obesity, df_child_obesity]
malnutrition_parts = [df_adult_malnutrition, df_child_malnutrition]
df_obesity = pd.concat(obesity_parts, ignore_index=True)
df_malnutrition = pd.concat(malnutrition_parts, ignore_index=True)

In [19]:
# Restrict both datasets to the 2012–2022 window (both end years included)
obesity_in_window = df_obesity["TimeDim"].between(2012, 2022)
malnutrition_in_window = df_malnutrition["TimeDim"].between(2012, 2022)
df_obesity = df_obesity[obesity_in_window]
df_malnutrition = df_malnutrition[malnutrition_in_window]

In [21]:
# First look: size of each combined, year-filtered dataset
for heading, frame in (
    ("Obesity dataset shape:", df_obesity),
    ("Malnutrition dataset shape:", df_malnutrition),
):
    print(heading, frame.shape)

Obesity dataset shape: (27720, 26)
Malnutrition dataset shape: (27720, 26)


### Step 2: Data Cleaning & Feature Engineering

In [23]:
import pycountry

In [25]:
# 1: Columns to Retain — everything else returned by the API is dropped here
cols_to_keep = [
    "TimeDim",
    "Dim1",
    "NumericValue",
    "Low",
    "High",
    "ParentLocation",
    "SpatialDim",
    "age_group",
]
df_obesity, df_malnutrition = (
    frame[cols_to_keep] for frame in (df_obesity, df_malnutrition)
)

In [27]:
# Confirm the column selection on the obesity frame (rows, columns)
df_obesity.shape

(27720, 8)

In [29]:
# Confirm the column selection on the malnutrition frame (rows, columns)
df_malnutrition.shape

(27720, 8)

In [31]:
# 2: Renaming the columns
# Maps the raw API field names to the analysis-friendly names used from here on.
rename_map = {
    "TimeDim": "Year",
    "Dim1": "Gender",
    "NumericValue": "Mean_Estimate",
    "Low": "LowerBound",
    "High": "UpperBound",
    "ParentLocation": "Region",
    "SpatialDim": "Country",
    "age_group": "Age_group"
}
# Reassign rather than mutate with inplace=True: the result is a fresh frame,
# so the cell does not silently alter an object that other names may still
# reference.
df_obesity = df_obesity.rename(columns=rename_map)
df_malnutrition = df_malnutrition.rename(columns=rename_map)


In [33]:
# 3: Standardizing gender values
def clean_gender(g):
    """Map a gender code or label to "Female", "Male" or "Both".

    Recognises the abbreviated raw codes (substrings "fmle" / "mle",
    presumably from values such as SEX_FMLE / SEX_MLE — confirm against the
    API) as well as the already-cleaned labels "Female" / "Male". Accepting
    the cleaned labels makes the function idempotent, so re-running the cell
    that applies it does not collapse every row to "Both".

    Parameters
    ----------
    g : str
        Raw code or previously cleaned label; matching is case-insensitive.

    Returns
    -------
    str
        "Female", "Male", or "Both" for anything that matches neither.
    """
    g = g.lower()
    # Test the female patterns first: both "fmle" and "female" contain the male patterns.
    if "fmle" in g or "female" in g:
        return "Female"
    if "mle" in g or "male" in g:
        return "Male"
    return "Both"

In [35]:
# Normalise the Gender column of both frames with clean_gender
for frame in (df_obesity, df_malnutrition):
    frame["Gender"] = frame["Gender"].apply(clean_gender)

In [37]:
# 4: Converting ISO codes to country names using pycountry
def get_country_name(code):
    """Return the pycountry name for an ISO alpha-3 code, or None if unknown.

    Parameters
    ----------
    code : str
        Candidate ISO 3166-1 alpha-3 country code.

    Returns
    -------
    str or None
        The country's name, or None when pycountry has no match for `code`.
    """
    try:
        return pycountry.countries.get(alpha_3=code).name
    except (AttributeError, LookupError):
        # A miss surfaces either as a None result (no `.name` attribute) or as
        # a lookup error, presumably depending on the pycountry version — both
        # mean "not a country code". Anything else (e.g. KeyboardInterrupt
        # during a long .apply) is deliberately left to propagate.
        return None

# Display labels for Country codes that get_country_name cannot resolve.
# Judging by the labels, these are aggregate rows (a global total, regional
# groupings and income groups) rather than individual countries.
# NOTE(review): "WB_LMI" is labelled "Low & Middle Income" here; confirm
# against the data provider's code list whether it means lower-middle income.
special_cases = {
    "GLOBAL": "Global",
    "WB_LMI": "Low & Middle Income",
    "WB_HI": "High Income",
    "WB_LI": "Low Income",
    "EMR": "Eastern Mediterranean Region",
    "EUR": "Europe",
    "AFR": "Africa",
    "SEAR": "South-East Asia Region",
    "WPR": "Western Pacific Region",
    "AMR": "Americas Region",
    "WB_UMI": "Upper Middle Income"
}

def convert_country(code):
    """Turn a location code into a readable name.

    Tries the country lookup first, then the `special_cases` table; a code
    found in neither is returned unchanged.
    """
    resolved = get_country_name(code)
    if resolved:
        return resolved
    return special_cases.get(code, code)

# Replace location codes with readable names in both frames
for frame in (df_obesity, df_malnutrition):
    frame["Country"] = frame["Country"].apply(convert_country)


In [39]:
# 5: Add CI_Width column — the span between the upper and lower bounds of each estimate
for frame in (df_obesity, df_malnutrition):
    frame["CI_Width"] = frame["UpperBound"] - frame["LowerBound"]


In [41]:
# 6: Adding obesity_level
def get_obesity_level(val):
    """Bucket an obesity estimate: "High" from 30, "Moderate" from 25, otherwise "Low"."""
    # Thresholds are checked from highest to lowest; the first one reached wins.
    for floor, label in ((30, "High"), (25, "Moderate")):
        if val >= floor:
            return label
    return "Low"

# Classify every obesity estimate, then attach the labels as a new column
obesity_levels = df_obesity["Mean_Estimate"].apply(get_obesity_level)
df_obesity["Obesity_Level"] = obesity_levels


In [43]:
# Preview the derived obesity categories
df_obesity["Obesity_Level"]

1             Low
8        Moderate
14       Moderate
16           High
17           High
           ...   
83137         Low
83140         Low
83148         Low
83151         Low
83154         Low
Name: Obesity_Level, Length: 27720, dtype: object

In [45]:
# 7: Adding malnutrition_level
def get_malnutrition_level(val):
    """Bucket a malnutrition estimate: "High" from 20, "Moderate" from 10, otherwise "Low"."""
    if val >= 20:
        return "High"
    return "Moderate" if val >= 10 else "Low"

# Classify every malnutrition estimate, then attach the labels as a new column
malnutrition_levels = df_malnutrition["Mean_Estimate"].apply(get_malnutrition_level)
df_malnutrition["Malnutrition_Level"] = malnutrition_levels


In [47]:
# Preview the derived malnutrition categories
df_malnutrition["Malnutrition_Level"]

0             Low
4             Low
5             Low
7        Moderate
9             Low
           ...   
83147         Low
83149         Low
83150         Low
83156         Low
83159         Low
Name: Malnutrition_Level, Length: 27720, dtype: object

In [49]:
# Final column order; the two frames differ only in their trailing level column
shared_cols = ["Year", "Gender", "Mean_Estimate", "LowerBound", "UpperBound",
               "Age_group", "Country", "Region", "CI_Width"]
obesity_cols = shared_cols + ["Obesity_Level"]
malnutrition_cols = shared_cols + ["Malnutrition_Level"]

df_obesity, df_malnutrition = df_obesity[obesity_cols], df_malnutrition[malnutrition_cols]

In [57]:
# Preview the first rows of the cleaned obesity table
df_obesity.head()

Unnamed: 0,Year,Gender,Mean_Estimate,LowerBound,UpperBound,Age_group,Country,Region,CI_Width,Obesity_Level
1,2020,Male,17.48764,15.79378,19.24573,Adult,"Iran, Islamic Republic of",Eastern Mediterranean,3.45195,Low
8,2020,Male,28.33553,23.08959,33.81534,Adult,Iraq,Eastern Mediterranean,10.72575,Moderate
14,2014,Female,27.9487,24.92477,31.21326,Adult,Greenland,Europe,6.28849,Moderate
16,2019,Female,31.89322,28.16068,35.57493,Adult,Armenia,Europe,7.41425,High
17,2018,Both,33.69421,29.90581,37.47086,Adult,Malta,Europe,7.56505,High


In [55]:
# Preview the first rows of the cleaned malnutrition table
df_malnutrition.head()

Unnamed: 0,Year,Gender,Mean_Estimate,LowerBound,UpperBound,Age_group,Country,Region,CI_Width,Malnutrition_Level
0,2021,Female,5.79511,3.97204,8.07702,Adult,Gabon,Africa,4.10498,Low
4,2017,Both,3.09065,2.42823,3.84215,Adult,Mongolia,Western Pacific,1.41392,Low
5,2016,Female,8.37172,6.65567,10.25312,Adult,Sierra Leone,Africa,3.59745,Low
7,2021,Both,13.33635,12.24232,14.44287,Adult,South-East Asia Region,,2.20055,Moderate
9,2014,Male,2.20807,1.25384,3.51682,Adult,Qatar,Eastern Mediterranean,2.26298,Low


#### Data Overview & Basic Stats

In [59]:
# Report (rows, columns) for each cleaned dataset
for heading, frame in (
    ("Obesity Data Shape:", df_obesity),
    ("Malnutrition Data Shape:", df_malnutrition),
):
    print(heading, frame.shape)

Obesity Data Shape: (27720, 10)
Malnutrition Data Shape: (27720, 10)


In [61]:
# Descriptive statistics of the numeric columns, obesity first
for frame in (df_obesity, df_malnutrition):
    print(frame.describe())

               Year  Mean_Estimate    LowerBound    UpperBound      CI_Width
count  27720.000000   27720.000000  27720.000000  27720.000000  27720.000000
mean    2017.000000      12.761041      9.393155     17.068489      7.675334
std        3.162335      10.858925      9.893854     12.395013      6.616899
min     2012.000000       0.265670      0.077540      0.547850      0.216700
25%     2014.000000       4.753960      2.354430      7.875503      2.926715
50%     2017.000000       9.944540      6.506535     13.704380      5.480535
75%     2020.000000      17.482418     12.697755     23.986050     10.338880
max     2022.000000      80.609250     74.365370     87.868510     40.554060
               Year  Mean_Estimate    LowerBound    UpperBound      CI_Width
count  27720.000000   27720.000000  27720.000000  27720.000000  27720.000000
mean    2017.000000       5.354451      3.397094      8.147889      4.750795
std        3.162335       4.811220      3.908061      6.443692      4.257684

In [63]:
# Count missing values per column, obesity first
for frame in (df_obesity, df_malnutrition):
    null_counts = frame.isnull().sum()
    print(null_counts)

Year                0
Gender              0
Mean_Estimate       0
LowerBound          0
UpperBound          0
Age_group           0
Country             0
Region           1452
CI_Width            0
Obesity_Level       0
dtype: int64
Year                     0
Gender                   0
Mean_Estimate            0
LowerBound               0
UpperBound               0
Age_group                0
Country                  0
Region                1452
CI_Width                 0
Malnutrition_Level       0
dtype: int64


In [65]:
# List the distinct Gender labels left after cleaning
for heading, frame in (
    ("Obesity Gender unique values:", df_obesity),
    ("Malnutrition Gender unique values:", df_malnutrition),
):
    print(heading, frame["Gender"].unique())


Obesity Gender unique values: ['Male' 'Female' 'Both']
Malnutrition Gender unique values: ['Female' 'Both' 'Male']


In [67]:
# Unique values in categorical columns
# NOTE(review): this cell repeats the Gender check from the preceding cell;
# it may have been meant to inspect a different column — confirm or remove.
print("Obesity Gender unique values:", df_obesity["Gender"].unique())
print("Malnutrition Gender unique values:", df_malnutrition["Gender"].unique())


Obesity Gender unique values: ['Male' 'Female' 'Both']
Malnutrition Gender unique values: ['Female' 'Both' 'Male']


In [69]:
# List the distinct Age_group labels in each dataset
for heading, frame in (
    ("Obesity Age Groups:", df_obesity),
    ("Malnutrition Age Groups:", df_malnutrition),
):
    print(heading, frame["Age_group"].unique())

Obesity Age Groups: ['Adult' 'Child/Adolescent']
Malnutrition Age Groups: ['Adult' 'Child/Adolescent']


In [71]:
# Count the distinct Country values in each dataset
for heading, frame in (
    ("Number of Countries in Obesity:", df_obesity),
    ("Number of Countries in Malnutrition:", df_malnutrition),
):
    distinct_countries = frame["Country"].unique()
    print(heading, len(distinct_countries))

Number of Countries in Obesity: 210
Number of Countries in Malnutrition: 210


#### Distribution of Mean Estimates

#### Saving the cleaned DataFrames as CSVs

In [124]:
# Write each cleaned frame to its CSV file, leaving out the index column
for frame, csv_name in (
    (df_obesity, "cleaned_obesity.csv"),
    (df_malnutrition, "cleaned_malnutrition.csv"),
):
    frame.to_csv(csv_name, index=False)