# **Dataset Overview and Data Collection**

In [None]:
import requests
import pandas as pd
import json
import pprint

url = ["https://ghoapi.azureedge.net/api/NCD_BMI_30C",
       "https://ghoapi.azureedge.net/api/NCD_BMI_PLUS2C",
       "https://ghoapi.azureedge.net/api/NCD_BMI_18C",
       "https://ghoapi.azureedge.net/api/NCD_BMI_MINUS2C"]

obesity_1 = requests.get(url[0]).json()

In [None]:
obesity_2 = requests.get(url[1]).json()

In [None]:
malnutrition_1 = requests.get(url[2]).json()

In [None]:
malnutrition_2 = requests.get(url[3]).json()

In [None]:
# Each payload keeps its records under the "value" key; turn every record
# list into its own DataFrame.
df1, df2, df3, df4 = (
    pd.DataFrame(payload["value"])
    for payload in (obesity_1, obesity_2, malnutrition_1, malnutrition_2)
)

In [None]:
# (rows, columns) of the first obesity frame.
df1.shape

In [None]:
# Preview the first rows of df1.
df1.head()

In [None]:
# Preview the first rows of df2.
df2.head()

In [None]:
# Element-wise comparison of the column labels of df1 and df2.
df1.columns == df2.columns

In [None]:
# Same element-wise column-label comparison for df3 and df4.
df3.columns == df4.columns

In [None]:
# Show both column indexes together to inspect where they differ.
df3.columns,df4.columns

In [None]:
x = df3.pop("Dim1")

In [None]:
df3.insert(9,"Dim1",x)

In [None]:
df3.iloc[:,8:10].head()

In [None]:
df4.iloc[:,8:10].head()

In [None]:
df1["age_group"] = "A"
df2["age_group"] = "C"
df3["age_group"] = "A"
df4["age_group"] = "C"

In [None]:
df_o = pd.concat([df1,df2])
df_m = pd.concat([df3,df4])

In [None]:
df_o.head()

In [None]:
df_m.head()

In [None]:
df_o1 = df_o[(df_o["TimeDim"]>= 2012)][(df_o["TimeDim"] <= 2022)]

In [None]:
df_o1 = df_o1.reset_index(drop=True)
df_o1["TimeDim"].unique()

In [None]:
df_m1 = df_m[(df_m["TimeDim"]>= 2012)][(df_m["TimeDim"] <= 2022)]
df_m1 = df_m1.reset_index(drop=True)
df_m1["TimeDim"].unique()

# **Data Cleaning and Feature Engineering**

In [None]:
val = ["ParentLocation","Dim1","TimeDim","Low","High","NumericValue","SpatialDim","age_group"]
df_obesity = []
for i in val:
  df_obesity.append(df_o1.pop(i))

In [None]:
df_obesity = pd.DataFrame(df_obesity)

In [None]:
df_obesity = df_obesity.transpose()

In [None]:
df_obesity.columns

In [None]:
val = ["ParentLocation","Dim1","TimeDim","Low","High","NumericValue","SpatialDim","age_group"]
df_malnutrition = []
for i in val:
  df_malnutrition.append(df_m1.pop(i))

In [None]:
df_malnutrition= pd.DataFrame(df_malnutrition)

In [None]:
df_malnutrition = df_malnutrition.transpose()

In [None]:
df_malnutrition.columns

In [None]:
df_malnutrition.head()

In [None]:
df_obesity.head()

In [None]:
df_obesity.columns =["Region","Gender","Year","LowerBound","UpperBound","Mean_Estimate","Country","Age_Group"]
df_malnutrition.columns =["Region","Gender","Year","LowerBound","UpperBound","Mean_Estimate","Country","Age_Group"]

In [None]:
df_obesity["Gender"].unique()

In [None]:
df_obesity["Gender"] = df_obesity["Gender"].replace({"SEX_FMLE" : "Female",
                                                     "SEX_BTSX" : "Both",
                                                     "SEX_MLE" : "Male"})

In [None]:
df_malnutrition["Gender"] = df_malnutrition["Gender"].replace({"SEX_FMLE" : "Female",
                                                     "SEX_BTSX" : "Both",
                                                     "SEX_MLE" : "Male"})

In [None]:
df_obesity["Gender"].unique()

In [None]:
df_malnutrition["Gender"].unique()

In [None]:
!pip install pycountry

In [None]:
import pycountry

In [None]:
def get_country_name(alpha_3_code):
    """Translate a location code into a human-readable name.

    Codes listed in the fixed ``special_cases`` table are resolved from that
    table. Any other code is looked up through ``pycountry`` by its
    ``alpha_3`` attribute.

    Args:
        alpha_3_code: The code to translate.

    Returns:
        The mapped name, or ``alpha_3_code`` itself unchanged when neither
        the table nor pycountry has a match.
    """
    special_cases = {
        'GLOBAL': 'Global',
        'WB_LMI': 'Low & Middle Income',
        'WB_HI': 'High Income',
        'WB_LI': 'Low Income',
        'EMR': 'Eastern Mediterranean Region',
        'EUR': 'Europe',
        'AFR': 'Africa',
        'SEAR': 'South-East Asia Region',
        'WPR': 'Western Pacific Region',
        'AMR': 'Americas Region',
        'WB_UMI': 'Upper Middle Income',
    }
    if alpha_3_code in special_cases:
        return special_cases[alpha_3_code]
    try:
        country = pycountry.countries.get(alpha_3=alpha_3_code)
        if country:
            return country.name
    except (AttributeError, LookupError):
        # Best-effort lookup: an unresolvable code falls through to the
        # fallback below instead of aborting the whole column conversion.
        pass

    # Fall back to the input code. The previous version returned the
    # undefined name `code`, which raised NameError for every unmatched code.
    return alpha_3_code

In [None]:
df_obesity["Country"] = df_obesity["Country"].apply(get_country_name)

In [None]:
df_malnutrition["Country"] = df_malnutrition["Country"].apply(get_country_name)

In [None]:
df_obesity.head()

In [None]:
df_malnutrition.head()

In [None]:
df_obesity["CI_Width"] = df_obesity["UpperBound"]-df_obesity["LowerBound"]

In [None]:
df_malnutrition["CI_Width"] = df_malnutrition["UpperBound"]-df_malnutrition["LowerBound"]

In [None]:
df_obesity.loc[df_obesity["Mean_Estimate"] >= 30, "Obesity_level"] = "High"
df_obesity.loc[(df_obesity["Mean_Estimate"] >= 25) & (df_obesity["Mean_Estimate"] < 30), "Obesity_level"] = "Moderate"
df_obesity.loc[df_obesity["Mean_Estimate"] < 25, "Obesity_level"] = "Low"

In [None]:
df_malnutrition.loc[df_malnutrition["Mean_Estimate"] >= 20, "Malnutrition_level"] = "High"
df_malnutrition.loc[(df_malnutrition["Mean_Estimate"] >= 10) & (df_malnutrition["Mean_Estimate"] < 20), "Malnutrition_level"] = "Moderate"
df_malnutrition.loc[df_malnutrition["Mean_Estimate"] < 10, "Malnutrition_level"] = "Low"

In [None]:
df_obesity.shape

In [None]:
df_malnutrition.shape

In [None]:
order1 = ["Year","Gender","Mean_Estimate","LowerBound","UpperBound","Age_Group","Country","Region","CI_Width","Obesity_level"]
order2 = ["Year","Gender","Mean_Estimate","LowerBound","UpperBound","Age_Group","Country","Region","CI_Width","Malnutrition_level"]

df_obesity = df_obesity[order1]
df_malnutrition = df_malnutrition[order2]

In [None]:
df_malnutrition.head()

# **Obesity EDA**

In [None]:
df_obesity.isna().any()

In [None]:
df_obesity.head()

In [None]:
test= df_obesity[df_obesity["Region"].isnull() == True].iloc[:,6:8]

In [None]:
test["Country"].unique()

In [None]:
df_obesity["Region"].unique()

In [None]:
df_obesity[df_obesity["Country"] == "Europe"]

In [None]:
ct = [i for i in test["Country"].unique()]
for i in ct:
    # Filter the dataframe for the current country
    country_data = df_obesity[df_obesity["Country"] == i]
    
    # Check if all Region values are NaN for this country
    if country_data["Region"].isna().all():
        print(f"All Regions with Country {i} is None")
    else:
        print(f"All Regions with Country {i} is not None")

In [None]:
for i in ct:
    if any(df_obesity["Country"] == i):
        df_obesity.loc[df_obesity["Country"] == i, "Region"] = i

In [None]:
df_obesity.isna().any()

In [None]:
df_obesity.shape

In [None]:
import plotly.express as px

In [None]:
# Distribution of the Mean_Estimate values.
px.histogram(df_obesity, x = "Mean_Estimate")

In [None]:
# Distribution of the CI_Width values.
px.histogram(df_obesity, x ="CI_Width")

In [None]:
# Mean_Estimate aggregated per Year bin.
# NOTE(review): with `y` given and no `histfunc`, plotly presumably sums y in
# each bin rather than averaging it - confirm that is the intended aggregate.
px.histogram(df_obesity, x= "Year", y = "Mean_Estimate")

In [None]:
# CI_Width aggregated per Year bin (same aggregation caveat as above cell's
# default; NOTE(review): confirm sum vs. average is intended).
px.histogram(df_obesity, x= "Year", y = "CI_Width")

In [None]:
# Pie chart of Mean_Estimate grouped by Region.
# NOTE(review): slice sizes presumably reflect the summed values per Region,
# so regions with more rows get larger slices - confirm this is intended.
px.pie(df_obesity, values = "Mean_Estimate", names = "Region", height=600)

In [None]:
# Pie chart of CI_Width grouped by Region (same caveat on summed values).
px.pie(df_obesity, values = "CI_Width", names = "Region", height=600)