In [62]:
import numpy as np
import pandas as pd

import datetime as dt
import matplotlib.pyplot as plt
import plotly.express as px
# Load the food-supply CSV into a DataFrame; the path is relative to the
# notebook's working directory.
DATA_FILE = "Food_Supply_Quantity_kg_Data.csv"
food_df = pd.read_csv(DATA_FILE)

In [63]:
# About the Dataset

# This dataset was created in 2020, when COVID-19 was just emerging as a global health crisis.
# At that time, scientists and doctors did not yet know how to fight COVID-19.
# As a result, the dataset's author stated that adopting a healthy diet was one way to protect your family.

# The author obtained the data on food group supply quantities, nutrition values, obesity,
# and undernourishment percentages from the Food and Agriculture Organization of the United Nations (FAO) website.
# The population count for each country came from the Population Reference Bureau website, while the COVID-19 confirmed, death, recovered, and active case figures
# came from the CSSE website.


In [64]:
# Structure of the dataset file

## Each row in the dataset represents one country.
## The columns represent different variables, such as the intake of a certain food, obesity, undernourishment, confirmed cases, deaths, recovered cases, and active cases.
## All of these variables except Population are expressed as percentages (hence the 'Unit (all except Population)' column).
## There are a total of 170 observations and 32 variables in the dataset.


In [65]:
# Variables

## The first few columns in the dataset represent the different types of food consumed in each country.
## The last few columns give the percentage of obese and undernourished people in each country.
## The percentage of confirmed COVID-19 cases, deaths, recovered individuals, and active COVID-19 cases, as well as the population of each country, can also be found in the last few columns.

In [66]:
# Pre-Processing
#
# Cleans food_df in a single chained expression:
#   1. drop the units column,
#   2. drop every row that has a missing value in any column,
#   3. convert 'Undernourished' from text to float.
# Building the result with .assign (instead of assigning a column on the frame
# returned by dropna) avoids pandas' SettingWithCopyWarning.

food_df = (
    food_df
    # errors='ignore' keeps this cell re-runnable: food_df is overwritten here,
    # so on a second run the column is already gone and a plain drop would
    # raise KeyError.
    .drop(columns='Unit (all except Population)', errors='ignore')
    .dropna()
    # Temporarily map the non-numeric '<2.5' entries to 2.4 so the column can
    # be parsed as float.
    .assign(
        Undernourished=lambda df: (
            df['Undernourished'].replace('<2.5', '2.4').astype(float)
        )
    )
)

In [67]:
# EDA 2
# Add a 'Mortality' column: deaths expressed as a percentage of confirmed cases.
death_ratio = food_df['Deaths'].div(food_df['Confirmed'])
food_df['Mortality'] = death_ratio.mul(100)


In [68]:
# Scatter of obesity against COVID-19 mortality, one marker per row of food_df.
# Marker size follows the 'Active' column; hovering a marker shows the country.
mean_obesity = food_df['Obesity'].mean()  # computed once, used for both ends of the line

fig = px.scatter(
    food_df,
    x="Mortality",
    y="Obesity",
    size="Active",
    hover_name='Country',
    log_x=False,
    size_max=30,
    template="simple_white",
    # A title and unit-bearing axis labels let the figure stand on its own
    # when the notebook is skimmed.
    title=(
        "Obesity vs. COVID-19 mortality by country"
        "<br><sup>Marker size: active cases; red line: mean obesity</sup>"
    ),
    labels={
        "Mortality": "Mortality (deaths as % of confirmed cases)",
        "Obesity": "Obesity (%)",
    },
)

# Horizontal reference line at the mean obesity value, drawn from x = 0 to the
# largest mortality value in the data.
fig.add_shape(
    type="line",
    x0=0,
    y0=mean_obesity,
    x1=food_df['Mortality'].max(),
    y1=mean_obesity,
    line=dict(color="crimson", width=4),
)
fig.show()

In [69]:
# EDA 3

# Scatter of undernourishment against COVID-19 mortality, one marker per row of
# food_df. Marker size follows the 'Active' column; hovering shows the country.
# Entries originally recorded as '<2.5' were replaced by 2.4 during
# pre-processing, so the lowest values sit at 2.4.
mean_undernourished = food_df['Undernourished'].mean()  # computed once, used for both ends of the line

fig = px.scatter(
    food_df,
    x="Mortality",
    y="Undernourished",
    size="Active",
    hover_name='Country',
    log_x=False,
    size_max=30,
    template="simple_white",
    # A title and unit-bearing axis labels let the figure stand on its own
    # when the notebook is skimmed.
    title=(
        "Undernourishment vs. COVID-19 mortality by country"
        "<br><sup>Marker size: active cases; red line: mean undernourishment</sup>"
    ),
    labels={
        "Mortality": "Mortality (deaths as % of confirmed cases)",
        "Undernourished": "Undernourished (%)",
    },
)

# Horizontal reference line at the mean undernourishment value, drawn from
# x = 0 to the largest mortality value in the data.
fig.add_shape(
    type="line",
    x0=0,
    y0=mean_undernourished,
    x1=food_df['Mortality'].max(),
    y1=mean_undernourished,
    line=dict(color="crimson", width=4),
)
fig.show()