In [34]:
# import necessary variables
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px

## Load the data

In [35]:
df = pd.read_csv('data/FAOSTAT_data.csv')
df.head()

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,88,E,Estimated value,
1,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,89,E,Estimated value,
2,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20022004,2002-2004,%,92,E,Estimated value,
3,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20032005,2003-2005,%,93,E,Estimated value,
4,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20042006,2004-2006,%,94,E,Estimated value,


## Clean and tidy the data

In [36]:
# lowercase the columns and remove the space bar
df.columns = df.columns.str.lower()
df.columns = df.columns.str.replace(' ', '_')

In [38]:
# check the null values
print(df.isnull().sum())

# # drop the null values
# df = df.dropna()

# drop the columns that are not needed
df = df.drop(['note'], axis=1)

domain_code              0
domain                   0
area_code_(m49)          0
area                     0
element_code             0
element                  0
item_code                0
item                     0
year_code                0
year                     0
unit                     0
value                 9137
flag                     0
flag_description         0
note                129132
dtype: int64


In [39]:
df

Unnamed: 0,domain_code,domain,area_code_(m49),area,element_code,element,item_code,item,year_code,year,unit,value,flag,flag_description
0,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,88,E,Estimated value
1,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,89,E,Estimated value
2,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20022004,2002-2004,%,92,E,Estimated value
3,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20032005,2003-2005,%,93,E,Estimated value
4,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20042006,2004-2006,%,94,E,Estimated value
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139523,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20142016,2014-2016,g/pc/d,69.9,E,Estimated value
139524,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20152017,2015-2017,g/pc/d,68.6,E,Estimated value
139525,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20162018,2016-2018,g/pc/d,66.9,E,Estimated value
139526,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20172019,2017-2019,g/pc/d,63.6,E,Estimated value


In [42]:
df.item.unique()

array(['Average dietary energy supply adequacy (percent) (3-year average)',
       'Share of dietary energy supply derived from cereals, roots and tubers (kcal/cap/day) (3-year average)',
       'Average protein supply (g/cap/day) (3-year average)',
       'Average supply of protein of animal origin (g/cap/day) (3-year average)',
       'Gross domestic product per capita, PPP, (constant 2017 international $)',
       'Prevalence of undernourishment (percent) (3-year average)',
       'Number of people undernourished (million) (3-year average)',
       'Prevalence of severe food insecurity in the total population (percent) (3-year average)',
       'Prevalence of severe food insecurity in the male adult population (percent) (3-year average)',
       'Prevalence of severe food insecurity in the female adult population (percent) (3-year average)',
       'Prevalence of moderate or severe food insecurity in the total population (percent) (3-year average)',
       'Prevalence of moderate or

In [48]:
# filter the data

# per capita food supply variability (kcal/cap/day)
df1 = df[df['item'] == 'Per capita food supply variability (kcal/cap/day)']

# group by country and item
df1 = df1.groupby(['area', 'item'], as_index=False)['value'].sum()
df1 = df1[['area', 'item', 'value']]

In [49]:
df1

Unnamed: 0,area,item,value
0,Afghanistan,Per capita food supply variability (kcal/cap/day),58477172504444292831186258595343222627313231
1,Albania,Per capita food supply variability (kcal/cap/day),72493639566262602534446363382338484627141313
2,Algeria,Per capita food supply variability (kcal/cap/day),32313225272731313333252436404337262751524245
3,Angola,Per capita food supply variability (kcal/cap/day),43182020201225455960581642051971676283909778278
4,Antigua and Barbuda,Per capita food supply variability (kcal/cap/day),3
...,...,...,...
177,Venezuela (Bolivarian Republic of),Per capita food supply variability (kcal/cap/day),8252491311311311178436111146143100253533972372...
178,Viet Nam,Per capita food supply variability (kcal/cap/day),5941421512192132333119231417191626281615817
179,Yemen,Per capita food supply variability (kcal/cap/day),122151723211012242218565470624091143132904482
180,Zambia,Per capita food supply variability (kcal/cap/day),503230273023223669705857676238761320212130


In [43]:
df.groupby(['var1', 'var2'])['var3'].mean()

                                                     item value
304     Per capita food production variability (consta...  16.3
305     Per capita food production variability (consta...    21
306     Per capita food production variability (consta...  20.8
307     Per capita food production variability (consta...  17.3
308     Per capita food production variability (consta...  12.4
...                                                   ...   ...
139079  Per capita food production variability (consta...  25.9
139080  Per capita food production variability (consta...  26.4
139081  Per capita food production variability (consta...  19.7
139082  Per capita food production variability (consta...   5.6
139083  Per capita food production variability (consta...   5.6

[3920 rows x 2 columns]


## Data Visualization

In [None]:
# group by countries
df_country = df.groupby('area').sum().reset_index()