In [204]:
# import the libraries used throughout the notebook
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
import geopandas as gpd
import warnings
warnings.filterwarnings('ignore')

## Load the data

In [205]:
# Read the FAOSTAT CSV export into a DataFrame and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='data/FAOSTAT_data.csv')
df.head(n=5)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,88,E,Estimated value,
1,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,89,E,Estimated value,
2,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20022004,2002-2004,%,92,E,Estimated value,
3,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20032005,2003-2005,%,93,E,Estimated value,
4,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20042006,2004-2006,%,94,E,Estimated value,


## Clean and tidy the data

In [206]:
# Normalise the column labels to snake_case: lowercase first, then swap
# every space for an underscore.
df.columns = df.columns.str.lower().str.replace(' ', '_')

In [207]:
# Report how many values are missing in each column.
print(df.isnull().sum())

# Drop the 'note' column, which is not needed for the analysis.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
df = df.drop(columns=['note'], errors='ignore')

# Use 'country' as the name of the area column (a no-op if already renamed).
df = df.rename(columns={'area': 'country'})

domain_code              0
domain                   0
area_code_(m49)          0
area                     0
element_code             0
element                  0
item_code                0
item                     0
year_code                0
year                     0
unit                     0
value                 9137
flag                     0
flag_description         0
note                129132
dtype: int64


In [208]:
# List the distinct indicator names; bracket access avoids any clash between
# the 'item' column and an attribute of the same name.
df['item'].unique()

array(['Average dietary energy supply adequacy (percent) (3-year average)',
       'Share of dietary energy supply derived from cereals, roots and tubers (kcal/cap/day) (3-year average)',
       'Average protein supply (g/cap/day) (3-year average)',
       'Average supply of protein of animal origin (g/cap/day) (3-year average)',
       'Gross domestic product per capita, PPP, (constant 2017 international $)',
       'Prevalence of undernourishment (percent) (3-year average)',
       'Number of people undernourished (million) (3-year average)',
       'Prevalence of severe food insecurity in the total population (percent) (3-year average)',
       'Prevalence of severe food insecurity in the male adult population (percent) (3-year average)',
       'Prevalence of severe food insecurity in the female adult population (percent) (3-year average)',
       'Prevalence of moderate or severe food insecurity in the total population (percent) (3-year average)',
       'Prevalence of moderate or

In [209]:
# Display the cleaned frame; the rendering is truncated to its first and last rows.
df

Unnamed: 0,domain_code,domain,area_code_(m49),country,element_code,element,item_code,item,year_code,year,unit,value,flag,flag_description
0,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,88,E,Estimated value
1,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,89,E,Estimated value
2,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20022004,2002-2004,%,92,E,Estimated value
3,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20032005,2003-2005,%,93,E,Estimated value
4,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20042006,2004-2006,%,94,E,Estimated value
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139523,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20142016,2014-2016,g/pc/d,69.9,E,Estimated value
139524,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20152017,2015-2017,g/pc/d,68.6,E,Estimated value
139525,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20162018,2016-2018,g/pc/d,66.9,E,Estimated value
139526,FS,Suite of Food Security Indicators,716,Zimbabwe,6123,Value,21061,Average fat supply (g/cap/day) (3-year average),20172019,2017-2019,g/pc/d,63.6,E,Estimated value


In [210]:
# Average "per capita food supply variability" per country.
#
# The pipeline works on a frame derived from `df` rather than assigning into
# a filtered slice, so it never triggers pandas' chained-assignment warning
# and leaves `df` untouched.
df1 = (
    df.loc[df['item'] == 'Per capita food supply variability (kcal/cap/day)']
    # non-numeric entries in 'value' become NaN and are skipped by mean()
    .assign(value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'))
    # one row per (country, item) holding the mean over all reported years
    .groupby(['country', 'item'], as_index=False)['value']
    .mean()
    # keep two decimals for display in the map
    .round({'value': 2})
)

## Data Visualization

In [211]:
# World map coloured by each country's mean value from df1.
# NOTE(review): locations are matched by country name; names in this dataset
# such as "Venezuela (Bolivarian Republic of)" may not be recognised by
# plotly and would then be left blank -- confirm on the rendered map.
fig = px.choropleth(
    data_frame=df1,
    locations='country',
    locationmode='country names',
    color="value",
    hover_name='country',
    title='Per Capita Food Supply Variability (kcal/cap/day)',
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.update_layout(height=800, width=1000)
fig.show()

In [212]:
# Time series table: one row per country, one column per reporting period.
#
# NOTE(review): with trend_item = None the mean below pools *every* indicator
# reported for a country and period (percentages, kcal, dollars, millions of
# people), so the resulting numbers are not interpretable as a quantity.
# Set trend_item to one of the names listed by df['item'].unique() to build
# the table for a single indicator. The default keeps the previous behaviour
# so that the cells below continue to run unchanged.
trend_item = None

trend_rows = df if trend_item is None else df[df['item'] == trend_item]

# Built as one chain on a derived frame so no value is assigned into a slice
# of `df` (the original pattern triggers pandas' chained-assignment warning).
df2 = (
    trend_rows[['country', 'year', 'value']]
    # non-numeric entries in 'value' become NaN and are skipped by mean()
    .assign(value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'))
    .groupby(['country', 'year'], as_index=False)['value']
    .mean()
    # wide layout: the 'year' labels become the column headers
    .pivot(index='country', columns='year', values='value')
)
df2

year,2000,2000-2002,2001,2001-2003,2002,2002-2004,2003,2003-2005,2004,2004-2006,...,2017,2017-2019,2018,2018-2020,2019,2019-2021,2020,2020-2022,2021,2022
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,223.700588,59.38,224.575882,62.94,270.487368,54.16,271.884737,52.62,244.503810,51.23,...,359.908235,42.872727,307.894000,42.986364,360.781765,42.558824,399.280000,35.960000,488.925455,426.945556
Albania,481.722273,52.45,530.579524,52.79,520.983636,53.08,539.859091,52.88,560.862727,52.99,...,928.068947,30.909091,1065.682941,31.566667,1084.625882,18.375000,1200.874667,18.150000,2105.637778,631.747143
Algeria,553.792500,45.26,640.562857,45.01,577.152500,44.92,653.721364,44.51,667.308636,44.67,...,774.687143,30.085000,771.723810,29.955000,672.041667,18.253333,848.272222,18.907692,1380.306364,456.571111
American Samoa,61.052500,,61.350000,,61.675000,,62.000000,,50.026000,,...,51.228000,,51.228000,,51.218000,,51.218000,,2.035000,3.020000
Andorra,38.786667,,44.412500,,40.442222,0.30,41.285556,0.30,42.087778,0.30,...,51.605000,0.300000,51.642500,0.300000,51.715000,0.300000,57.377143,,2.325000,3.020000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela (Bolivarian Republic of),217.288000,38.17,216.601000,37.84,206.417143,37.63,210.733810,37.46,211.302381,37.15,...,259.565000,40.200000,260.021667,40.020000,274.447059,30.800000,302.247333,41.000000,431.092000,523.415000
Viet Nam,347.109130,32.79,407.142500,33.14,349.480833,33.73,373.839565,34.13,386.381304,34.68,...,638.736190,24.972222,666.218571,22.875000,695.602857,11.794118,740.993000,12.593333,1341.648182,457.615556
Yemen,232.813529,40.91,233.290588,41.34,221.893889,41.60,192.257143,41.50,223.816667,41.47,...,265.430625,35.759091,265.357500,38.750000,263.096875,38.388235,293.105714,28.413333,400.944000,492.440000
Zambia,320.404737,32.50,322.346842,33.31,269.497826,33.18,312.042500,32.93,316.554500,32.59,...,393.202105,27.213636,345.222273,28.286364,393.562632,28.200000,454.166875,32.273333,655.732727,439.895556


In [213]:
# Change between the '2000' and '2021' columns, per country.
absolute_change = df2['2021'].sub(df2['2000'])
# The same change expressed as a percentage of the 2000 value.
relative_change = absolute_change.div(df2['2000']).mul(100)

In [214]:
# Print both change series, one after the other.
print(absolute_change, relative_change, sep='\n')

country
Afghanistan                            265.224866
Albania                               1623.915505
Algeria                                826.513864
American Samoa                         -59.017500
Andorra                                -36.461667
                                         ...     
Venezuela (Bolivarian Republic of)     213.804000
Viet Nam                               994.539051
Yemen                                  168.130471
Zambia                                 335.327990
Zimbabwe                               225.393030
Length: 204, dtype: float64
country
Afghanistan                           118.562436
Albania                               337.106170
Algeria                               149.246128
American Samoa                        -96.666803
Andorra                               -94.005672
                                         ...    
Venezuela (Bolivarian Republic of)     98.396598
Viet Nam                              286.520568
Yemen         

In [216]:
def _format_percent(change):
    """Render a percentage change as a whole-number string such as '118%'.

    The fractional part is truncated toward zero. Non-finite inputs (NaN from
    missing data, +/-inf from a zero baseline) cannot be passed to int(), so
    they are rendered as 'n/a' instead of raising.
    """
    if not np.isfinite(change):
        return 'n/a'
    return f'{int(change)}%'


# Summary table: the two endpoint years plus the absolute and relative change.
# The 'country' column duplicates the index labels.
df2 = pd.DataFrame({
    'country': df2.index,
    '2000': df2['2000'],
    '2021': df2['2021'],
    'absolute_change': absolute_change.round(2),
    'relative_change': relative_change.map(_format_percent),
})

df2

Unnamed: 0_level_0,country,2000,2021,absolute_change,relative_change
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,Afghanistan,223.700588,488.925455,265.22,118%
Albania,Albania,481.722273,2105.637778,1623.92,337%
Algeria,Algeria,553.792500,1380.306364,826.51,149%
American Samoa,American Samoa,61.052500,2.035000,-59.02,-96%
Andorra,Andorra,38.786667,2.325000,-36.46,-94%
...,...,...,...,...,...
Venezuela (Bolivarian Republic of),Venezuela (Bolivarian Republic of),217.288000,431.092000,213.80,98%
Viet Nam,Viet Nam,347.109130,1341.648182,994.54,286%
Yemen,Yemen,232.813529,400.944000,168.13,72%
Zambia,Zambia,320.404737,655.732727,335.33,104%


In [217]:
# Drop the 'country' column, which duplicates the labels held by the index.
# Guarded so the cell can be re-run safely: once the index has been reset the
# 'country' column is the only copy of the labels and must be kept, and
# errors='ignore' covers the case where the column has already been removed.
if df2.index.name == 'country':
    df2 = df2.drop(columns=['country'], errors='ignore')

In [218]:
# Move the country labels from the index into a regular column.
# Only done while the index is still the country index, so re-running the
# cell does not add a spurious 'index' column.
if df2.index.name == 'country':
    df2 = df2.reset_index()
df2

Unnamed: 0,country,2000,2021,absolute_change,relative_change
0,Afghanistan,223.700588,488.925455,265.22,118%
1,Albania,481.722273,2105.637778,1623.92,337%
2,Algeria,553.792500,1380.306364,826.51,149%
3,American Samoa,61.052500,2.035000,-59.02,-96%
4,Andorra,38.786667,2.325000,-36.46,-94%
...,...,...,...,...,...
199,Venezuela (Bolivarian Republic of),217.288000,431.092000,213.80,98%
200,Viet Nam,347.109130,1341.648182,994.54,286%
201,Yemen,232.813529,400.944000,168.13,72%
202,Zambia,320.404737,655.732727,335.33,104%
