# Public health research with python

**FAO data, files exploration and data cleaning**

In [801]:
import pandas as pd

In [802]:
import numpy as np

*a. [undernutrition.csv](http://localhost:8888/edit/undernutrition.csv) : exploration and data cleaning*

- Loading file:

In [803]:
undernutrition = pd.read_csv('undernutrition.csv', sep=';')

- First 5 rows display :

In [804]:
undernutrition.head()

Unnamed: 0,Zone,Année,Valeur (en million d'hab)
0,Afghanistan,2012-2014,8.6
1,Afghanistan,2013-2015,8.8
2,Afghanistan,2014-2016,8.9
3,Afghanistan,2015-2017,9.7
4,Afghanistan,2016-2018,10.5


- Number of rows and columns :

In [805]:
undernutrition.shape

(1218, 3)

- .dtypes display :

In [806]:
undernutrition.dtypes

Zone                         object
Année                        object
Valeur (en million d'hab)    object
dtype: object

- "Valeur" column .unique values display :

In [807]:
undernutrition["Valeur (en million d'hab)"].unique()

array(['8.6', '8.8', '8.9', '9.7', '10.5', '11.1', '2.2', '2.5', '2.8',
       '3', '3.1', '3.3', '0.1', '1.3', '1.2', nan, '7.6', '6.2', '5.3',
       '5.6', '5.8', '5.7', '1.5', '1.6', '1.1', '1.7', '<0.1', '21.7',
       '22.4', '23.3', '22.3', '21.5', '20.9', '0.8', '2', '1.9', '1.8',
       '0.4', '0.5', '0.3', '0.2', '3.2', '3.4', '3.6', '3.8', '2.1',
       '2.3', '2.4', '0.6', '0.7', '0.9', '3.9', '2.7', '1.4', '4.8',
       '4.6', '4.9', '5', '4.4', '4.3', '4.2', '4.5', '26.2', '24.3',
       '21.3', '21.1', '2.9', '5.1', '5.2', '5.4', '203.8', '198.3',
       '193.1', '190.9', '190.1', '189.2', '23.6', '24', '24.1', '3.7',
       '7.3', '7.8', '8.4', '9', '9.1', '10.1', '10', '10.7', '11.5',
       '11.9', '11.8', '8.7', '10.3', '11', '1', '5.5', '6.8', '7.9',
       '5.9', '7', '9.2', '9.4', '9.6', '6.7', '7.1', '7.2', '14.7',
       '17.4', '20.2', '22.2', '22.8', '24.6', '31.1', '28.5', '25.4',
       '24.8', '26.1', '14.5', '15.4', '16.5', '15.8', '15.7', '10.8',
       '

- Update "Valeur" column to numeric :

In [808]:
undernutrition["Valeur (en million d'hab)"] = pd.to_numeric(undernutrition["Valeur (en million d'hab)"],errors = 'coerce')

- .dtypes display :

In [809]:
undernutrition.dtypes

Zone                          object
Année                         object
Valeur (en million d'hab)    float64
dtype: object

- .unique values display :

In [810]:
undernutrition["Valeur (en million d'hab)"].unique()

array([8.600e+00, 8.800e+00, 8.900e+00, 9.700e+00, 1.050e+01, 1.110e+01,
       2.200e+00, 2.500e+00, 2.800e+00, 3.000e+00, 3.100e+00, 3.300e+00,
       1.000e-01, 1.300e+00, 1.200e+00,       nan, 7.600e+00, 6.200e+00,
       5.300e+00, 5.600e+00, 5.800e+00, 5.700e+00, 1.500e+00, 1.600e+00,
       1.100e+00, 1.700e+00, 2.170e+01, 2.240e+01, 2.330e+01, 2.230e+01,
       2.150e+01, 2.090e+01, 8.000e-01, 2.000e+00, 1.900e+00, 1.800e+00,
       4.000e-01, 5.000e-01, 3.000e-01, 2.000e-01, 3.200e+00, 3.400e+00,
       3.600e+00, 3.800e+00, 2.100e+00, 2.300e+00, 2.400e+00, 6.000e-01,
       7.000e-01, 9.000e-01, 3.900e+00, 2.700e+00, 1.400e+00, 4.800e+00,
       4.600e+00, 4.900e+00, 5.000e+00, 4.400e+00, 4.300e+00, 4.200e+00,
       4.500e+00, 2.620e+01, 2.430e+01, 2.130e+01, 2.110e+01, 2.900e+00,
       5.100e+00, 5.200e+00, 5.400e+00, 2.038e+02, 1.983e+02, 1.931e+02,
       1.909e+02, 1.901e+02, 1.892e+02, 2.360e+01, 2.400e+01, 2.410e+01,
       3.700e+00, 7.300e+00, 7.800e+00, 8.400e+00, 

- Number of unique values :

In [811]:
len(undernutrition["Valeur (en million d'hab)"].unique())

139

- "Valeur" column display :

In [812]:
undernutrition["Valeur (en million d'hab)"]

0        8.6
1        8.8
2        8.9
3        9.7
4       10.5
        ... 
1213     NaN
1214     NaN
1215     NaN
1216     NaN
1217     NaN
Name: Valeur (en million d'hab), Length: 1218, dtype: float64

- Replace NaN values by 0.00 :

In [813]:
undernutrition.fillna(0, inplace=True)

- "Valeur" column display :

In [814]:
undernutrition["Valeur (en million d'hab)"]

0        8.6
1        8.8
2        8.9
3        9.7
4       10.5
        ... 
1213     0.0
1214     0.0
1215     0.0
1216     0.0
1217     0.0
Name: Valeur (en million d'hab), Length: 1218, dtype: float64

- Update "Année" column values :

In [815]:
undernutrition = undernutrition.replace("2012-2014","2013")

In [816]:
undernutrition = undernutrition.replace("2013-2015","2014")

In [817]:
undernutrition = undernutrition.replace("2014-2016","2015")

In [818]:
undernutrition = undernutrition.replace("2015-2017","2016")

In [819]:
undernutrition = undernutrition.replace("2016-2018","2017")

In [820]:
undernutrition = undernutrition.replace("2017-2019","2018")

- undernutrition dataframe display :

In [821]:
print(undernutrition)

             Zone Année  Valeur (en million d'hab)
0     Afghanistan  2013                        8.6
1     Afghanistan  2014                        8.8
2     Afghanistan  2015                        8.9
3     Afghanistan  2016                        9.7
4     Afghanistan  2017                       10.5
...           ...   ...                        ...
1213     Zimbabwe  2014                        0.0
1214     Zimbabwe  2015                        0.0
1215     Zimbabwe  2016                        0.0
1216     Zimbabwe  2017                        0.0
1217     Zimbabwe  2018                        0.0

[1218 rows x 3 columns]


- Convert "Valeur" column values in thousands of inhabitants :

In [822]:
undernutrition["Valeur (en milliers d'hab)"] = undernutrition["Valeur (en million d'hab)"] * 1000

- undernutrition dataframe display :

In [823]:
undernutrition.head()

Unnamed: 0,Zone,Année,Valeur (en million d'hab),Valeur (en milliers d'hab)
0,Afghanistan,2013,8.6,8600.0
1,Afghanistan,2014,8.8,8800.0
2,Afghanistan,2015,8.9,8900.0
3,Afghanistan,2016,9.7,9700.0
4,Afghanistan,2017,10.5,10500.0


- Convert "Valeur (en milliers d'hab)" column to integer :

In [824]:
undernutrition = undernutrition.astype({"Valeur (en milliers d'hab)": int})

- undernutrition dataframe display :

In [825]:
undernutrition.head()

Unnamed: 0,Zone,Année,Valeur (en million d'hab),Valeur (en milliers d'hab)
0,Afghanistan,2013,8.6,8600
1,Afghanistan,2014,8.8,8800
2,Afghanistan,2015,8.9,8900
3,Afghanistan,2016,9.7,9700
4,Afghanistan,2017,10.5,10500


In [899]:
undernutrition2017 = undernutrition.loc[population["Année"].isin([2017])]

In [900]:
undernutritionGroupByZone = undernutrition2017.groupby("Zone").sum()

In [902]:
undernutritionGroupByZone.head()

Unnamed: 0_level_0,Valeur (en million d'hab),Valeur (en milliers d'hab)
Zone,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,10.5,10500
Afrique du Sud,3.1,3100
Albanie,0.1,100
Algérie,1.3,1300
Allemagne,0.0,0


*b. [population.csv](http://localhost:8888/edit/population.csv) : exploration and data cleaning*

- Loading file:

In [903]:
population = pd.read_csv('population.csv', sep=';')

- First 5 rows display :

In [904]:
population.head()

Unnamed: 0,Zone,Année,Valeur (en milliers d'hab)
0,Afghanistan,2013,32269.589
1,Afghanistan,2014,33370.794
2,Afghanistan,2015,34413.603
3,Afghanistan,2016,35383.032
4,Afghanistan,2017,36296.113


- Number of rows and columns :

In [905]:
population.shape

(1416, 3)

- .dtypes display :

In [906]:
population.dtypes

Zone                           object
Année                           int64
Valeur (en milliers d'hab)    float64
dtype: object

- .unique values display :

In [907]:
population["Valeur (en milliers d'hab)"].unique()

array([32269.589, 33370.794, 34413.603, ..., 14030.331, 14236.595,
       14438.802])

- Number of unique values :

In [908]:
len(population["Valeur (en milliers d'hab)"].unique())

1413

- Align "Zone" column name with those of "food_availability" file :

In [909]:
population = population.replace("Royaume-Uni de Grande-Bretagne et d'Irlande du Nord","Royaume-Uni")

In [910]:
population = population.replace("Tchéquie","Tchéquie (la)")

In [911]:
population.iloc[1302:1308]

Unnamed: 0,Zone,Année,Valeur (en milliers d'hab)
1302,Tchéquie (la),2013,10586.533
1303,Tchéquie (la),2014,10591.108
1304,Tchéquie (la),2015,10601.397
1305,Tchéquie (la),2016,10618.857
1306,Tchéquie (la),2017,10641.034
1307,Tchéquie (la),2018,10665.677


In [912]:
population.iloc[1110:1116]

Unnamed: 0,Zone,Année,Valeur (en milliers d'hab)
1110,Royaume-Uni,2013,64984.018
1111,Royaume-Uni,2014,65423.047
1112,Royaume-Uni,2015,65860.146
1113,Royaume-Uni,2016,66297.944
1114,Royaume-Uni,2017,66727.461
1115,Royaume-Uni,2018,67141.684


- Convert "Valeur" column values to million of inhabitants :

In [913]:
population["Valeur (en million d'hab)"] = population["Valeur (en milliers d'hab)"] * 1000

- population dataframe display :

In [914]:
population.head()

Unnamed: 0,Zone,Année,Valeur (en milliers d'hab),Valeur (en million d'hab)
0,Afghanistan,2013,32269.589,32269589.0
1,Afghanistan,2014,33370.794,33370794.0
2,Afghanistan,2015,34413.603,34413603.0
3,Afghanistan,2016,35383.032,35383032.0
4,Afghanistan,2017,36296.113,36296113.0


- Convert "Valeur (en million d'hab)" column to integer :

In [915]:
population = population.astype({"Valeur (en million d'hab)": int})

- population dataframe display :

In [916]:
population.head()

Unnamed: 0,Zone,Année,Valeur (en milliers d'hab),Valeur (en million d'hab)
0,Afghanistan,2013,32269.589,32269589
1,Afghanistan,2014,33370.794,33370794
2,Afghanistan,2015,34413.603,34413603
3,Afghanistan,2016,35383.032,35383032
4,Afghanistan,2017,36296.113,36296113


- population 2017 new df :

In [917]:
pop2017 = population.loc[population["Année"].isin([2017])]

- population 2017 dataframe display :

In [918]:
pop2017.head()

Unnamed: 0,Zone,Année,Valeur (en milliers d'hab),Valeur (en million d'hab)
4,Afghanistan,2017,36296.113,36296113
10,Afrique du Sud,2017,57009.756,57009756
16,Albanie,2017,2884.169,2884169
22,Algérie,2017,41389.189,41389189
28,Allemagne,2017,82658.409,82658409


In [919]:
pop2017.rename(columns = {"Valeur (en milliers d'hab)": "Population (en milliers d'hab)"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [920]:
pop2017.rename(columns = {"Valeur (en million d'hab)": "Population (en million d'hab)"}, inplace=True)

In [921]:
pop2017.rename(columns = {"Année": "2017"}, inplace=True)

In [922]:
pop2017.head()

Unnamed: 0,Zone,2017,Population (en milliers d'hab),Population (en million d'hab)
4,Afghanistan,2017,36296.113,36296113
10,Afrique du Sud,2017,57009.756,57009756
16,Albanie,2017,2884.169,2884169
22,Algérie,2017,41389.189,41389189
28,Allemagne,2017,82658.409,82658409


In [923]:
pop2017.shape

(236, 4)

*c. [food_help.csv](http://localhost:8888/edit/food_help.csv) : exploration and data cleaning*


- Loading file:

In [924]:
foodHelp = pd.read_csv('food_help.csv', sep=';')

- First 5 rows display :

In [925]:
foodHelp.head()

Unnamed: 0,Pays bénéficiaire,Année,Produit,Valeur
0,Afghanistan,2013,Autres non-céréales,682
1,Afghanistan,2014,Autres non-céréales,335
2,Afghanistan,2013,Blé et Farin,39224
3,Afghanistan,2014,Blé et Farin,15160
4,Afghanistan,2013,Céréales,40504


- Number of rows and columns :

In [926]:
foodHelp.shape

(1475, 4)

- .dtypes display :

In [927]:
foodHelp.dtypes

Pays bénéficiaire    object
Année                 int64
Produit              object
Valeur                int64
dtype: object

- "Valeur" column .unique values display :

In [928]:
foodHelp["Valeur"].unique()

array([  682,   335, 39224, ...,    96,  5022,  2310], dtype=int64)

- Number of unique values :

In [929]:
len(foodHelp["Valeur"].unique())

1086

- Rename "Valeur" column by "Valeur (en tonnes)" :

In [930]:
foodHelp.rename(columns = {'Valeur': 'Valeur (en tonnes)'}, inplace=True)

In [931]:
print(foodHelp)

     Pays bénéficiaire  Année                   Produit  Valeur (en tonnes)
0          Afghanistan   2013       Autres non-céréales                 682
1          Afghanistan   2014       Autres non-céréales                 335
2          Afghanistan   2013              Blé et Farin               39224
3          Afghanistan   2014              Blé et Farin               15160
4          Afghanistan   2013                  Céréales               40504
...                ...    ...                       ...                 ...
1470          Zimbabwe   2015  Mélanges et préparations                  96
1471          Zimbabwe   2013              Non-céréales                5022
1472          Zimbabwe   2014              Non-céréales                2310
1473          Zimbabwe   2015              Non-céréales                 306
1474          Zimbabwe   2013                Riz, total                  64

[1475 rows x 4 columns]


*d. [food_availability.csv](http://localhost:8888/edit/food_availability.csv) : exploration and data cleaning*

- Loading file:

In [932]:
foodAvailability = pd.read_csv('food_availability.csv', sep=';')

- First 5 rows display :

In [933]:
foodAvailability.head()

Unnamed: 0,Zone,Produit,Origine,Aliments pour animaux,Autres Utilisations,Disponibilité alimentaire (Kcal/personne/jour),Disponibilité alimentaire en quantité (kg/personne/an),Disponibilité de matière grasse en quantité (g/personne/jour),Disponibilité de protéines en quantité (g/personne/jour),Disponibilité intérieure,Exportations - Quantité,Importations - Quantité,Nourriture,Pertes,Production,Semences,Traitement,Variation de stock
0,Afghanistan,Abats Comestible,animale,,,5.0,1.72,0.2,0.77,53.0,,,53.0,,53.0,,,
1,Afghanistan,"Agrumes, Autres",vegetale,,,1.0,1.29,0.01,0.02,41.0,2.0,40.0,39.0,2.0,3.0,,,
2,Afghanistan,Aliments pour enfants,vegetale,,,1.0,0.06,0.01,0.03,2.0,,2.0,2.0,,,,,
3,Afghanistan,Ananas,vegetale,,,0.0,0.0,,,0.0,,0.0,0.0,,,,,
4,Afghanistan,Bananes,vegetale,,,4.0,2.7,0.02,0.05,82.0,,82.0,82.0,,,,,


- Number of rows and columns :

In [934]:
foodAvailability.shape

(15605, 18)

- Replace NaN values by 0.00 :

In [935]:
foodAvailability.fillna(0, inplace=True)

- foodAvailability dataframe display :

In [936]:
print(foodAvailability)

               Zone                Produit   Origine  Aliments pour animaux  \
0       Afghanistan       Abats Comestible   animale                    0.0   
1       Afghanistan        Agrumes, Autres  vegetale                    0.0   
2       Afghanistan  Aliments pour enfants  vegetale                    0.0   
3       Afghanistan                 Ananas  vegetale                    0.0   
4       Afghanistan                Bananes  vegetale                    0.0   
...             ...                    ...       ...                    ...   
15600  Îles Salomon       Viande de Suides   animale                    0.0   
15601  Îles Salomon    Viande de Volailles   animale                    0.0   
15602  Îles Salomon          Viande, Autre   animale                    0.0   
15603  Îles Salomon                    Vin  vegetale                    0.0   
15604  Îles Salomon         Épices, Autres  vegetale                    0.0   

       Autres Utilisations  Disponibilité alimentai

- .dtypes display :

In [937]:
foodAvailability.dtypes

Zone                                                              object
Produit                                                           object
Origine                                                           object
Aliments pour animaux                                            float64
Autres Utilisations                                              float64
Disponibilité alimentaire (Kcal/personne/jour)                   float64
Disponibilité alimentaire en quantité (kg/personne/an)           float64
Disponibilité de matière grasse en quantité (g/personne/jour)    float64
Disponibilité de protéines en quantité (g/personne/jour)         float64
Disponibilité intérieure                                         float64
Exportations - Quantité                                          float64
Importations - Quantité                                          float64
Nourriture                                                       float64
Pertes                                             

- Convert use of food availability columns in tons :

In [938]:
foodAvailability["Aliments pour animaux"] = foodAvailability["Aliments pour animaux"] * 1000000

In [939]:
foodAvailability["Autres Utilisations"] = foodAvailability["Autres Utilisations"] * 1000000

In [940]:
foodAvailability["Disponibilité intérieure"] = foodAvailability["Disponibilité intérieure"] * 1000000

In [941]:
foodAvailability["Exportations - Quantité"] = foodAvailability["Exportations - Quantité"] * 1000000

In [942]:
foodAvailability["Importations - Quantité"] = foodAvailability["Importations - Quantité"] * 1000000

In [943]:
foodAvailability["Nourriture"] = foodAvailability["Nourriture"] * 1000000

In [944]:
foodAvailability["Pertes"] = foodAvailability["Pertes"] * 1000000

In [945]:
foodAvailability["Production"] = foodAvailability["Production"] * 1000000

In [946]:
foodAvailability["Semences"] = foodAvailability["Semences"] * 1000000

In [947]:
foodAvailability["Traitement"] = foodAvailability["Traitement"] * 1000000

In [948]:
foodAvailability["Variation de stock"] = foodAvailability["Variation de stock"] * 1000000

- foodAvailability dataframe display :

In [949]:
print(foodAvailability)

               Zone                Produit   Origine  Aliments pour animaux  \
0       Afghanistan       Abats Comestible   animale                    0.0   
1       Afghanistan        Agrumes, Autres  vegetale                    0.0   
2       Afghanistan  Aliments pour enfants  vegetale                    0.0   
3       Afghanistan                 Ananas  vegetale                    0.0   
4       Afghanistan                Bananes  vegetale                    0.0   
...             ...                    ...       ...                    ...   
15600  Îles Salomon       Viande de Suides   animale                    0.0   
15601  Îles Salomon    Viande de Volailles   animale                    0.0   
15602  Îles Salomon          Viande, Autre   animale                    0.0   
15603  Îles Salomon                    Vin  vegetale                    0.0   
15604  Îles Salomon         Épices, Autres  vegetale                    0.0   

       Autres Utilisations  Disponibilité alimentai

- Rename use of food availability columns by adding "(en tonnes)" :

In [950]:
foodAvailability.rename(columns = {'Nourriture': 'Nourriture (en kg)','Pertes': 'Pertes (en kg)','Aliments pour animaux': 'Aliments pour animaux (en kg)','Autres Utilisations': 'Autres Utilisations (en kg)','Disponibilité intérieure': 'Disponibilité intérieure (en kg)','Exportations - Quantité': 'Exportations - Quantité (en kg)','Importations - Quantité': 'Importations - Quantité (en kg)','Nourriture (en kg)': 'Nourriture (en kg)','Pertes': 'Pertes (en kg)','Production': 'Production (en kg)','Semences': 'Semences (en kg)','Traitement': 'Traitement (en kg)','Variation de stock': 'Variation de stock (en kg)'}) 

Unnamed: 0,Zone,Produit,Origine,Aliments pour animaux (en kg),Autres Utilisations (en kg),Disponibilité alimentaire (Kcal/personne/jour),Disponibilité alimentaire en quantité (kg/personne/an),Disponibilité de matière grasse en quantité (g/personne/jour),Disponibilité de protéines en quantité (g/personne/jour),Disponibilité intérieure (en kg),Exportations - Quantité (en kg),Importations - Quantité (en kg),Nourriture (en kg),Pertes (en kg),Production (en kg),Semences (en kg),Traitement (en kg),Variation de stock (en kg)
0,Afghanistan,Abats Comestible,animale,0.0,0.0,5.0,1.72,0.20,0.77,53000000.0,0.0,0.0,53000000.0,0.0,53000000.0,0.0,0.0,0.0
1,Afghanistan,"Agrumes, Autres",vegetale,0.0,0.0,1.0,1.29,0.01,0.02,41000000.0,2000000.0,40000000.0,39000000.0,2000000.0,3000000.0,0.0,0.0,0.0
2,Afghanistan,Aliments pour enfants,vegetale,0.0,0.0,1.0,0.06,0.01,0.03,2000000.0,0.0,2000000.0,2000000.0,0.0,0.0,0.0,0.0,0.0
3,Afghanistan,Ananas,vegetale,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Afghanistan,Bananes,vegetale,0.0,0.0,4.0,2.70,0.02,0.05,82000000.0,0.0,82000000.0,82000000.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15600,Îles Salomon,Viande de Suides,animale,0.0,0.0,45.0,4.70,4.28,1.41,3000000.0,0.0,0.0,3000000.0,0.0,2000000.0,0.0,0.0,0.0
15601,Îles Salomon,Viande de Volailles,animale,0.0,0.0,11.0,3.34,0.69,1.14,2000000.0,0.0,2000000.0,2000000.0,0.0,0.0,0.0,0.0,0.0
15602,Îles Salomon,"Viande, Autre",animale,0.0,0.0,0.0,0.06,0.00,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15603,Îles Salomon,Vin,vegetale,0.0,0.0,0.0,0.07,0.00,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**The State of Food Security and Nutrition in the World 2017**

*1. Proportion of people undernourished in 2017 :*

- DataFrames display :

In [951]:
undernutritionGroupByZone

Unnamed: 0_level_0,Valeur (en million d'hab),Valeur (en milliers d'hab)
Zone,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,10.5,10500
Afrique du Sud,3.1,3100
Albanie,0.1,100
Algérie,1.3,1300
Allemagne,0.0,0
...,...,...
États-Unis d'Amérique,0.0,0
Éthiopie,21.1,21100
Îles Cook,0.0,0
Îles Marshall,0.0,0


In [952]:
pop2017

Unnamed: 0,Zone,2017,Population (en milliers d'hab),Population (en million d'hab)
4,Afghanistan,2017,36296.113,36296113
10,Afrique du Sud,2017,57009.756,57009756
16,Albanie,2017,2884.169,2884169
22,Algérie,2017,41389.189,41389189
28,Allemagne,2017,82658.409,82658409
...,...,...,...,...
1390,Venezuela (République bolivarienne du),2017,29402.484,29402484
1396,Viet Nam,2017,94600.648,94600648
1402,Yémen,2017,27834.819,27834819
1408,Zambie,2017,16853.599,16853599


- Merge DataFrames :

In [953]:
proportionUndernutrition = undernutritionGroupByZone.merge(pop2017, on="Zone", how="right")

In [954]:
proportionUndernutrition

Unnamed: 0,Zone,Valeur (en million d'hab),Valeur (en milliers d'hab),2017,Population (en milliers d'hab),Population (en million d'hab)
0,Afghanistan,10.5,10500.0,2017,36296.113,36296113
1,Afrique du Sud,3.1,3100.0,2017,57009.756,57009756
2,Albanie,0.1,100.0,2017,2884.169,2884169
3,Algérie,1.3,1300.0,2017,41389.189,41389189
4,Allemagne,0.0,0.0,2017,82658.409,82658409
...,...,...,...,...,...,...
231,Venezuela (République bolivarienne du),8.0,8000.0,2017,29402.484,29402484
232,Viet Nam,6.5,6500.0,2017,94600.648,94600648
233,Yémen,0.0,0.0,2017,27834.819,27834819
234,Zambie,0.0,0.0,2017,16853.599,16853599


- Replace N/A with 0 and check :

In [955]:
proportionUndernutrition.fillna(0, inplace=True)

In [956]:
proportionUndernutrition[41:45]

Unnamed: 0,Zone,Valeur (en million d'hab),Valeur (en milliers d'hab),2017,Population (en milliers d'hab),Population (en million d'hab)
41,Chine - RAS de Hong-Kong,0.0,0.0,2017,7306.322,7306322
42,Chine - RAS de Macao,0.0,0.0,2017,622.585,622585
43,Chine (continentale),0.0,0.0,2017,1421021.791,1421021791
44,Chine (Taiwan Province de),0.0,0.0,2017,23674.546,23674546


- Create a new df only with columns 0, 2 and 4 :

In [957]:
proportionUndernutrition_final = proportionUndernutrition.iloc[0:237, [0,2,4]]

In [958]:
proportionUndernutrition_final

Unnamed: 0,Zone,Valeur (en milliers d'hab),Population (en milliers d'hab)
0,Afghanistan,10500.0,36296.113
1,Afrique du Sud,3100.0,57009.756
2,Albanie,100.0,2884.169
3,Algérie,1300.0,41389.189
4,Allemagne,0.0,82658.409
...,...,...,...
231,Venezuela (République bolivarienne du),8000.0,29402.484
232,Viet Nam,6500.0,94600.648
233,Yémen,0.0,27834.819
234,Zambie,0.0,16853.599


- Sum the column "Valeur (en milliers d'hab)" and check the result :

In [970]:
undernutrition_sum = proportionUndernutrition_final["Valeur (en milliers d'hab)"].sum()

In [974]:
print(undernutrition_sum)

534800.0


- Sum the column "Population (en milliers d'hab)" and check the result :

In [971]:
population_sum = proportionUndernutrition_final["Population (en milliers d'hab)"].sum()

In [975]:
print(population_sum)

7548134.1110000005


- Final result :

In [972]:
calculProportionUndernutrition = (undernutrition_sum / population_sum) * 100

In [979]:
calculProportionUndernutrition = calculProportionUndernutrition.astype(int)

In [980]:
print(calculProportionUndernutrition,"%")

7 %


*2. Theoretical number of people who could be fed in the world in 2017 :*

In [886]:
foodAvailibility.head()

Unnamed: 0,Zone,Produit,Origine,Aliments pour animaux,Autres Utilisations,Disponibilité alimentaire (Kcal/personne/jour),Disponibilité alimentaire en quantité (kg/personne/an),Disponibilité de matière grasse en quantité (g/personne/jour),Disponibilité de protéines en quantité (g/personne/jour),Disponibilité intérieure,Exportations - Quantité,Importations - Quantité,Nourriture,Pertes,Production,Semences,Traitement,Variation de stock
0,Afghanistan,Abats Comestible,animale,0.0,0.0,5.0,1.72,0.2,0.77,53000000.0,0.0,0.0,53000000.0,0.0,53000000.0,0.0,0.0,0.0
1,Afghanistan,"Agrumes, Autres",vegetale,0.0,0.0,1.0,1.29,0.01,0.02,41000000.0,2000000.0,40000000.0,39000000.0,2000000.0,3000000.0,0.0,0.0,0.0
2,Afghanistan,Aliments pour enfants,vegetale,0.0,0.0,1.0,0.06,0.01,0.03,2000000.0,0.0,2000000.0,2000000.0,0.0,0.0,0.0,0.0,0.0
3,Afghanistan,Ananas,vegetale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Afghanistan,Bananes,vegetale,0.0,0.0,4.0,2.7,0.02,0.05,82000000.0,0.0,82000000.0,82000000.0,0.0,0.0,0.0,0.0,0.0


In [887]:
foodAvailibilityKcal = foodAvailibility[{"Zone","Disponibilité alimentaire (Kcal/personne/jour)"}]

In [888]:
foodAvailibilityKcal.head()

Unnamed: 0,Zone,Disponibilité alimentaire (Kcal/personne/jour)
0,Afghanistan,5.0
1,Afghanistan,1.0
2,Afghanistan,1.0
3,Afghanistan,0.0
4,Afghanistan,4.0


In [889]:
foodAvailibilityKcalGroupByZone = foodAvailibilityKcal.groupby(by="Zone").sum()

In [890]:
foodAvailibilityKcalGroupByZone.head()

Unnamed: 0_level_0,Disponibilité alimentaire (Kcal/personne/jour)
Zone,Unnamed: 1_level_1
Afghanistan,2087.0
Afrique du Sud,3020.0
Albanie,3188.0
Algérie,3293.0
Allemagne,3503.0


In [891]:
foodAvailibilityKcalGroupByZone = foodAvailibilityKcalGroupByZone.astype({"Disponibilité alimentaire (Kcal/personne/jour)": int})

In [892]:
foodAvailibilityKcalGroupByZone.head()

Unnamed: 0_level_0,Disponibilité alimentaire (Kcal/personne/jour)
Zone,Unnamed: 1_level_1
Afghanistan,2087
Afrique du Sud,3020
Albanie,3188
Algérie,3293
Allemagne,3503


In [893]:
foodAvailibilityKcalGroupByZone.sum()

Disponibilité alimentaire (Kcal/personne/jour)    495442
dtype: int64

In [894]:
foodAvailibilityKcalGroupByZone.shape

(175, 1)

In [895]:
apportKcalJournalierPays = foodAvailibilityKcalGroupByZone["Disponibilité alimentaire (Kcal/personne/jour)"]

In [896]:
def proportion() :
    for elt in apportKcalJournalierPays:
        if elt <= 2500:
            print(1)
        else:
            print(0)

In [897]:
proportion().sum()

1
0
0
0
0
1
1
0
0
0
0
0
0
0
1
0
0
0
0
1
0
1
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
1
0
1
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
1
1
1
1
0
0
0
1
0
1
0
0
1
0
0
0
0
1
1
1
0
0
1
1
0
0
0
0
0
0
0
0
0
1
1
1
0
0
1
0
1
1


AttributeError: 'NoneType' object has no attribute 'sum'