# Vegetables Primary

http://www.fao.org/statistics/en

FAOSTAT provides access to over 3 million time-series and cross sectional data relating to food and agriculture. FAOSTAT contains data for 200 countries and more than 200 primary products and inputs in its core data set.

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = Path.cwd()                                          # get current working directory
warnings.simplefilter('ignore')

# Import Data

In [3]:
df = pd.read_csv('Vegetables Primary.csv')
df

Unnamed: 0,Country or Area,Element,Year,Unit,Value,Value Footnotes
0,Afghanistan,Area harvested,2018.0,ha,81082.0,A
1,Afghanistan,Area harvested,2017.0,ha,95976.0,A
2,Afghanistan,Area harvested,2016.0,ha,159315.0,A
3,Afghanistan,Area harvested,2015.0,ha,103546.0,A
4,Afghanistan,Area harvested,2014.0,ha,99962.0,A
...,...,...,...,...,...,...
39178,Zimbabwe,Production,1962.0,tonnes,83324.0,A
39179,Zimbabwe,Production,1961.0,tonnes,80322.0,A
39180,fnSeqID,Footnote,,,,
39181,A,"Aggregate, may include official, semi-official...",,,,


# Head and Tail

In [4]:
df = df[:39180]                                   # select index position 0-39179
df

Unnamed: 0,Country or Area,Element,Year,Unit,Value,Value Footnotes
0,Afghanistan,Area harvested,2018.0,ha,81082.0,A
1,Afghanistan,Area harvested,2017.0,ha,95976.0,A
2,Afghanistan,Area harvested,2016.0,ha,159315.0,A
3,Afghanistan,Area harvested,2015.0,ha,103546.0,A
4,Afghanistan,Area harvested,2014.0,ha,99962.0,A
...,...,...,...,...,...,...
39175,Zimbabwe,Production,1965.0,tonnes,89451.0,A
39176,Zimbabwe,Production,1964.0,tonnes,86242.0,A
39177,Zimbabwe,Production,1963.0,tonnes,84393.0,A
39178,Zimbabwe,Production,1962.0,tonnes,83324.0,A


In [5]:
df['Year'] = df['Year'].astype(int)                # convert year to integer
df

Unnamed: 0,Country or Area,Element,Year,Unit,Value,Value Footnotes
0,Afghanistan,Area harvested,2018,ha,81082.0,A
1,Afghanistan,Area harvested,2017,ha,95976.0,A
2,Afghanistan,Area harvested,2016,ha,159315.0,A
3,Afghanistan,Area harvested,2015,ha,103546.0,A
4,Afghanistan,Area harvested,2014,ha,99962.0,A
...,...,...,...,...,...,...
39175,Zimbabwe,Production,1965,tonnes,89451.0,A
39176,Zimbabwe,Production,1964,tonnes,86242.0,A
39177,Zimbabwe,Production,1963,tonnes,84393.0,A
39178,Zimbabwe,Production,1962,tonnes,83324.0,A


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39180 entries, 0 to 39179
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Country or Area  39180 non-null  object 
 1   Element          39180 non-null  object 
 2   Year             39180 non-null  int32  
 3   Unit             39180 non-null  object 
 4   Value            39180 non-null  float64
 5   Value Footnotes  39180 non-null  object 
dtypes: float64(1), int32(1), object(4)
memory usage: 1.6+ MB


In [7]:
df.rename(columns={'Country or Area': 'country_or_area'}, inplace=True) 

df.columns = [col.lower() for col in df.columns]
df.head()   

Unnamed: 0,country_or_area,element,year,unit,value,value footnotes
0,Afghanistan,Area harvested,2018,ha,81082.0,A
1,Afghanistan,Area harvested,2017,ha,95976.0,A
2,Afghanistan,Area harvested,2016,ha,159315.0,A
3,Afghanistan,Area harvested,2015,ha,103546.0,A
4,Afghanistan,Area harvested,2014,ha,99962.0,A


In [8]:
df_sub = df[['country_or_area', 'element', 'year', 'unit', 'value']]
df_sub

Unnamed: 0,country_or_area,element,year,unit,value
0,Afghanistan,Area harvested,2018,ha,81082.0
1,Afghanistan,Area harvested,2017,ha,95976.0
2,Afghanistan,Area harvested,2016,ha,159315.0
3,Afghanistan,Area harvested,2015,ha,103546.0
4,Afghanistan,Area harvested,2014,ha,99962.0
...,...,...,...,...,...
39175,Zimbabwe,Production,1965,tonnes,89451.0
39176,Zimbabwe,Production,1964,tonnes,86242.0
39177,Zimbabwe,Production,1963,tonnes,84393.0
39178,Zimbabwe,Production,1962,tonnes,83324.0


In [9]:
df_sub.describe(include='object')

Unnamed: 0,country_or_area,element,unit
count,39180,39180,39180
unique,248,3,3
top,Afghanistan,Production,tonnes
freq,174,13062,13062


In [10]:
df_sub['country_or_area'].value_counts()

Afghanistan              174
Northern Africa          174
Namibia                  174
Nauru                    174
Nepal                    174
                        ... 
Serbia and Montenegro     42
Serbia                    39
Montenegro                39
South Sudan               21
Sudan                     21
Name: country_or_area, Length: 248, dtype: int64

In [11]:
df_sub['element'].value_counts()

Production        13062
Area harvested    13059
Yield             13059
Name: element, dtype: int64

In [12]:
df_sub['unit'].value_counts()

tonnes    13062
ha        13059
hg/ha     13059
Name: unit, dtype: int64

# Descriptive Statistics

In [13]:
df_sub['value'].describe()

count    3.918000e+04
mean     3.969821e+06
std      3.434244e+07
min      1.000000e+00
25%      2.743000e+04
50%      1.038865e+05
75%      3.074938e+05
max      1.088839e+09
Name: value, dtype: float64

# Dataframe Grouping

In [14]:
cnt_area_grp = df_sub.groupby('country_or_area')
cnt_area_grp.head()

Unnamed: 0,country_or_area,element,year,unit,value
0,Afghanistan,Area harvested,2018,ha,81082.0
1,Afghanistan,Area harvested,2017,ha,95976.0
2,Afghanistan,Area harvested,2016,ha,159315.0
3,Afghanistan,Area harvested,2015,ha,103546.0
4,Afghanistan,Area harvested,2014,ha,99962.0
...,...,...,...,...,...
39006,Zimbabwe,Area harvested,2018,ha,34082.0
39007,Zimbabwe,Area harvested,2017,ha,33683.0
39008,Zimbabwe,Area harvested,2016,ha,33224.0
39009,Zimbabwe,Area harvested,2015,ha,32636.0


In [15]:
grpd_cnty_stats = df_sub.groupby('country_or_area').agg(
                                total_area = pd.NamedAgg('value', 'sum'))

grpd_cnty_stats['total_area'] = np.int64(grpd_cnty_stats['total_area'])
grpd_cnty_stats

Unnamed: 0_level_0,total_area
country_or_area,Unnamed: 1_level_1
Afghanistan,50459041
Africa,2379223464
Albania,33120672
Algeria,131346811
American Samoa,3031452
...,...
World,31987642121
Yemen,27300230
Yugoslav SFR,67563139
Zambia,21058739


In [16]:
grpd_cnty_stats['total_area'].sort_values(ascending=False).head(20)           # top 20 countries/area with the highest area harvested

country_or_area
World                                      31987642121
Asia                                       21097955854
Eastern Asia                               13688466543
China                                      12299100483
China, mainland                            12163200217
Low Income Food Deficit Countries           5528106999
Europe                                      5087994500
Southern Asia                               4470370904
India                                       3617228303
European Union                              3463937953
Americas                                    3302121461
Net Food Importing Developing Countries     2542528821
Africa                                      2379223464
Eastern Europe                              2161214749
Northern America                            1813528819
Southern Europe                             1753438549
United States of America                    1701194297
South-eastern Asia                          13512

In [17]:
cnt_area_grp.get_group('World')

Unnamed: 0,country_or_area,element,year,unit,value
38391,World,Area harvested,2018,ha,57883608.0
38392,World,Area harvested,2017,ha,57375777.0
38393,World,Area harvested,2016,ha,57075509.0
38394,World,Area harvested,2015,ha,56776521.0
38395,World,Area harvested,2014,ha,55324328.0
...,...,...,...,...,...
38560,World,Production,1965,tonnes,202758068.0
38561,World,Production,1964,tonnes,199994150.0
38562,World,Production,1963,tonnes,197459315.0
38563,World,Production,1962,tonnes,196718210.0


In [18]:
cnt_area_grp.get_group('Asia')

Unnamed: 0,country_or_area,element,year,unit,value
1647,Asia,Area harvested,2018,ha,41569624.0
1648,Asia,Area harvested,2017,ha,41165005.0
1649,Asia,Area harvested,2016,ha,40659114.0
1650,Asia,Area harvested,2015,ha,39888437.0
1651,Asia,Area harvested,2014,ha,39365783.0
...,...,...,...,...,...
1816,Asia,Production,1965,tonnes,95744092.0
1817,Asia,Production,1964,tonnes,91213958.0
1818,Asia,Production,1963,tonnes,94349866.0
1819,Asia,Production,1962,tonnes,96790984.0


In [19]:
cnt_area_grp.get_group('Africa')

Unnamed: 0,country_or_area,element,year,unit,value
174,Africa,Area harvested,2018,ha,8951918.0
175,Africa,Area harvested,2017,ha,8789304.0
176,Africa,Area harvested,2016,ha,8939916.0
177,Africa,Area harvested,2015,ha,9445882.0
178,Africa,Area harvested,2014,ha,8506530.0
...,...,...,...,...,...
343,Africa,Production,1965,tonnes,13855321.0
344,Africa,Production,1964,tonnes,13531554.0
345,Africa,Production,1963,tonnes,13051201.0
346,Africa,Production,1962,tonnes,12586847.0


In [20]:
cnt_area_grp.get_group('Europe')

Unnamed: 0,country_or_area,element,year,unit,value
12114,Europe,Area harvested,2018,ha,3595238.0
12115,Europe,Area harvested,2017,ha,3616084.0
12116,Europe,Area harvested,2016,ha,3633149.0
12117,Europe,Area harvested,2015,ha,3715632.0
12118,Europe,Area harvested,2014,ha,3698455.0
...,...,...,...,...,...
12283,Europe,Production,1965,tonnes,64271489.0
12284,Europe,Production,1964,tonnes,67192125.0
12285,Europe,Production,1963,tonnes,62637185.0
12286,Europe,Production,1962,tonnes,58864846.0


In [21]:
cnt_area_grp.get_group('Americas')

Unnamed: 0,country_or_area,element,year,unit,value
870,Americas,Area harvested,2018,ha,3588651.0
871,Americas,Area harvested,2017,ha,3633672.0
872,Americas,Area harvested,2016,ha,3671137.0
873,Americas,Area harvested,2015,ha,3552926.0
874,Americas,Area harvested,2014,ha,3578021.0
...,...,...,...,...,...
1039,Americas,Production,1965,tonnes,27668118.0
1040,Americas,Production,1964,tonnes,26869296.0
1041,Americas,Production,1963,tonnes,26280423.0
1042,Americas,Production,1962,tonnes,27381328.0


In [22]:
cnt_area_grp.get_group('Least Developed Countries')

Unnamed: 0,country_or_area,element,year,unit,value
19050,Least Developed Countries,Area harvested,2018,ha,4842266.0
19051,Least Developed Countries,Area harvested,2017,ha,4700358.0
19052,Least Developed Countries,Area harvested,2016,ha,4700909.0
19053,Least Developed Countries,Area harvested,2015,ha,4606169.0
19054,Least Developed Countries,Area harvested,2014,ha,4444550.0
...,...,...,...,...,...
19219,Least Developed Countries,Production,1965,tonnes,6917255.0
19220,Least Developed Countries,Production,1964,tonnes,6779608.0
19221,Least Developed Countries,Production,1963,tonnes,6573707.0
19222,Least Developed Countries,Production,1962,tonnes,6386004.0


In [23]:
grpd_cnty_stats['total_area'].sort_values(ascending=True).head(20)           # top 20 countries/area with the lowest area harvested

country_or_area
Samoa                                642262
Wallis and Futuna Islands           1507239
Montserrat                          1813183
Brunei Darussalam                   1863914
New Caledonia                       1938930
Eritrea                             2101264
Montenegro                          2119006
Cayman Islands                      2282884
Saint Pierre and Miquelon           2480099
Tonga                               2591105
Micronesia (Federated States of)    2758254
American Samoa                      3031452
Bhutan                              3071933
Pacific Islands Trust Territory     3163450
Timor-Leste                         3169710
Antigua and Barbuda                 3299693
Polynesia                           3487039
Gambia                              3539169
Mauritania                          3749866
Nauru                               3917425
Name: total_area, dtype: int64

In [24]:
cnt_area_grp.get_group('Samoa')

Unnamed: 0,country_or_area,element,year,unit,value
30057,Samoa,Area harvested,2018,ha,1140.0
30058,Samoa,Area harvested,2017,ha,1135.0
30059,Samoa,Area harvested,2016,ha,1130.0
30060,Samoa,Area harvested,2015,ha,1125.0
30061,Samoa,Area harvested,2014,ha,1122.0
...,...,...,...,...,...
30226,Samoa,Production,1965,tonnes,107.0
30227,Samoa,Production,1964,tonnes,106.0
30228,Samoa,Production,1963,tonnes,105.0
30229,Samoa,Production,1962,tonnes,104.0


In [25]:
cnt_area_grp.get_group('Wallis and Futuna Islands')

Unnamed: 0,country_or_area,element,year,unit,value
37695,Wallis and Futuna Islands,Area harvested,2018,ha,247.0
37696,Wallis and Futuna Islands,Area harvested,2017,ha,246.0
37697,Wallis and Futuna Islands,Area harvested,2016,ha,246.0
37698,Wallis and Futuna Islands,Area harvested,2015,ha,245.0
37699,Wallis and Futuna Islands,Area harvested,2014,ha,244.0
...,...,...,...,...,...
37864,Wallis and Futuna Islands,Production,1965,tonnes,338.0
37865,Wallis and Futuna Islands,Production,1964,tonnes,333.0
37866,Wallis and Futuna Islands,Production,1963,tonnes,328.0
37867,Wallis and Futuna Islands,Production,1962,tonnes,323.0


In [26]:
cnt_area_grp.get_group('Montserrat')

Unnamed: 0,country_or_area,element,year,unit,value
22779,Montserrat,Area harvested,2018,ha,231.0
22780,Montserrat,Area harvested,2017,ha,228.0
22781,Montserrat,Area harvested,2016,ha,225.0
22782,Montserrat,Area harvested,2015,ha,224.0
22783,Montserrat,Area harvested,2014,ha,221.0
...,...,...,...,...,...
22948,Montserrat,Production,1965,tonnes,150.0
22949,Montserrat,Production,1964,tonnes,155.0
22950,Montserrat,Production,1963,tonnes,155.0
22951,Montserrat,Production,1962,tonnes,165.0


In [27]:
cnt_area_grp.get_group('Brunei Darussalam')

Unnamed: 0,country_or_area,element,year,unit,value
4674,Brunei Darussalam,Area harvested,2018,ha,6187.0
4675,Brunei Darussalam,Area harvested,2017,ha,6514.0
4676,Brunei Darussalam,Area harvested,2016,ha,6723.0
4677,Brunei Darussalam,Area harvested,2015,ha,5653.0
4678,Brunei Darussalam,Area harvested,2014,ha,7130.0
...,...,...,...,...,...
4843,Brunei Darussalam,Production,1965,tonnes,3800.0
4844,Brunei Darussalam,Production,1964,tonnes,3500.0
4845,Brunei Darussalam,Production,1963,tonnes,3200.0
4846,Brunei Darussalam,Production,1962,tonnes,3000.0


In [28]:
cnt_area_grp.get_group('New Caledonia')

Unnamed: 0,country_or_area,element,year,unit,value
24345,New Caledonia,Area harvested,2018,ha,4271.0
24346,New Caledonia,Area harvested,2017,ha,4466.0
24347,New Caledonia,Area harvested,2016,ha,4097.0
24348,New Caledonia,Area harvested,2015,ha,5021.0
24349,New Caledonia,Area harvested,2014,ha,4434.0
...,...,...,...,...,...
24514,New Caledonia,Production,1965,tonnes,2500.0
24515,New Caledonia,Production,1964,tonnes,2200.0
24516,New Caledonia,Production,1963,tonnes,2000.0
24517,New Caledonia,Production,1962,tonnes,2000.0


In [29]:
cnt_area_grp.get_group('Eritrea')

Unnamed: 0,country_or_area,element,year,unit,value
11607,Eritrea,Area harvested,2018,ha,13833.0
11608,Eritrea,Area harvested,2017,ha,14843.0
11609,Eritrea,Area harvested,2016,ha,14726.0
11610,Eritrea,Area harvested,2015,ha,14559.0
11611,Eritrea,Area harvested,2014,ha,13998.0
...,...,...,...,...,...
11680,Eritrea,Production,1997,tonnes,30000.0
11681,Eritrea,Production,1996,tonnes,28000.0
11682,Eritrea,Production,1995,tonnes,30000.0
11683,Eritrea,Production,1994,tonnes,34000.0


In [30]:
cnt_area_grp.get_group('Montenegro').head()

Unnamed: 0,country_or_area,element,year,unit,value
22740,Montenegro,Area harvested,2018,ha,1738.0
22741,Montenegro,Area harvested,2017,ha,1732.0
22742,Montenegro,Area harvested,2016,ha,1720.0
22743,Montenegro,Area harvested,2015,ha,1571.0
22744,Montenegro,Area harvested,2014,ha,1349.0


In [31]:
cnt_area_grp.get_group('Cayman Islands')

Unnamed: 0,country_or_area,element,year,unit,value
6240,Cayman Islands,Area harvested,2018,ha,28.0
6241,Cayman Islands,Area harvested,2017,ha,27.0
6242,Cayman Islands,Area harvested,2016,ha,27.0
6243,Cayman Islands,Area harvested,2015,ha,27.0
6244,Cayman Islands,Area harvested,2014,ha,23.0
...,...,...,...,...,...
6394,Cayman Islands,Production,1970,tonnes,8.0
6395,Cayman Islands,Production,1969,tonnes,8.0
6396,Cayman Islands,Production,1968,tonnes,7.0
6397,Cayman Islands,Production,1967,tonnes,7.0


In [32]:
cnt_area_grp.get_group('Saint Pierre and Miquelon')

Unnamed: 0,country_or_area,element,year,unit,value
29787,Saint Pierre and Miquelon,Area harvested,2018,ha,7.0
29788,Saint Pierre and Miquelon,Area harvested,2017,ha,7.0
29789,Saint Pierre and Miquelon,Area harvested,2016,ha,7.0
29790,Saint Pierre and Miquelon,Area harvested,2015,ha,6.0
29791,Saint Pierre and Miquelon,Area harvested,2014,ha,6.0
...,...,...,...,...,...
29878,Saint Pierre and Miquelon,Production,1989,tonnes,15.0
29879,Saint Pierre and Miquelon,Production,1988,tonnes,10.0
29880,Saint Pierre and Miquelon,Production,1987,tonnes,3.0
29881,Saint Pierre and Miquelon,Production,1986,tonnes,6.0


In [33]:
cnt_area_grp.get_group('Tonga')

Unnamed: 0,country_or_area,element,year,unit,value
34923,Tonga,Area harvested,2018,ha,7036.0
34924,Tonga,Area harvested,2017,ha,6982.0
34925,Tonga,Area harvested,2016,ha,6928.0
34926,Tonga,Area harvested,2015,ha,6902.0
34927,Tonga,Area harvested,2014,ha,6806.0
...,...,...,...,...,...
35092,Tonga,Production,1965,tonnes,3750.0
35093,Tonga,Production,1964,tonnes,3600.0
35094,Tonga,Production,1963,tonnes,3500.0
35095,Tonga,Production,1962,tonnes,3380.0
