# Project 3
----

### Summary
This notebook shows the process we followed to clean, explore, and analyze the data set
that contains information about __Food Prices__. It also contains the variables of interest for the project.

However, to make the final visualizations, we included only a small subset of these variables.

----

In [1]:
# Dependencies
import pandas as pd

In [10]:
# Load the food-price CSV into a DataFrame. The file is decoded as ISO-8859-1,
# and low_memory=False makes pandas read the file in one pass instead of in
# chunks when inferring column dtypes.
food_df = pd.read_csv(
    "./Resources/Prices_E_Americas.csv",
    encoding="ISO-8859-1",
    low_memory=False,
)

# Non-null value count for every column (not the total number of rows)
food_df.count()

Area Code       5376
Area            5376
Item Code       5376
Item            5376
Element Code    5376
Element         5376
Unit            5376
Y1991           3033
Y1991F           210
Y1992           3192
Y1992F           318
Y1993           3315
Y1993F           420
Y1994           3387
Y1994F           462
Y1995           3576
Y1995F           496
Y1996           3787
Y1996F           523
Y1997           3847
Y1997F           502
Y1998           3946
Y1998F           508
Y1999           4032
Y1999F           490
Y2000           4110
Y2000F           463
Y2001           4122
Y2001F           487
Y2002           4077
                ... 
Y2003           4119
Y2003F           373
Y2004           3840
Y2004F           613
Y2005           3885
Y2005F           586
Y2006           3870
Y2006F           553
Y2007           3945
Y2007F           574
Y2008           4049
Y2008F           697
Y2009           3710
Y2009F           562
Y2010           3857
Y2010F           463
Y2011        

In [11]:
# Preview the first five rows of the loaded data
food_df.head(5)

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1991,Y1991F,Y1992,...,Y2013,Y2013F,Y2014,Y2014F,Y2015,Y2015F,Y2016,Y2016F,Y2017,Y2017F
0,8,Antigua and Barbuda,486,Bananas,5530,Producer Price (LCU/tonne),LCU,,,,...,,,,,,,,,,
1,8,Antigua and Barbuda,486,Bananas,5531,Producer Price (SLC/tonne),SLC,,,,...,,,,,,,,,,
2,8,Antigua and Barbuda,486,Bananas,5532,Producer Price (USD/tonne),USD,,,,...,,,,,,,,,,
3,8,Antigua and Barbuda,414,"Beans, green",5530,Producer Price (LCU/tonne),LCU,,,,...,,,,,,,,,,
4,8,Antigua and Barbuda,414,"Beans, green",5531,Producer Price (SLC/tonne),SLC,,,,...,,,,,,,,,,


In [22]:
# Inspect the dtype pandas inferred for each column
food_df.dtypes

Area Code         int64
Area             object
Item Code         int64
Item             object
Element Code      int64
Element          object
Unit             object
Y1991           float64
Y1991F           object
Y1992           float64
Y1992F           object
Y1993           float64
Y1993F           object
Y1994           float64
Y1994F           object
Y1995           float64
Y1995F           object
Y1996           float64
Y1996F           object
Y1997           float64
Y1997F           object
Y1998           float64
Y1998F           object
Y1999           float64
Y1999F           object
Y2000           float64
Y2000F           object
Y2001           float64
Y2001F           object
Y2002           float64
                 ...   
Y2003           float64
Y2003F           object
Y2004           float64
Y2004F           object
Y2005           float64
Y2005F           object
Y2006           float64
Y2006F           object
Y2007           float64
Y2007F           object
Y2008           

In [23]:
# Subset: rows for Mexico whose Unit is 'USD'.
# (Glossary for the Unit column: LCU = Local Currency Unit.)
mex_df = food_df.loc[
    food_df["Area"].eq('Mexico') & food_df["Unit"].eq('USD')
]
mex_df.head()

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1991,Y1991F,Y1992,...,Y2013,Y2013F,Y2014,Y2014F,Y2015,Y2015F,Y2016,Y2016F,Y2017,Y2017F
2858,138,Mexico,800,Agave fibres nes,5532,Producer Price (USD/tonne),USD,82.8,,84.0,...,,,,,,,,,,
2861,138,Mexico,711,"Anise, badian, fennel, coriander",5532,Producer Price (USD/tonne),USD,882.2,,1909.3,...,2038.5,,1669.9,,1628.7,,1147.7,,1170.5,
2864,138,Mexico,515,Apples,5532,Producer Price (USD/tonne),USD,403.9,,287.2,...,389.0,,441.3,,363.5,,348.2,,461.0,
2867,138,Mexico,526,Apricots,5532,Producer Price (USD/tonne),USD,,,,...,905.7,F,834.2,F,405.1,,340.7,,323.9,
2870,138,Mexico,366,Artichokes,5532,Producer Price (USD/tonne),USD,988.3,,965.8,...,681.0,,847.5,,449.6,,467.6,,542.0,


In [29]:
# Percentage of missing (NaN) values in each column of the Mexico subset,
# listed from the most incomplete column to the least.
percent_missing = mex_df.isnull().sum() * 100 / len(mex_df)
# Build the sorted table in a single expression instead of sorting in place,
# so re-running the cell never mutates a frame that was already displayed.
missing_value_df = (
    pd.DataFrame({'percent_missing': percent_missing})
    .sort_values('percent_missing', ascending=False)
)
missing_value_df

Unnamed: 0,percent_missing
Y2017F,100.000000
Y1991F,100.000000
Y1996F,98.496241
Y1992F,98.496241
Y1994F,97.744361
Y2016F,97.744361
Y2015F,97.744361
Y2000F,97.744361
Y1998F,97.744361
Y1999F,96.992481


In [24]:
# Subset: rows for Canada whose Unit is 'USD'
can_df = food_df.loc[
    food_df["Area"].eq('Canada') & food_df["Unit"].eq('USD')
]
can_df.head()

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1991,Y1991F,Y1992,...,Y2013,Y2013F,Y2014,Y2014F,Y2015,Y2015F,Y2016,Y2016F,Y2017,Y2017F
992,33,Canada,515,Apples,5532,Producer Price (USD/tonne),USD,247.0,,221.7,...,553.0,,475.2,,414.8,,429.4,,471.8,
995,33,Canada,526,Apricots,5532,Producer Price (USD/tonne),USD,807.4,,614.7,...,1657.1,,1317.0,,1328.9,,1426.6,,1459.1,
998,33,Canada,367,Asparagus,5532,Producer Price (USD/tonne),USD,1840.8,,1981.4,...,3479.0,,3425.4,,3133.3,,3105.6,,3516.5,
1001,33,Canada,44,Barley,5532,Producer Price (USD/tonne),USD,64.6,,62.9,...,237.3,,169.0,,174.1,,166.3,,148.8,
1004,33,Canada,176,"Beans, dry",5532,Producer Price (USD/tonne),USD,255.7,,285.4,...,817.5,,793.6,,726.4,,656.7,,622.5,


In [None]:
# Percentage of missing (NaN) values in each column of the Canada subset,
# listed from the most incomplete column to the least.
percent_missing = can_df.isnull().sum() * 100 / len(can_df)
# Build the sorted table in a single expression instead of sorting in place,
# so re-running the cell never mutates a frame that was already displayed.
missing_value_df = (
    pd.DataFrame({'percent_missing': percent_missing})
    .sort_values('percent_missing', ascending=False)
)
missing_value_df

In [25]:
# Subset: rows for the United States of America whose Unit is 'USD'
usa_df = food_df.loc[
    food_df["Area"].eq('United States of America') & food_df["Unit"].eq('USD')
]
usa_df.head()

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1991,Y1991F,Y1992,...,Y2013,Y2013F,Y2014,Y2014F,Y2015,Y2015F,Y2016,Y2016F,Y2017,Y2017F
4679,231,United States of America,221,"Almonds, with shell",5532,Producer Price (USD/tonne),USD,1571.0,,1716.0,...,7077.0,,8818.0,,6900.0,,5379.0,,,
4682,231,United States of America,515,Apples,5532,Producer Price (USD/tonne),USD,395.0,,300.0,...,668.0,,567.0,,741.0,,705.0,,,
4685,231,United States of America,526,Apricots,5532,Producer Price (USD/tonne),USD,407.0,,356.0,...,812.0,,902.0,,1113.0,,990.0,,,
4688,231,United States of America,226,Areca nuts,5532,Producer Price (USD/tonne),USD,,,,...,1918.0,,1918.0,,2138.0,,2205.0,,,
4691,231,United States of America,366,Artichokes,5532,Producer Price (USD/tonne),USD,,,869.0,...,1345.0,,1270.0,,1936.0,,1737.0,,1543.0,


In [None]:
# Percentage of missing (NaN) values in each column of the USA subset,
# listed from the most incomplete column to the least.
percent_missing = usa_df.isnull().sum() * 100 / len(usa_df)
# Build the sorted table in a single expression instead of sorting in place,
# so re-running the cell never mutates a frame that was already displayed.
missing_value_df = (
    pd.DataFrame({'percent_missing': percent_missing})
    .sort_values('percent_missing', ascending=False)
)
missing_value_df

In [26]:
# For each country in the subset (only Mexico here), tally how many rows
# exist for every item.
(
    mex_df
    .groupby('Area')['Item']
    .value_counts()
)

Area    Item                                        
Mexico  Agave fibres nes                                1
        Anise, badian, fennel, coriander                1
        Apples                                          1
        Apricots                                        1
        Artichokes                                      1
        Asparagus                                       1
        Avocados                                        1
        Bananas                                         1
        Bananas non-Cavendish                           1
        Barley                                          1
        Beans, dry                                      1
        Beans, green                                    1
        Beeswax                                         1
        Berries nes                                     1
        Blueberries                                     1
        Broad beans, horse beans, dry                   1
        Cabbages an

In [30]:
# Get information for the graph: group the Mexico subset by item and apply
# 'value_counts' to each selected column, i.e. count how often each distinct
# value occurs per item.
# The positional slice columns[9:102] starts at Y1992 in the displayed output,
# so Y1991 / Y1991F are not included -- NOTE(review): confirm this is intended.
# NOTE(review): the upper bound 102 looks larger than the number of columns
# shown by head(); if so the slice simply runs to the last column -- confirm.
mex_df.groupby(['Item']).agg({i:'value_counts' for i in mex_df.columns[9:102]})

Unnamed: 0,Unnamed: 1,Y1992,Y1992F,Y1993,Y1993F,Y1994,Y1994F,Y1995,Y1995F,Y1996,Y1996F,...,Y2013,Y2013F,Y2014,Y2014F,Y2015,Y2015F,Y2016,Y2016F,Y2017,Y2017F
Agave fibres nes,84.0,1.0,,,,,,,,,,...,,,,,,,,,,
"Anise, badian, fennel, coriander",1909.3,1.0,,,,,,,,,,...,,,,,,,,,,
Apples,287.2,1.0,,,,,,,,,,...,,,,,,,,,,
Artichokes,965.8,1.0,,,,,,,,,,...,,,,,,,,,,
Asparagus,2297.3,1.0,,,,,,,,,,...,,,,,,,,,,
Avocados,602.6,1.0,,,,,,,,,,...,,,,,,,,,,
Bananas,186.4,1.0,,,,,,,,,,...,,,,,,,,,,
Barley,224.6,1.0,,,,,,,,,,...,,,,,,,,,,
"Beans, dry",732.2,1.0,,,,,,,,,,...,,,,,,,,,,
"Beans, green",370.3,1.0,,,,,,,,,,...,,,,,,,,,,
