In [1]:
# Dependencies and Setup
import pandas as pd

# Files to load
CQI_merged_data = 'Resources/merged_data_cleaned.csv'
ICO_disappearances = 'Resources/disappearance.csv'
ICO_domestic_consumption = 'Resources/domestic-consumption.csv'

# Read CSV files into Dataframe
merged_data_df = pd.read_csv(CQI_merged_data)
disappearances_df = pd.read_csv(ICO_disappearances)
domestic_consumption_df = pd.read_csv(ICO_domestic_consumption)

In [2]:
merged_data_df.head(5)

Unnamed: 0.1,Unnamed: 0,Species,Owner,Country.of.Origin,Farm.Name,Lot.Number,Mill,ICO.Number,Company,Altitude,...,Color,Category.Two.Defects,Expiration,Certification.Body,Certification.Address,Certification.Contact,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters
0,0,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,...,Green,0,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0
1,1,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,...,Green,1,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0
2,2,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,,,1600 - 1800 m,...,,0,"May 31st, 2011",Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,1600.0,1800.0,1700.0
3,3,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,,wolensu,,yidnekachew debessa coffee plantation,1800-2200,...,Green,2,"March 25th, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1800.0,2200.0,2000.0
4,4,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,...,Green,2,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0


In [3]:
# verify data types
merged_data_df.dtypes

# only load needed data points
merged_data_df = merged_data_df[['Species','Owner','Country.of.Origin', 'Farm.Name', 'Aroma', 'Flavor', 'Aftertaste', 'Acidity', 'Body', 'Balance', 'Uniformity', 'Clean.Cup', 'Sweetness', 'Cupper.Points', 'Total.Cup.Points']]

#rename columns
merged_data_df = merged_data_df.rename(columns={"Country.of.Origin": "Country of Origin", "Farm.Name": "Farm Name", "Clean.Cup": "Clean Cup", "Cupper.Points": "Cupper Points", "Total.Cup.Points": "Total Cup Points"})

# drop any columns that are all 0
merged_data_df = merged_data_df.loc[:, (merged_data_df != 0).any(axis=0)]

merged_data_df.head(5)

Unnamed: 0,Species,Owner,Country of Origin,Farm Name,Aroma,Flavor,Aftertaste,Acidity,Body,Balance,Uniformity,Clean Cup,Sweetness,Cupper Points,Total Cup Points
0,Arabica,metad plc,Ethiopia,metad plc,8.67,8.83,8.67,8.75,8.5,8.42,10.0,10.0,10.0,8.75,90.58
1,Arabica,metad plc,Ethiopia,metad plc,8.75,8.67,8.5,8.58,8.42,8.42,10.0,10.0,10.0,8.58,89.92
2,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",8.42,8.5,8.42,8.42,8.33,8.42,10.0,10.0,10.0,9.25,89.75
3,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,8.17,8.58,8.42,8.42,8.5,8.25,10.0,10.0,10.0,8.67,89.0
4,Arabica,metad plc,Ethiopia,metad plc,8.25,8.5,8.25,8.5,8.42,8.33,10.0,10.0,10.0,8.58,88.83


In [4]:
disappearances_df.head(5)

Unnamed: 0,disappearance,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Austria,1340.123985,1302.08597,1212.333964,1335.000982,1086.218996,1011.769996,1075.788001,1087.079597,1104.738197,...,885.715848,902.854329,1117.345643,1269.019458,1249.0,,,,,
1,Belgium,,,,,,,,,,...,934.292881,870.665179,934.468749,914.636559,1245.0,,,,,
2,Belgium/Luxembourg,1124.109984,708.15698,696.372979,789.436984,958.21499,1073.014984,1038.637071,886.263764,1309.406477,...,,,,,,,,,,
3,Bulgaria,101.782998,16.761001,174.878012,326.955006,352.764006,393.408007,246.474995,274.918003,324.160004,...,409.275903,395.357991,359.519751,375.941251,419.0,,,,,
4,Croatia,,,134.545004,131.89,163.862001,288.481005,304.468998,361.396003,319.73,...,365.637529,369.838593,367.153143,360.36412,387.0,,,,,


In [5]:
# verify data types
disappearances_df.dtypes

# rename first column to Country
disappearances_df = disappearances_df.rename(columns={'disappearance': 'Country'})

# drop null values
disappearances_df = disappearances_df.dropna()

# only load last 10 years of data
disappearances_df = disappearances_df[['Country', '2009','2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']]

# pivot years to rows
disappearances_df = disappearances_df.transpose()
disappearances_df = disappearances_df.reset_index(drop=False)
new_header = disappearances_df.iloc[0] #grab the first row for the header
disappearances_df = disappearances_df[1:] #take the data less the header row
disappearances_df.columns = new_header 
disappearances_df = disappearances_df.rename(columns={'Country': 'Year'})

# drop any columns that are all 0
disappearances_df = disappearances_df.loc[:, (disappearances_df != 0).any(axis=0)]

disappearances_df.head(5)

Unnamed: 0,Year,Japan,Norway,Switzerland,Tunisia,USA
1,2009,7130.4,715.239,965.545,288.727,21436.0
2,2010,7192.4,745.7,1011.81,301.474,21783.5
3,2011,7014.85,785.127,1034.97,415.374,22043.9
4,2012,7130.95,723.367,1047.08,421.136,22231.7
5,2013,7435.0,763.0,1123.0,429.0,23417.0


In [6]:
domestic_consumption_df.head(5)

Unnamed: 0,domestic_consumption,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Angola,20.0,30.0,35.0,20.0,25.0,10.0,20.0,40.0,30.0,...,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
1,Bolivia (Plurinational State of),25.0,27.0,27.5,28.5,29.5,30.5,31.5,32.5,33.0,...,46.0,47.5,49.0,50.5,52.0,53.5,55.0,57.0,58.5,60.0
2,Brazil,8200.0,8500.0,8900.0,9100.0,9300.0,10100.0,11000.0,11500.0,12200.0,...,18390.0,19132.0,19720.0,20330.0,20085.0,20333.0,20508.0,21225.0,21997.0,22250.0
3,Burundi,2.0,1.6,1.7,1.91,2.0,2.0,2.0,2.0,2.0,...,1.399,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
4,Ecuador,350.0,350.0,350.0,350.0,350.0,350.0,300.0,300.0,300.0,...,150.0,150.0,150.0,150.0,155.0,155.0,155.0,155.0,155.0,155.0


In [7]:
# verify data types
domestic_consumption_df.dtypes

# rename first column to Country
domestic_consumption_df = domestic_consumption_df.rename(columns={'domestic_consumption': 'Country'})

# drop null values
domestic_consumption_df = domestic_consumption_df.dropna()

# only load last 10 years of data
domestic_consumption_df = domestic_consumption_df[['Country', '2009','2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']]

# pivot years to rows
domestic_consumption_df = domestic_consumption_df.transpose()
domestic_consumption_df = domestic_consumption_df.reset_index(drop=False)
new_header = domestic_consumption_df.iloc[0] #grab the first row for the header
domestic_consumption_df = domestic_consumption_df[1:] #take the data less the header row
domestic_consumption_df.columns = new_header 
domestic_consumption_df = domestic_consumption_df.rename(columns={'Country': 'Year'})

# drop any columns that are all 0
domestic_consumption_df = domestic_consumption_df.loc[:, (domestic_consumption_df != 0).any(axis=0)]

domestic_consumption_df.head(5)

Unnamed: 0,Year,Angola,Bolivia (Plurinational State of),Brazil,Burundi,Ecuador,Indonesia,Madagascar,Malawi,Papua New Guinea,...,Panama,Sierra Leone,Sri Lanka,Thailand,Togo,Trinidad & Tobago,Uganda,Venezuela,Viet Nam,Yemen
1,2009,30,46.0,18390,1.399,150,3333,467,1,2.0,...,67,5,30,700,2.0,11,188.7,1650,1208,130
2,2010,30,47.5,19132,2.0,150,3333,467,1,1.43,...,67,5,35,775,2.0,10,204.0,1650,1583,130
3,2011,30,49.0,19720,2.0,150,3667,450,1,1.236,...,67,5,35,1100,0.21,10,210.0,1650,1650,130
4,2012,30,50.5,20330,2.0,150,3900,430,1,1.711,...,67,5,35,1130,0.242,10,216.0,1650,1825,130
5,2013,30,52.0,20085,2.0,155,4250,410,1,2.039,...,67,5,35,1200,0.238,10,220.8,1650,2000,130
