In [1]:
#imports
import pandas as pd
import numpy as np

In [2]:
#import dataframe with relation country/regions from csv downloaded from http://www.fao.org/faostat/en/#definitions
region_country = pd.read_csv('original/FAOSTAT_data_2-24-2021.csv', usecols=['Country Group', 'Country'])
regions = list(region_country['Country Group'].unique())
europecountries = list(region_country[region_country['Country Group'] == 'Europe']['Country'])

# Crops - production

**1. all data crops**

In [3]:
#import original dataset from csv downloaded from http://www.fao.org/faostat/en/#data/QC
df = pd.read_csv('original/Production_Crops_E_All_Data.csv', encoding='latin1')
original_df = pd.DataFrame(df) #this is to be sure the original df (not just the location) is saved
original_df.head()

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1961,Y1961F,Y1962,...,Y2015,Y2015F,Y2016,Y2016F,Y2017,Y2017F,Y2018,Y2018F,Y2019,Y2019F
0,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,ha,,,,...,14676.0,,19481.0,,19793.0,,20053.0,,29203.0,
1,2,Afghanistan,221,"Almonds, with shell",5419,Yield,hg/ha,,,,...,16521.0,Fc,16859.0,Fc,13788.0,Fc,17161.0,Fc,13083.0,Fc
2,2,Afghanistan,221,"Almonds, with shell",5510,Production,tonnes,,,,...,24246.0,,32843.0,,27291.0,,34413.0,,38205.0,
3,2,Afghanistan,711,"Anise, badian, fennel, coriander",5312,Area harvested,ha,,M,,...,25000.0,F,25787.0,Im,28398.0,Im,26725.0,Im,27562.0,Im
4,2,Afghanistan,711,"Anise, badian, fennel, coriander",5419,Yield,hg/ha,,,,...,7200.0,Fc,6982.0,Fc,6863.0,Fc,6898.0,Fc,6903.0,Fc


In [4]:
#check i those colums are redundant
print('Number of Area codes:', 
      len(df['Area Code'].unique()))

print('Number of Areas:',
      len(df.Area.unique()))

Number of Area codes: 246
Number of Areas: 246


In [5]:
#remove redundant/unuseful columns for simple analysis, remove 'Y' from years
cols=[]

for col in df.columns:
    if col.endswith('F'):
        df.drop(columns=[col], inplace= True)
    elif col.endswith('Code'):
        df.drop(columns=[col], inplace= True)
    elif col.startswith('Y'):
        cols.append(int(col.replace('Y','')))
    else:
        cols.append(col)
        
df.columns = cols
df.head()

Unnamed: 0,Area,Item,Element,Unit,1961,1962,1963,1964,1965,1966,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Afghanistan,"Almonds, with shell",Area harvested,ha,,,,,,,...,11210.0,13469.0,13490.0,14114.0,13703.0,14676.0,19481.0,19793.0,20053.0,29203.0
1,Afghanistan,"Almonds, with shell",Yield,hg/ha,,,,,,,...,49955.0,45000.0,45960.0,29910.0,19996.0,16521.0,16859.0,13788.0,17161.0,13083.0
2,Afghanistan,"Almonds, with shell",Production,tonnes,,,,,,,...,56000.0,60611.0,62000.0,42215.0,27400.0,24246.0,32843.0,27291.0,34413.0,38205.0
3,Afghanistan,"Anise, badian, fennel, coriander",Area harvested,ha,,,,,,,...,17000.0,19500.0,18500.0,18500.0,30000.0,25000.0,25787.0,28398.0,26725.0,27562.0
4,Afghanistan,"Anise, badian, fennel, coriander",Yield,hg/ha,,,,,,,...,6000.0,6414.0,6757.0,6757.0,7167.0,7200.0,6982.0,6863.0,6898.0,6903.0


In [6]:
dftemp = df.drop(columns=['Area', 'Item', 'Element', 'Unit'])
# add new column for yearly avg
df['yearly_average_1961-2019'] = dftemp.mean(axis=1)
# add new column for std of the values used for the mean, be careful to use dataframe without new added column!
df['std_1961-2019'] = dftemp.std(axis=1) 
df.head()

Unnamed: 0,Area,Item,Element,Unit,1961,1962,1963,1964,1965,1966,...,2012,2013,2014,2015,2016,2017,2018,2019,yearly_average_1961-2019,std_1961-2019
0,Afghanistan,"Almonds, with shell",Area harvested,ha,,,,,,,...,13490.0,14114.0,13703.0,14676.0,19481.0,19793.0,20053.0,29203.0,8951.688889,5452.496458
1,Afghanistan,"Almonds, with shell",Yield,hg/ha,,,,,,,...,45960.0,29910.0,19996.0,16521.0,16859.0,13788.0,17161.0,13083.0,20382.704545,8979.79167
2,Afghanistan,"Almonds, with shell",Production,tonnes,,,,,,,...,62000.0,42215.0,27400.0,24246.0,32843.0,27291.0,34413.0,38205.0,18990.933333,15382.579516
3,Afghanistan,"Anise, badian, fennel, coriander",Area harvested,ha,,,,,,,...,18500.0,18500.0,30000.0,25000.0,25787.0,28398.0,26725.0,27562.0,12087.823529,10162.35077
4,Afghanistan,"Anise, badian, fennel, coriander",Yield,hg/ha,,,,,,,...,6757.0,6757.0,7167.0,7200.0,6982.0,6863.0,6898.0,6903.0,6589.647059,485.336302


In [7]:
#save all data in 'easy-to-use' format
df.to_csv('clean/all_data_crops.csv')

#### 2. stack dataframe of all data crops

In [8]:
alldataoveryears = df.set_index(['Area', 'Item', 'Element', 'Unit']) \
        .drop(columns=['yearly_average_1961-2019', 'std_1961-2019']).stack() \
        .reset_index()
alldataoveryears = alldataoveryears.rename(columns={0:'Value', 'level_4': 'Year'})  # rename specific column
alldataoveryears.to_csv('clean/all_data_crops_over_years.csv')
alldataoveryears.head()

Unnamed: 0,Area,Item,Element,Unit,Year,Value
0,Afghanistan,"Almonds, with shell",Area harvested,ha,1975,0.0
1,Afghanistan,"Almonds, with shell",Area harvested,ha,1976,5900.0
2,Afghanistan,"Almonds, with shell",Area harvested,ha,1977,6000.0
3,Afghanistan,"Almonds, with shell",Area harvested,ha,1978,6000.0
4,Afghanistan,"Almonds, with shell",Area harvested,ha,1979,6000.0


**3. data crops for europe countries**

In [9]:
eu_data_crops = df[df.Area.isin(europecountries)].reset_index(drop=True) #when subsenting and non informative index, drop it
eu_data_crops = eu_data_crops.round(2)  #round all values in df to 2 decimal places
eu_data_crops.head()

Unnamed: 0,Area,Item,Element,Unit,1961,1962,1963,1964,1965,1966,...,2012,2013,2014,2015,2016,2017,2018,2019,yearly_average_1961-2019,std_1961-2019
0,Albania,Apples,Area harvested,ha,,,,,,,...,3719.0,3838.0,3863.0,4008.0,4230.0,4346.0,4294.0,4407.0,3339.63,1274.72
1,Albania,Apples,Yield,hg/ha,,,,,,,...,191718.0,197316.0,212426.0,228882.0,240028.0,221671.0,252387.0,240374.0,99296.09,74287.41
2,Albania,Apples,Production,tonnes,10004.0,8039.0,7931.0,8498.0,8000.0,9000.0,...,71300.0,75730.0,82060.0,91736.0,101532.0,96338.0,108375.0,105933.0,26934.61,28529.3
3,Albania,Apricots,Area harvested,ha,,,,,,,...,301.0,315.0,320.0,331.0,326.0,326.0,337.0,341.0,388.6,106.93
4,Albania,Apricots,Yield,hg/ha,,,,,,,...,147176.0,139683.0,129375.0,152417.0,159325.0,154601.0,152493.0,142962.0,68517.34,55400.62


In [10]:
eu_data_crops.to_csv('clean/eu_data_crops.csv')

#### 4. stacked data crops for europe countries

In [11]:
eudataoveryears = alldataoveryears[alldataoveryears.Area.isin(europecountries)].reset_index(drop=True)
eudataoveryears.to_csv('clean/all_data_crops_over_years.csv')
display(eudataoveryears.shape, eudataoveryears.head())

(337321, 6)

Unnamed: 0,Area,Item,Element,Unit,Year,Value
0,Albania,Apples,Area harvested,ha,1985,3600.0
1,Albania,Apples,Area harvested,ha,1986,3000.0
2,Albania,Apples,Area harvested,ha,1987,3000.0
3,Albania,Apples,Area harvested,ha,1988,2800.0
4,Albania,Apples,Area harvested,ha,1989,3500.0


#### ** 5. production**

In [12]:
#dataframe with only production data
production = (df[df.Element == 'Production']).drop(columns=['Element'])

#check if unit always tonnes
production.Unit.unique()

array(['tonnes'], dtype=object)

In [13]:
#drop 'unit' column and save
production = production.drop(columns=['Unit'])
production.to_csv('clean/production.csv')

#### **6. apple production over the years**

In [14]:
production.head()

Unnamed: 0,Area,Item,1961,1962,1963,1964,1965,1966,1967,1968,...,2012,2013,2014,2015,2016,2017,2018,2019,yearly_average_1961-2019,std_1961-2019
2,Afghanistan,"Almonds, with shell",,,,,,,,,...,62000.0,42215.0,27400.0,24246.0,32843.0,27291.0,34413.0,38205.0,18990.933333,15382.579516
5,Afghanistan,"Anise, badian, fennel, coriander",,,,,,,,,...,12500.0,12500.0,21500.0,18000.0,18005.0,19490.0,18436.0,19025.0,7933.705882,6920.749305
8,Afghanistan,Apples,15100.0,15100.0,15100.0,18400.0,20400.0,22800.0,27600.0,27900.0,...,70000.0,78597.0,89403.0,89733.0,140903.0,175000.0,217192.0,250324.0,42550.508475,47077.857625
11,Afghanistan,Apricots,32000.0,32000.0,32000.0,39200.0,43400.0,48400.0,58700.0,59400.0,...,83500.0,90000.0,90000.0,87686.0,17894.0,131816.0,109086.0,129363.0,52387.457627,22188.061657
14,Afghanistan,Barley,378000.0,378000.0,378000.0,380000.0,380000.0,375000.0,357000.0,361000.0,...,504000.0,514000.0,521000.0,403000.0,301856.0,94995.0,56781.0,123576.0,303493.355932,103313.817707


In [15]:
#dataframe for apple production over this period in all countries
apples = production[production.Item == 'Apples']\
    .set_index('Area').drop(columns=['Item', 'yearly_average_1961-2019', 'std_1961-2019']).transpose()
apples.info()

<class 'pandas.core.frame.DataFrame'>
Index: 59 entries, 1961 to 2019
Columns: 131 entries, Afghanistan to Net Food Importing Developing Countries
dtypes: float64(131)
memory usage: 60.8+ KB


In [16]:
apples.to_csv('clean/apples.csv')
apples.head()

Area,Afghanistan,Albania,Algeria,Argentina,Armenia,Australia,Austria,Azerbaijan,Belarus,Belgium,...,Western Europe,Oceania,Australia and New Zealand,European Union (28),European Union (27),Least Developed Countries,Land Locked Developing Countries,Small Island Developing States,Low Income Food Deficit Countries,Net Food Importing Developing Countries
1961,15100.0,10004.0,15400.0,415000.0,,295034.0,456000.0,,,,...,4142800.0,369434.0,369434.0,8362520.0,8023520.0,26100.0,35300.0,530.0,310300.0,126230.0
1962,15100.0,8039.0,14000.0,397000.0,,326282.0,431000.0,,,,...,8391400.0,419772.0,419772.0,12509286.0,11926286.0,26200.0,35500.0,535.0,303400.0,121556.0
1963,15100.0,7931.0,13000.0,474000.0,,349571.0,436000.0,,,,...,6006600.0,433571.0,433571.0,10951698.0,10406698.0,26300.0,35451.0,535.0,278700.0,137660.0
1964,18400.0,8498.0,12441.0,371000.0,,367397.0,447000.0,,,,...,6310800.0,476797.0,476797.0,11735102.0,11082102.0,30650.0,38907.0,540.0,250100.0,156225.0
1965,20400.0,8000.0,13151.0,544000.0,,360002.0,222000.0,,,,...,6156400.0,462902.0,462902.0,10717120.0,10188120.0,32700.0,42220.0,540.0,243200.0,156438.0


### **7. apple production over the years in europe**

In [17]:
apples_europe = pd.DataFrame (apples)

for col in apples_europe:
    if col not in europecountries:
        apples_europe.drop(columns=col, inplace=True)

print(len(apples_europe.columns))
apples_europe.to_csv('clean/apples_eu.csv')
apples_europe.head()

43


Area,Albania,Austria,Belarus,Belgium,Belgium-Luxembourg,Bosnia and Herzegovina,Bulgaria,Croatia,Czechia,Czechoslovakia,...,Serbia and Montenegro,Slovakia,Slovenia,Spain,Sweden,Switzerland,Ukraine,United Kingdom of Great Britain and Northern Ireland,USSR,Yugoslav SFR
1961,10004.0,456000.0,,,132000.0,,272703.0,,,117138.0,...,,,,385500.0,100000.0,269800.0,,339000.0,1744000.0,343000.0
1962,8039.0,431000.0,,,145000.0,,350612.0,,,130763.0,...,,,,274500.0,161000.0,480400.0,,583000.0,1865000.0,177000.0
1963,7931.0,436000.0,,,135000.0,,289695.0,,,133985.0,...,,,,480700.0,136000.0,359600.0,,545000.0,2382000.0,280000.0
1964,8498.0,447000.0,,,189200.0,,364126.0,,,158662.0,...,,,,321600.0,183000.0,389600.0,,653000.0,2639000.0,159000.0
1965,8000.0,222000.0,,,176000.0,,299404.0,,,91630.0,...,,,,446800.0,115000.0,255400.0,,529000.0,2716000.0,135000.0


#### 8. stacked strawberry data over decades in europe

In [18]:
strawberries = eudataoveryears[eudataoveryears.Item == 'Strawberries'] \
             .reset_index(drop=True).drop(columns=['Item'])
display(strawberries.head(), strawberries.Area.unique())

Unnamed: 0,Area,Element,Unit,Year,Value
0,Albania,Area harvested,ha,2017,102.0
1,Albania,Area harvested,ha,2018,111.0
2,Albania,Area harvested,ha,2019,121.0
3,Albania,Yield,hg/ha,2017,498922.0
4,Albania,Yield,hg/ha,2018,395766.0


array(['Albania', 'Austria', 'Belarus', 'Belgium', 'Belgium-Luxembourg',
       'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Czechia',
       'Czechoslovakia', 'Denmark', 'Estonia', 'Finland', 'France',
       'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia',
       'Lithuania', 'Luxembourg', 'Malta', 'Netherlands',
       'North Macedonia', 'Norway', 'Poland', 'Portugal',
       'Republic of Moldova', 'Romania', 'Russian Federation', 'Serbia',
       'Serbia and Montenegro', 'Slovakia', 'Slovenia', 'Spain', 'Sweden',
       'Switzerland', 'Ukraine',
       'United Kingdom of Great Britain and Northern Ireland', 'USSR',
       'Yugoslav SFR'], dtype=object)

In [19]:
strawberries.to_csv('clean/strawberries_over_years_eu.csv')

#### **9. simple dataframe for apple production over the years in just 4 countries**

In [20]:
simpledf_applevalues = apples[['Brazil', 'Portugal', 'Spain', 'United Kingdom of Great Britain and Northern Ireland' ]]
simpledf_applevalues.columns = [['Br', 'Pt', 'Sp', 'UK']]
simpledf_applevalues.to_csv('clean/simpledf_applevalues.csv')

# Population 
**10. pop**

In [21]:
#import original dataset from csv downloaded from http://www.fao.org/faostat/en/#data/OA
pop = pd.read_csv('original/FAOSTAT_data_population.csv', encoding='latin1')
pop.head()

Unnamed: 0,ï»¿Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,OA,Annual population,2,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1950,1950,1000 persons,7752.118,X,International reliable sources,"UNDESA, Population Division â World Populati..."
1,OA,Annual population,2,Afghanistan,512,Total Population - Male,3010,Population - Est. & Proj.,1950,1950,1000 persons,4099.243,X,International reliable sources,"UNDESA, Population Division â World Populati..."
2,OA,Annual population,2,Afghanistan,513,Total Population - Female,3010,Population - Est. & Proj.,1950,1950,1000 persons,3652.874,X,International reliable sources,"UNDESA, Population Division â World Populati..."
3,OA,Annual population,2,Afghanistan,551,Rural population,3010,Population - Est. & Proj.,1950,1950,1000 persons,7286.991,X,International reliable sources,
4,OA,Annual population,2,Afghanistan,561,Urban population,3010,Population - Est. & Proj.,1950,1950,1000 persons,465.127,X,International reliable sources,"UNDESA, Population Division â World Urbaniza..."


In [22]:
#remove redundant/unuseful columns for simple analysis, remove 'Y' from years
cols=[]

for col in pop.columns:
    if col.startswith(('Flag', 'Domain', 'Note')):
        pop.drop(columns=[col], inplace= True)
    elif col.endswith('Item'):
        pop.drop(columns=[col], inplace= True)
    elif col.endswith('Code'):
        pop.drop(columns=[col], inplace= True)
    else:
        cols.append(col)
        
pop.columns = cols
print(pop.shape)
pop.head()

(70585, 5)


Unnamed: 0,Area,Element,Year,Unit,Value
0,Afghanistan,Total Population - Both sexes,1950,1000 persons,7752.118
1,Afghanistan,Total Population - Male,1950,1000 persons,4099.243
2,Afghanistan,Total Population - Female,1950,1000 persons,3652.874
3,Afghanistan,Rural population,1950,1000 persons,7286.991
4,Afghanistan,Urban population,1950,1000 persons,465.127


In [23]:
pop.to_csv('clean/pop.csv')

# Crop residues

**11. all crop residues**

In [24]:
#import original dataset from csv downloaded from http://www.fao.org/faostat/en/#data/GA
df2 = pd.read_csv('original/Emissions_Agriculture_Crop_Residues_E_All_Data.csv', encoding='latin1')
df2.head()

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y1961,Y1961F,Y1962,...,Y2016,Y2016F,Y2017,Y2017F,Y2018,Y2018F,Y2030,Y2030F,Y2050,Y2050F
0,2,Afghanistan,44,Barley,72392,Residues (Crop residues),kg of nutrients,5925706.0,Fc,5925706.0,...,4402174.0,Fc,1380965.0,Fc,1062870.0,Fc,8779633.0,Fc,14060710.0,Fc
1,2,Afghanistan,44,Barley,72292,Implied emission factor for N2O (Crop residues),kg N2O-N/kg N,0.0123,Fc,0.0123,...,0.0123,Fc,0.0123,Fc,0.0123,Fc,0.0122,Fc,0.0123,Fc
2,2,Afghanistan,44,Barley,72342,Direct emissions (N2O) (Crop residues),gigagrams,0.0931,Fc,0.0931,...,0.0692,Fc,0.0217,Fc,0.0167,Fc,0.138,Fc,0.221,Fc
3,2,Afghanistan,44,Barley,72352,Direct emissions (CO2eq) (Crop residues),gigagrams,28.8667,Fc,28.8667,...,21.4449,Fc,6.7273,Fc,5.1777,Fc,42.7694,Fc,68.4958,Fc
4,2,Afghanistan,44,Barley,72362,Indirect emissions (N2O) (Crop residues),gigagrams,0.021,Fc,0.021,...,0.0156,Fc,0.0049,Fc,0.0038,Fc,0.031,Fc,0.0497,Fc


In [25]:
#remove redundant/unuseful columns for simple analysis, remove 'Y' from years
cols=[]

for col in df2:
    if col.endswith('F'):
        df2.drop(columns=[col], inplace= True)
    elif col.endswith('Code'):
        df2.drop(columns=[col], inplace= True)
    elif col.startswith('Y'):
        cols.append(int(col.replace('Y','')))
    else:
        cols.append(col)
        
df2.columns = cols
df2t = df2.drop(columns=['Area', 'Item', 'Element', 'Unit'])
df2['average_1961-2019'] = df2t.mean(axis=1)
df2['std_1961-2019'] = df2t.std(axis=1)
df2.to_csv('clean/all_data_crop_residues.csv')
df2.head()

Unnamed: 0,Area,Item,Element,Unit,1961,1962,1963,1964,1965,1966,...,2013,2014,2015,2016,2017,2018,2030,2050,average_1961-2019,std_1961-2019
0,Afghanistan,Barley,Residues (Crop residues),kg of nutrients,5925706.0,5925706.0,5925706.0,5946927.0,5946927.0,5893875.0,...,6974770.0,7401796.0,5818908.0,4402174.0,1380965.0,1062870.0,8779633.0,14060710.0,4892098.0,1890840.0
1,Afghanistan,Barley,Implied emission factor for N2O (Crop residues),kg N2O-N/kg N,0.0123,0.0123,0.0123,0.0123,0.0123,0.0123,...,0.0123,0.0123,0.0123,0.0123,0.0123,0.0123,0.0122,0.0123,0.01229,3.025317e-05
2,Afghanistan,Barley,Direct emissions (N2O) (Crop residues),gigagrams,0.0931,0.0931,0.0931,0.0935,0.0935,0.0926,...,0.1096,0.1163,0.0914,0.0692,0.0217,0.0167,0.138,0.221,0.07687167,0.02971819
3,Afghanistan,Barley,Direct emissions (CO2eq) (Crop residues),gigagrams,28.8667,28.8667,28.8667,28.97,28.97,28.7116,...,33.9771,36.0573,28.3464,21.4449,6.7273,5.1777,42.7694,68.4958,23.83151,9.211096
4,Afghanistan,Barley,Indirect emissions (N2O) (Crop residues),gigagrams,0.021,0.021,0.021,0.021,0.021,0.0208,...,0.0247,0.0262,0.0206,0.0156,0.0049,0.0038,0.031,0.0497,0.01730167,0.006681533


#### **12 . crop residues in europe**

In [26]:
cropres_eu = pd.DataFrame (df2)

for i, row in cropres_eu.iterrows():
    try:
        if cropres_eu.iloc[i,0] not in europecountries:
            cropres_eu.drop(i, inplace=True)
    except:
        continue

print(cropres_eu.shape)
cropres_eu.to_csv('clean/cropresidues_europe.csv')
cropres_eu.head()

(9144, 66)


Unnamed: 0,Area,Item,Element,Unit,1961,1962,1963,1964,1965,1966,...,2013,2014,2015,2016,2017,2018,2030,2050,average_1961-2019,std_1961-2019
28,Afghanistan,Potatoes,Indirect emissions (N2O) (Crop residues),gigagrams,0.0016,0.0015,0.0016,0.0016,0.0016,0.0016,...,0.0028,0.0031,0.003,0.0042,0.0042,0.0046,0.0053,0.0094,0.00236,0.001189103
29,Afghanistan,Potatoes,Indirect emissions (CO2eq) (Crop residues),gigagrams,0.4931,0.4766,0.4843,0.492,0.4954,0.4998,...,0.8697,0.9587,0.9448,1.3046,1.3153,1.4352,1.6357,2.9023,0.731305,0.3682238
30,Afghanistan,Potatoes,Emissions (N2O) (Crop residues),gigagrams,0.0087,0.0084,0.0085,0.0086,0.0087,0.0088,...,0.0153,0.0168,0.0166,0.0229,0.0231,0.0252,0.0287,0.051,0.01285,0.006465882
31,Afghanistan,Potatoes,Emissions (CO2eq) (Crop residues),gigagrams,2.6849,2.5947,2.6368,2.6789,2.697,2.721,...,4.7352,5.2194,5.144,7.1029,7.1612,7.8137,8.9056,15.8012,3.981562,2.004742
32,Afghanistan,"Rice, paddy",Residues (Crop residues),kg of nutrients,6481699.0,6481699.0,6481699.0,7145110.0,7145110.0,6850311.0,...,7881552.0,8360951.0,6307757.0,5034382.0,4711483.0,4972493.0,10610530.0,14543650.0,6773198.0,1578952.0


# Food waste 
**13. most simple dataframe**

In [27]:
#import original dataset from csv downloaded from http://www.fao.org/sustainable-development-goals/indicators/1231/en/
foodwaste = pd.read_excel('original/Foodwaste_March_2020.xlsx', usecols=['GeoAreaName', 'Value'])
foodwaste.dropna(subset=['Value'], inplace=True)
foodwaste = foodwaste.reset_index(drop=True)
foodwaste.head()

Unnamed: 0,GeoAreaName,Value
0,Northern America (M49) and Europe (M49),15.7
1,Sub-Saharan Africa (M49),14.0
2,Latin America and the Caribbean (MDG=M49),11.6
3,Central Asia (M49) and Southern Asia (MDG=M49),20.7
4,Eastern Asia (M49) and South-eastern Asia (MDG...,7.8


In [28]:
foodwaste.shape

(9, 2)

In [29]:
foodwaste.to_csv('clean/foodwaste.csv')

## food loss
**14. all data food loss**

In [30]:
#import original dataset from csv downloaded from http://www.fao.org/platform-food-loss-waste/flw-data/en/
#looking at missing data I focus in this columns for exercises
alldatafoodloss = pd.read_csv('original/Data_all_food_loss.csv', encoding='latin1', usecols=['country', 
                            'crop', 'timepointyears','percentage_loss_of_quantity', 'activity',
                            'fsc_location1'])
print(alldatafoodloss.shape)
display(alldatafoodloss.tail())
# convert percent loss values in floats when possible (now str), when not possible set to missing value
for i, item in alldatafoodloss.percentage_loss_of_quantity.iteritems():
    try:
        alldatafoodloss.loc[i,'percentage_loss_of_quantity'] = float(item)
    except:
        alldatafoodloss.loc[i,'percentage_loss_of_quantity'] = np.nan
display(alldatafoodloss.info())

(9492, 6)


Unnamed: 0,country,crop,timepointyears,percentage_loss_of_quantity,activity,fsc_location1
9487,Zambia,Maize (corn),2007,3.3,,Farm
9488,Zambia,Maize (corn),2007,3.8,Platform drying,Farm
9489,Zambia,Maize (corn),2007,5.1,Harvesting/field drying,Harvest
9490,Zambia,Maize (corn),2007,2.2,Transport to farm,Farm
9491,Zambia,Maize (corn),2007,3.3,Farm storage,Storage


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9492 entries, 0 to 9491
Data columns (total 6 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   country                      9492 non-null   object
 1   crop                         9492 non-null   object
 2   timepointyears               9492 non-null   int64 
 3   percentage_loss_of_quantity  7972 non-null   object
 4   activity                     6647 non-null   object
 5   fsc_location1                9384 non-null   object
dtypes: int64(1), object(5)
memory usage: 445.1+ KB


None

In [31]:
alldatafoodloss.to_csv('clean/alldatafoodloss.csv')

**15. data food loss in europe**

In [32]:
eufoodloss = alldatafoodloss[alldatafoodloss.country.isin(europecountries)].reset_index(drop=True)
eufoodloss = eufoodloss[eufoodloss.fsc_location1 != 'SWS_Total'] #SWS the FAO Statistical Working System
eufoodloss.tail()

Unnamed: 0,country,crop,timepointyears,percentage_loss_of_quantity,activity,fsc_location1
362,Switzerland,Potatoes,2015,0.008,,Processing
363,Switzerland,Potatoes,2015,0.045,,Farm
364,Switzerland,Potatoes,2015,0.003,,Wholesale
365,Switzerland,Potatoes,2015,0.008,,Processing
366,Switzerland,Potatoes,2015,0.045,,Farm


In [33]:
eufoodloss.to_csv('clean/eufoodloss.csv')