In [1]:
import io
import boto3
import pandas as pd

def read_datasets():
    # Read three datasets stored in the Amazon S3 bucket
    bucket = "faostat-ml"
    file_name = "filtered_files/Emissions_Totals_E_All_Data_(Normalized).csv"
    s3_client = boto3.client("s3")
    obj = s3_client.get_object(Bucket=bucket, Key=file_name)
    df_emission = pd.read_csv(io.BytesIO(obj['Body'].read()))
    df_emission.drop(df_emission.columns[df_emission.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)

    #fix the feature names
    df_emission = df_emission.rename(columns={'Item': 'EmissionItem','Value': 'EmissionValue','Element': 'EmissionElement','Unit': 'EmissionUnit'})
    print(df_emission.head(5))

    file_name = "filtered_files/Production_Crops_Livestock_E_All_Data_(Normalized).csv"
    obj = s3_client.get_object(Bucket=bucket, Key=file_name)
    df_prod = pd.read_csv(io.BytesIO(obj['Body'].read()))
    df_prod.drop(df_prod.columns[df_prod.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
    df_prod.drop('Unit', axis=1, inplace=True)
    print(df_prod.head(5))


    file_name = "filtered_files/Forestry_E_All_Data_(Normalized).csv"
    obj = s3_client.get_object(Bucket=bucket, Key=file_name)
    df_forest = pd.read_csv(io.BytesIO(obj['Body'].read()),encoding='latin-1')
    df_forest.drop(df_forest.columns[df_forest.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
    print(df_forest.head(5))
    return df_emission, df_prod, df_forest

In [2]:
def pre_processing():
    remove_rows_df = pre_process_df.copy()
    
    # Removes area, source and emission unit that place no significant role
    remove_rows_df.drop('Area', axis=1, inplace=True)
    remove_rows_df.drop('Source', axis=1, inplace=True)
    remove_rows_df.drop('EmissionUnit', axis=1, inplace=True)
    
    # Remove redudant instance
    remove_rows_df = remove_rows_df[remove_rows_df.Element != "Area harvested"]
    remove_rows_df = remove_rows_df[remove_rows_df.Element != "Production"]
    remove_rows_df = remove_rows_df[remove_rows_df.Element != "Producing Animals/Slaughtered"]
    remove_rows_df = remove_rows_df[remove_rows_df.EmissionElement != "Indirect emissions (N2O)"]
    remove_rows_df = remove_rows_df[remove_rows_df.EmissionElement != "Direct emissions (N2O)"]
    remove_rows_df.index = remove_rows_df.Year
    remove_rows_df.drop('Year', axis=1, inplace=True)

    #identify partial string to look for
    discard = ['from']
    remove_rows_df = remove_rows_df[~remove_rows_df.EmissionElement.str.contains('|'.join(discard))]

    # Combine two features
    remove_rows_df["Emission"] = remove_rows_df["EmissionItem"] + str("_") + remove_rows_df["EmissionElement"]
    remove_rows_df.drop('EmissionItem', axis=1, inplace=True)
    remove_rows_df.drop('EmissionElement', axis=1, inplace=True)

    # Remove redudant instance
    remove_rows_df = remove_rows_df[remove_rows_df.Element != 'Import Value']
    remove_rows_df = remove_rows_df[remove_rows_df.Element != 'Export Value']
    
    emission_list = list(pre_process_df.EmissionItem.unique())

    # Create pivot table for production items + forestry products based on year and item
    df_item = remove_rows_df.pivot_table(index=['Year'], 
                columns=['Item'], values='Value')
    
    # Remove columns that have atleast one NaN value since it would affect the forecast
    sum = df_item.isnull().sum(axis = 0)
    for items in sum.iteritems():
        if(items[1]>0):
            df_item.drop(items[0], axis=1, inplace=True)
    nan_cols = [i for i in df_item.columns if df_item[i].isnull().any()]

    # Create pivot table for emissions
    df_emi = remove_rows_df.pivot_table(index=['Year'], 
                columns=['Emission'], values='EmissionValue')
    # Remove columns that have atleast one NaN value since it would affect the forecast
    sum = df_emi.isnull().sum(axis = 0)
    for items in sum.iteritems():
        if(items[1]>0):
            df_emi.drop(items[0], axis=1, inplace=True)
    nan_cols = [i for i in df_emi.columns if df_emi[i].isnull().any()]
    
    display(df_item.head(5))
    display(df_emi.head(5))
    return df_item, df_emi, emission_list

In [3]:
# Perform granger causation test - to check the influence of one variable over another

def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False):    
    import numpy as np
    from statsmodels.tsa.stattools import grangercausalitytests
    maxlag=12
    """Check Granger Causality of all possible combinations of the Time series.
    The rows are the response variable, columns are predictors. The values in the table 
    are the P-Values. P-Values lesser than the significance level (0.05), implies 
    the Null Hypothesis that the coefficients of the corresponding past values is 
    zero, that is, the X does not cause Y can be rejected.
    """
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            df.loc[r, c] = min_p_value
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

In [4]:
# Perform ADF test to check if each series is stationary or not

def adfuller_test(series, signif=0.05, name='', verbose=False):
    i = 0
    from statsmodels.tsa.api import VAR
    from statsmodels.tsa.stattools import adfuller
    from statsmodels.tools.eval_measures import rmse, aic
    non_stationary = []
    """Perform ADFuller to test for Stationarity of given series and print report"""
    r = adfuller(series, autolag='AIC')
    output = {'test_statistic':round(r[0], 4), 'pvalue':round(r[1], 4), 'n_lags':round(r[2], 4), 'n_obs':r[3]}
    p_value = output['pvalue'] 
    def adjust(val, length= 6): return str(val).ljust(length)

    # Print Summary
    print(f'    Augmented Dickey-Fuller Test on "{name}"', "\n   ", '-'*47)
    print(f' Null Hypothesis: Data has unit root. Non-Stationary.')
    print(f' Significance Level    = {signif}')
    print(f' Test Statistic        = {output["test_statistic"]}')
    print(f' No. Lags Chosen       = {output["n_lags"]}')
    
    for key,val in r[4].items():
        i = i + 1
        print(f' Critical value {adjust(key)} = {round(val, 3)}')

    if p_value <= signif:
        print(f" => P-Value = {p_value}. Rejecting Null Hypothesis.")
        print(f" => Series is Stationary.")
    else:
        print(f" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.")
        print(f" => Series is Non-Stationary.")

In [5]:
# Revert back the differencing to get the forecast to original scale

def invert_transformation(df_train, df_forecast, second_diff=False):
    df_fc = df_forecast.copy()
    columns = df_train.columns
    for col in columns:        
        # Roll back 2nd Diff
        if second_diff:
            df_fc[str(col)+'_1d'] = (df_train[col].iloc[-1]-df_train[col].iloc[-2]) + df_fc[str(col)+'_2d'].cumsum()
        # Roll back 1st Diff
        df_fc[str(col)+'_forecast'] = df_train[col].iloc[-1] + df_fc[str(col)+'_1d'].cumsum()
    return df_fc

In [6]:
# Perform stationary test and differencing


""" Note: Comment out the 'for loop' when adf fuller test results need to be displayed"""

def stationary(df):
    nobs = 9
    df_train, df_test = df[0:-nobs], df[-nobs:]
    
    # ADF Test on each column
    #for name, column in df_train.iteritems():
        #adfuller_test(column, name=column.name)
        #print('\n')
        
    # 1st difference
    df_differenced = df_train.diff().dropna()
    #for name, column in df_differenced.iteritems():
        #adfuller_test(column, name=column.name)
        #print('\n')
        
    # Second Differencing
    df_differenced = df_differenced.diff().dropna()   
    #for name, column in df_differenced.iteritems():
        #adfuller_test(column, name=column.name)
        #print('\n')
        
    return df_differenced, df_train, df_test

In [14]:
# The function that performs forecasting

mae =[]
mse = []
sqr = []
    
def forecasting(df_item, df_emi, emission_list):
    import warnings
    import pandas as pd
    from numpy import sqrt 
    import plotly.graph_objs as go
    from statsmodels.tsa.api import VAR
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    warnings.simplefilter(action='ignore', category=FutureWarning)
    col = df_item.columns
    pandas_df = pd.DataFrame()
    nobs = 9

    # iterate through every item - production and forestry product - for the country/area specified
    for i in col:
        selected_columns = df_item[i]

        new_df = selected_columns.copy()
        new_merged_df = pd.merge(new_df, df_emi, on=['Year'])

        for j in emission_list:
            emission = new_merged_df.filter(regex=j)
            col = list(emission.columns) 
            new_merged_df[j] = new_merged_df[col].sum(axis=1)

        pandas_df = new_merged_df[new_merged_df.columns.intersection(emission_list)]
        pandas_df[str(new_merged_df.columns[0])] = new_merged_df[str(new_merged_df.columns[0])]
        pandas_df = pandas_df.loc[:, (pandas_df != 0).any(axis=0)]
         
        """ Note: comment out if granger test needs to be performed"""
        #print("Granger's causality test:")
        #display(grangers_causation_matrix(pandas_df, variables = pandas_df.columns))        
        
        historical_split1 = pandas_df.iloc[:50,:]
        historical_split2 = pandas_df.iloc[50:,:]
    
        df_differenced,df_train,_ = stationary(historical_split1)
        model = VAR(df_differenced)
        model_fitted = model.fit(6)

        # Get the lag order
        lag_order = model_fitted.k_ar

        # Input data for forecasting
        forecast_input = df_differenced.values[-lag_order:]
        fc = model_fitted.forecast(y=forecast_input, steps=nobs)
        df_forecast = pd.DataFrame(fc, index=historical_split1.index[-nobs:], columns=historical_split1.columns + '_2d')

        df_results = invert_transformation(df_train, df_forecast, second_diff=True)
        d = historical_split1.tail(nobs)
        d.reset_index(inplace = True)


        range_year = pd.date_range(start = str(d.Year.iloc[-1]), periods = (len(d)+1), freq = 'A')
        range_year = range_year.year
        year = pd.DataFrame({'Year': range_year})
        d = d.append(year)
        year_forecast = d['Year'].iat[-1]

        d.set_index('Year', inplace = True)
        d = d.tail(nobs)
            
        df_results.index = d.index
        mean_old = historical_split1[i].mean()
        recent_forecast = df_results[i+"_forecast"]
        recent_forecast = recent_forecast.iat[-1]

        #combining predicted and real data set
        combine = pd.concat([df_results[i+"_forecast"], historical_split2[i]], axis=1)
        combine = combine.round(decimals=2)
        combine = combine.reset_index()

        combine[i+"_Unscaled"] = combine[i]
        combine[i+ "_Forecast_Unscaled"] = combine[i+"_forecast"]

        combine[i]=(combine[i]-combine[i].min())/(combine[i].max()-combine[i].min())
        combine[i+"_forecast"]=(combine[i+"_forecast"]-combine[i+"_forecast"].min())/(combine[i+"_forecast"].max()-combine[i+"_forecast"].min())


        display(combine)
        null_check = combine.isnull().sum().sum()
        if null_check > 0:
            continue
        #Forecast metrics

        mae.append(mean_absolute_error(combine[i].values, combine[i+"_forecast"].values))
        mse.append(mean_squared_error(combine[i].values, combine[i+"_forecast"].values))
        sqr.append(sqrt(mean_squared_error(combine[i].values, combine[i+"_forecast"].values)))
        
        fig = go.Figure()
        n = df_results.index[0]
        fig.add_trace(go.Scatter(x = pandas_df.index[-200:], y = pandas_df[str(i)][-200:], marker = dict(color ="red"), name = "Actual close price"))
        fig.add_trace(go.Scatter(x = df_results.index, y = df_results[str(i)+'_forecast'], marker=dict(color = "green"), name = "Future prediction"))
        fig.update_xaxes(showline = True, linewidth = 2, linecolor='black', mirror = True, showspikes = True,)
        fig.update_yaxes(showline = True, linewidth = 2, linecolor='black', mirror = True, showspikes = True,)
        fig.update_layout(title= "9 Years Forecast", yaxis_title = str(i), hovermode = "x", hoverdistance = 100) #, # Distance to show hover label of data point spikedistance = 1000,shapes = [dict( x0 = n, x1 = n, y0 = 0, y1 = 1, xref = 'x', yref = 'paper', line_width = 2)], annotations = [dict(x = n, y = 0.05, xref = 'x', yref = 'paper', showarrow = False, xanchor = 'left', text = 'Prediction')])
        fig.update_layout(autosize = False, width = 1000, height = 400,)
        fig.show()

In [8]:
def time_series_prediction():
    pre_process_df = pd.DataFrame()
    combined_df = pd.DataFrame()

    import numpy as np
    import seaborn as sb
    import ipywidgets as widgets
    import matplotlib.pyplot as plt
    from sklearn import preprocessing
    from sklearn.preprocessing import LabelEncoder
    from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider

    def unique_sorted_values_plus_ALL(array):
        unique = array.unique().tolist()
        unique.sort()
        return unique
    output_area = widgets.Output()
    print("Area:")
    dropdown_area = widgets.Dropdown(options=unique_sorted_values_plus_ALL(df_prod.Area))
    def dropdown_area_eventhandler(change):
        output_area.clear_output()
        with output_area:
            if(change.new):
                country_df_prod = df_prod[df_prod.Area == change.new]
                country_df_forest = df_forest[df_forest.Area == change.new]
                country_df_emission = df_emission[df_emission.Area == change.new]

                prod_forest_df = pd.concat([country_df_prod, country_df_forest], ignore_index=True)
                global pre_process_df, combined_df
                pre_process_df = pd.merge(prod_forest_df, country_df_emission, on=['Year','Area'])
    dropdown_area.observe(dropdown_area_eventhandler, names='value')
    display(dropdown_area)

In [9]:
# Run the functions from this module onwards for time series prediction

df_emission, df_prod, df_forest = read_datasets()

          Area          EmissionItem  EmissionElement  Year      Source  \
0  Afghanistan  Enteric Fermentation  Emissions (CH4)  1961  FAO TIER 1   
1  Afghanistan  Enteric Fermentation  Emissions (CH4)  1962  FAO TIER 1   
2  Afghanistan  Enteric Fermentation  Emissions (CH4)  1963  FAO TIER 1   
3  Afghanistan  Enteric Fermentation  Emissions (CH4)  1964  FAO TIER 1   
4  Afghanistan  Enteric Fermentation  Emissions (CH4)  1965  FAO TIER 1   

  EmissionUnit  EmissionValue  
0   kilotonnes       240.6831  
1   kilotonnes       245.3106  
2   kilotonnes       255.8285  
3   kilotonnes       259.0650  
4   kilotonnes       265.5980  
          Area                 Item         Element  Year   Value
0  Afghanistan  Almonds, with shell  Area harvested  1975     0.0
1  Afghanistan  Almonds, with shell  Area harvested  1976  5900.0
2  Afghanistan  Almonds, with shell  Area harvested  1977  6000.0
3  Afghanistan  Almonds, with shell  Area harvested  1978  6000.0
4  Afghanistan  Almonds, wi

In [10]:
time_series_prediction()

Area:


Dropdown(options=('Afghanistan', 'Africa', 'Albania', 'Algeria', 'Americas', 'Angola', 'Antigua and Barbuda', …

The below cell outputs consist of Granger causality matrix, ADF fuller test results for each feature and prediction graphs

In [None]:
import warnings
warnings.filterwarnings('ignore')
warnings.warn('DelftStack')
warnings.warn('Do not show this message')

display(pre_process_df.head(5))
df_item, df_emi, emission_list = pre_processing()
mae,mse,sqr = forecasting(df_item, df_emi, emission_list)

Unnamed: 0,Area,Item,Element,Year,Value,EmissionItem,EmissionElement,Source,EmissionUnit,EmissionValue
0,Asia,Agave fibres nes,Area harvested,1961,2600.0,Enteric Fermentation,Emissions (CH4),FAO TIER 1,kilotonnes,18825.8193
1,Asia,Agave fibres nes,Area harvested,1961,2600.0,Enteric Fermentation,Emissions (CO2eq) from CH4 (AR5),FAO TIER 1,kilotonnes,527122.94
2,Asia,Agave fibres nes,Area harvested,1961,2600.0,Enteric Fermentation,Emissions (CO2eq) (AR5),FAO TIER 1,kilotonnes,527122.94
3,Asia,Agave fibres nes,Area harvested,1961,2600.0,Manure Management,Emissions (CH4),FAO TIER 1,kilotonnes,1473.8451
4,Asia,Agave fibres nes,Area harvested,1961,2600.0,Manure Management,Emissions (N2O),FAO TIER 1,kilotonnes,91.8648


Item,Agave fibres nes,"Almonds, with shell","Anise, badian, fennel, coriander",Apples,Apricots,Areca nuts,Artichokes,Asparagus,Asses,Avocados,...,Veneer sheets,Vetches,"Walnuts, with shell",Watermelons,Wheat,Wood charcoal,Wood fuel,Wood pulp,Wood-based panels,Yams
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1961,8462.0,17073.0,5554.0,64850.0,34471.0,7127.0,119101.0,25416.0,16372019.0,45409.0,...,24038.5,8658.0,21995.0,117565.0,7486.0,115850.0,159700.0,117750.0,310265.0,87255.0
1962,8194.0,17537.0,5514.0,68007.0,33381.0,6885.0,113918.0,26957.0,15063037.0,45074.0,...,37608.0,8501.0,22457.0,119400.0,8448.0,123725.0,115925.0,141050.0,356068.5,79543.0
1963,8247.0,15168.0,5506.0,73024.0,27217.0,6983.0,111058.0,28299.0,15183296.0,45968.0,...,46673.0,8722.0,22462.0,116409.0,8532.0,112000.0,73849.5,254350.0,411407.0,99741.0
1964,9619.0,16589.0,5610.0,73626.0,33213.0,6915.0,110407.0,27962.0,15577427.0,47689.0,...,70192.5,9472.0,22330.0,120710.0,8431.0,87882.0,90300.0,280600.0,566880.5,108969.0
1965,9268.0,15175.0,5638.0,74239.0,31811.0,6760.0,96002.0,28821.0,16116872.0,49053.0,...,93340.5,9276.0,22652.0,123218.0,9755.0,98850.0,75583.5,272381.5,623491.0,110151.0


Emission,Agricultural Soils_Emissions (CO2eq) (AR5),Agricultural Soils_Emissions (N2O),Burning - Crop residues_Emissions (CH4),Burning - Crop residues_Emissions (CO2eq) (AR5),Burning - Crop residues_Emissions (N2O),Crop Residues_Emissions (CO2eq) (AR5),Crop Residues_Emissions (N2O),Enteric Fermentation_Emissions (CH4),Enteric Fermentation_Emissions (CO2eq) (AR5),IPCC Agriculture_Emissions (CH4),...,Manure Management_Emissions (CO2eq) (AR5),Manure Management_Emissions (N2O),Manure applied to Soils_Emissions (CO2eq) (AR5),Manure applied to Soils_Emissions (N2O),Manure left on Pasture_Emissions (CO2eq) (AR5),Manure left on Pasture_Emissions (N2O),Rice Cultivation_Emissions (CH4),Rice Cultivation_Emissions (CO2eq) (AR5),Synthetic Fertilizers_Emissions (CO2eq) (AR5),Synthetic Fertilizers_Emissions (N2O)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1961,165579.9232,624.8299,306.4455,10685.8668,7.9449,30568.4824,115.3528,18825.8193,527122.94,36811.6988,...,65611.8249,91.8648,23479.2353,88.6009,100294.035,378.4681,16205.5889,453756.4904,11238.1705,42.4082
1962,168860.1411,637.2081,307.5017,10722.6995,7.9723,31839.0365,120.1473,18708.9777,523851.3764,37214.2253,...,65268.4716,90.5605,23383.5959,88.24,100319.4493,378.564,16723.8195,468266.9451,13318.0594,50.2568
1963,173352.1618,654.1591,312.4578,10895.5195,8.1008,32732.7848,123.5199,18878.996,528611.8875,37446.955,...,67706.1641,94.1558,24155.0408,91.1511,101541.6972,383.1762,16728.5412,468399.1545,14922.6389,56.3118
1964,180384.2549,680.6953,324.7525,11324.2394,8.4195,34063.2791,128.5407,19358.3426,542033.592,38686.3553,...,71906.2975,100.7026,25449.897,96.0373,103608.2048,390.9744,17388.2565,486871.1813,17262.874,65.1429
1965,186646.5882,704.3267,324.0673,11300.3478,8.4017,33802.2397,127.5556,19673.3413,550853.5573,39011.8179,...,75078.6113,106.0929,26413.5986,99.674,105313.3967,397.409,17337.1234,485439.4554,21117.3532,79.6881


Unnamed: 0,Year,Agave fibres nes_forecast,Agave fibres nes,Agave fibres nes_Unscaled,Agave fibres nes_Forecast_Unscaled
0,2011,0.529588,1.0,10076.0,10776.63
1,2012,0.622206,0.913636,10000.0,12036.0
2,2013,0.480168,0.913636,10000.0,10104.64
3,2014,0.0,0.913636,10000.0,3575.61
4,2015,0.160242,0.860227,9953.0,5754.48
5,2016,0.815703,0.568182,9696.0,14667.05
6,2017,1.0,0.104545,9288.0,17173.01
7,2018,0.530601,0.0,9196.0,10790.41
8,2019,0.690993,0.0,9196.0,12971.32


Unnamed: 0,Year,"Almonds, with shell_forecast","Almonds, with shell","Almonds, with shell_Unscaled","Almonds, with shell_Forecast_Unscaled"
0,2011,0.916774,0.693616,19332.0,25006.99
1,2012,0.427781,0.246122,16332.0,18953.22
2,2013,0.788029,0.0,14682.0,23413.12
3,2014,0.768277,0.186158,15930.0,23168.59
4,2015,1.0,0.363514,17119.0,26037.34
5,2016,0.0,0.514618,18132.0,13657.26
6,2017,0.56792,0.65707,19087.0,20688.16
7,2018,0.281193,0.973598,21209.0,17138.45
8,2019,0.627704,1.0,21386.0,21428.29


Unnamed: 0,Year,"Anise, badian, fennel, coriander_forecast","Anise, badian, fennel, coriander","Anise, badian, fennel, coriander_Unscaled","Anise, badian, fennel, coriander_Forecast_Unscaled"
0,2011,0.433487,0.350488,8467.0,5885.49
1,2012,0.360063,0.263814,8307.0,5341.66
2,2013,0.503735,0.320693,8412.0,6405.79
3,2014,0.103987,0.372156,8507.0,3445.0
4,2015,0.392759,0.420368,8596.0,5583.83
5,2016,0.314985,0.0,7820.0,5007.79
6,2017,0.623923,0.989166,9646.0,7295.98
7,2018,0.0,1.0,9666.0,2674.81
8,2019,1.0,0.666306,9050.0,10081.44


Unnamed: 0,Year,Apples_forecast,Apples,Apples_Unscaled,Apples_Forecast_Unscaled
0,2011,1.0,0.086243,145010.0,78452.99
1,2012,0.811775,0.17366,147837.0,68899.89
2,2013,0.521856,0.209438,148994.0,54185.43
3,2014,0.750777,0.234021,149789.0,65804.03
4,2015,0.31665,0.0,142221.0,43770.49
5,2016,0.708537,0.622128,162340.0,63660.2
6,2017,0.504075,0.987847,174167.0,53282.97
7,2018,0.545436,0.844924,169545.0,55382.21
8,2019,0.0,1.0,174560.0,27699.35


Unnamed: 0,Year,Apricots_forecast,Apricots,Apricots_Unscaled,Apricots_Forecast_Unscaled
0,2011,0.540476,0.670758,64478.0,88404.59
1,2012,0.0,0.881685,69049.0,53171.37
2,2013,0.368472,0.898897,69422.0,77191.76
3,2014,0.067475,0.0,49942.0,57570.02
4,2015,0.5006,0.636611,63738.0,85805.1
5,2016,0.314993,0.564303,62171.0,73705.54
6,2017,1.0,1.0,71613.0,118360.57
7,2018,0.424919,0.673296,64533.0,80871.53
8,2019,0.777247,0.688662,64866.0,103839.48


Unnamed: 0,Year,Areca nuts_forecast,Areca nuts,Areca nuts_Unscaled,Areca nuts_Forecast_Unscaled
0,2011,0.209673,0.0,12469.0,13892.68
1,2012,0.086464,0.469666,13607.0,13478.58
2,2013,0.657834,0.120924,12762.0,15398.92
3,2014,0.889409,0.30293,13203.0,16177.23
4,2015,0.339539,1.0,14892.0,14329.15
5,2016,0.0,0.638052,14015.0,13187.98
6,2017,0.909008,0.57078,13852.0,16243.1
7,2018,1.0,0.740817,14264.0,16548.92
8,2019,0.265685,0.584812,13886.0,14080.93


Unnamed: 0,Year,Artichokes_forecast,Artichokes,Artichokes_Unscaled,Artichokes_Forecast_Unscaled
0,2011,0.145333,0.563309,93615.0,76700.88
1,2012,0.228873,0.0,85429.0,87156.1
2,2013,0.605584,0.130815,87330.0,134302.08
3,2014,0.831674,0.254129,89122.0,162597.61
4,2015,0.0,0.416804,91486.0,58512.23
5,2016,0.025422,0.478805,92387.0,61693.85
6,2017,0.585478,0.549133,93409.0,131785.75
7,2018,1.0,1.0,99961.0,183663.89
8,2019,0.011068,0.809317,97190.0,59897.47


Unnamed: 0,Year,Asparagus_forecast,Asparagus,Asparagus_Unscaled,Asparagus_Forecast_Unscaled
0,2011,0.61251,0.645731,55071.0,39565.97
1,2012,0.686381,0.541797,54606.0,40295.04
2,2013,0.803892,0.141708,52816.0,41454.83
3,2014,0.990787,0.0,52182.0,43299.4
4,2015,0.719255,0.709209,55355.0,40619.49
5,2016,1.0,0.715914,55385.0,43390.33
6,2017,0.69433,0.81046,55808.0,40373.5
7,2018,0.0,0.905677,56234.0,33520.75
8,2019,0.03689,1.0,56656.0,33884.84


Unnamed: 0,Year,Asses_forecast,Asses,Asses_Unscaled,Asses_Forecast_Unscaled
0,2011,0.221681,1.0,15236017.0,18664613.33
1,2012,0.0,0.900384,15005502.0,18177097.54
2,2013,0.055003,0.778209,14722783.0,18298059.4
3,2014,0.27644,0.771959,14708321.0,18785036.93
4,2015,0.772533,0.622614,14362732.0,19876033.41
5,2016,0.82796,0.449917,13963103.0,19997927.25
6,2017,0.632989,0.052506,13043480.0,19569153.02
7,2018,0.380248,0.0,12921978.0,19013331.27
8,2019,1.0,0.134962,13234286.0,20376274.52


Unnamed: 0,Year,Avocados_forecast,Avocados,Avocados_Unscaled,Avocados_Forecast_Unscaled
0,2011,0.0,0.084746,97004.0,105353.75
1,2012,0.242557,1.0,103376.0,136484.4
2,2013,0.146071,0.419563,99335.0,124101.08
3,2014,0.367536,0.666906,101057.0,152524.76
4,2015,0.218008,0.84559,102301.0,133333.73
5,2016,0.140981,0.0,96414.0,123447.84
6,2017,0.266877,0.214306,97906.0,139605.84
7,2018,1.0,0.666762,101056.0,233697.65
8,2019,0.506729,0.543235,100196.0,170389.33


Unnamed: 0,Year,Bananas_forecast,Bananas,Bananas_Unscaled,Bananas_Forecast_Unscaled
0,2011,0.338415,0.031901,280487.0,173292.68
1,2012,0.137816,0.0,278969.0,135852.12
2,2013,0.317211,0.198886,288433.0,169335.04
3,2014,0.965265,1.0,326554.0,290290.7
4,2015,1.0,0.797289,316908.0,296773.85
5,2016,0.767128,0.743049,314327.0,253309.61
6,2017,0.449037,0.931764,323307.0,193939.75
7,2018,0.502165,0.980876,325644.0,203855.77
8,2019,0.0,0.972344,325238.0,110129.48


Unnamed: 0,Year,Barley_forecast,Barley,Barley_Unscaled,Barley_Forecast_Unscaled
0,2011,0.300638,0.440265,18962.0,16484.79
1,2012,0.140504,0.250369,18447.0,14175.71
2,2013,0.092878,0.728614,19744.0,13488.95
3,2014,0.575863,0.0,17768.0,20453.44
4,2015,1.0,0.692847,19647.0,26569.37
5,2016,0.403916,0.889749,20181.0,17974.03
6,2017,0.0,1.0,20480.0,12149.68
7,2018,0.351453,0.450959,18991.0,17217.52
8,2019,0.573483,0.938053,20312.0,20419.13


Unnamed: 0,Year,"Bastfibres, other_forecast","Bastfibres, other","Bastfibres, other_Unscaled","Bastfibres, other_Forecast_Unscaled"
0,2011,0.353772,0.956897,15038.0,14532.74
1,2012,0.906827,1.0,15168.0,19999.1
2,2013,1.0,0.253647,12917.0,20920.02
3,2014,0.724447,0.554377,13824.0,18196.47
4,2015,0.0,0.640915,14085.0,11036.08
5,2016,0.351054,0.261936,12942.0,14505.88
6,2017,0.493027,0.0,12152.0,15909.13
7,2018,0.852648,0.268568,12962.0,19463.6
8,2019,0.451479,0.26061,12938.0,15498.47


Unnamed: 0,Year,"Beans, dry_forecast","Beans, dry","Beans, dry_Unscaled","Beans, dry_Forecast_Unscaled"
0,2011,0.860121,0.0,6827.0,7912.97
1,2012,0.451642,0.640238,7473.0,6842.44
2,2013,0.202597,0.960357,7796.0,6189.75
3,2014,0.0,0.909812,7745.0,5658.79
4,2015,0.836159,0.929633,7765.0,7850.17
5,2016,0.643456,0.375619,7206.0,7345.14
6,2017,0.992506,0.409316,7240.0,8259.92
7,2018,0.235511,0.529237,7361.0,6276.01
8,2019,1.0,1.0,7836.0,8279.56


Unnamed: 0,Year,"Beans, green_forecast","Beans, green","Beans, green_Unscaled","Beans, green_Forecast_Unscaled"
0,2011,0.765784,0.0,150894.0,87180.31
1,2012,0.953325,0.123284,154432.0,89703.31
2,2013,0.428563,0.145237,155062.0,82643.67
3,2014,0.018078,0.278068,158874.0,77121.4
4,2015,0.0,0.610008,168400.0,76878.19
5,2016,0.165824,0.620008,168687.0,79109.03
6,2017,0.108103,0.756324,172599.0,78332.5
7,2018,1.0,0.884278,176271.0,90331.23
8,2019,0.720523,1.0,179592.0,86571.41


Unnamed: 0,Year,Beef and Buffalo Meat_forecast,Beef and Buffalo Meat,Beef and Buffalo Meat_Unscaled,Beef and Buffalo Meat_Forecast_Unscaled
0,2011,0.300299,0.203125,1540.0,1512.81
1,2012,0.711458,0.421875,1554.0,1608.98
2,2013,0.640616,0.6875,1571.0,1592.41
3,2014,0.477811,0.0,1527.0,1554.33
4,2015,0.435827,0.59375,1565.0,1544.51
5,2016,0.227362,0.609375,1566.0,1495.75
6,2017,0.0,0.984375,1590.0,1442.57
7,2018,0.311971,1.0,1591.0,1515.54
8,2019,1.0,0.875,1583.0,1676.47


Unnamed: 0,Year,Beehives_forecast,Beehives,Beehives_Unscaled,Beehives_Forecast_Unscaled
0,2011,0.397331,0.0,36564559.0,28961145.52
1,2012,0.428502,0.165402,37729639.0,29227104.32
2,2013,0.66432,0.367184,39150980.0,31239164.95
3,2014,0.211329,0.560898,40515487.0,27374133.95
4,2015,0.124928,0.729272,41701503.0,26636937.99
5,2016,0.0,0.755149,41883783.0,25571019.32
6,2017,0.253107,0.846,42523732.0,27730591.25
7,2018,0.279141,0.942115,43200756.0,27952719.07
8,2019,1.0,1.0,43608496.0,34103275.84


Unnamed: 0,Year,Beeswax_forecast,Beeswax,Beeswax_Unscaled,Beeswax_Forecast_Unscaled
0,2011,0.726531,,8.0,10.14
1,2012,1.0,,8.0,10.81
2,2013,0.604082,,8.0,9.84
3,2014,0.436735,,8.0,9.43
4,2015,0.522449,,8.0,9.64
5,2016,0.95102,,8.0,10.69
6,2017,0.334694,,8.0,9.18
7,2018,0.591837,,8.0,9.81
8,2019,0.0,,8.0,8.36


Unnamed: 0,Year,Berries nes_forecast,Berries nes,Berries nes_Unscaled,Berries nes_Forecast_Unscaled
0,2011,0.384164,0.458652,78492.0,78053.03
1,2012,0.284682,1.0,83657.0,74615.71
2,2013,0.859183,0.873913,82454.0,94465.92
3,2014,0.204849,0.488104,78773.0,71857.3
4,2015,0.174665,0.177445,75809.0,70814.38
5,2016,0.724178,0.0,74116.0,89801.22
6,2017,0.982667,0.396709,77901.0,98732.56
7,2018,0.0,0.17451,75781.0,64779.35
8,2019,1.0,0.128498,75342.0,99331.45


Unnamed: 0,Year,"Broad beans, horse beans, dry_forecast","Broad beans, horse beans, dry","Broad beans, horse beans, dry_Unscaled","Broad beans, horse beans, dry_Forecast_Unscaled"
0,2011,0.73542,0.232448,19932.0,21614.5
1,2012,0.003018,0.0,19538.0,13224.36
2,2013,0.272015,0.363422,20154.0,16305.9
3,2014,0.298258,0.515634,20412.0,16606.53
4,2015,0.645238,0.297935,20043.0,20581.41
5,2016,0.0,0.234218,19935.0,13189.79
6,2017,0.929897,1.0,21233.0,23842.37
7,2018,1.0,0.650147,20640.0,24645.44
8,2019,0.307783,0.669617,20673.0,16715.65


Unnamed: 0,Year,Buckwheat_forecast,Buckwheat,Buckwheat_Unscaled,Buckwheat_Forecast_Unscaled
0,2011,0.0,1.0,9781.0,10426.23
1,2012,0.456879,0.738201,9193.0,17951.84
2,2013,0.302056,0.650935,8997.0,15401.64
3,2014,0.204289,0.150935,7874.0,13791.23
4,2015,0.781099,0.0,7535.0,23292.33
5,2016,0.91372,0.312556,8237.0,25476.84
6,2017,0.259646,0.383348,8396.0,14703.07
7,2018,0.784063,0.343277,8306.0,23341.15
8,2019,1.0,0.328139,8272.0,26898.02


Unnamed: 0,Year,Buffaloes_forecast,Buffaloes,Buffaloes_Unscaled,Buffaloes_Forecast_Unscaled
0,2011,0.046253,0.0,189533629.0,167099200.0
1,2012,0.0,0.120395,190602815.0,166369500.0
2,2013,0.322828,0.175917,191095886.0,171461900.0
3,2014,0.414516,0.322666,192399108.0,172908200.0
4,2015,0.60004,0.495158,193930946.0,175834700.0
5,2016,0.16757,0.619757,195037459.0,169012800.0
6,2017,0.43723,0.675895,195535997.0,173266500.0
7,2018,0.541456,0.841249,197004451.0,174910600.0
8,2019,1.0,1.0,198414255.0,182143800.0


Unnamed: 0,Year,Cabbages and other brassicas_forecast,Cabbages and other brassicas,Cabbages and other brassicas_Unscaled,Cabbages and other brassicas_Forecast_Unscaled
0,2011,0.70688,0.627677,305930.0,341993.84
1,2012,0.759222,0.0,299276.0,363301.94
2,2013,1.0,0.722856,306939.0,461320.44
3,2014,0.452019,1.0,309877.0,238242.41
4,2015,0.451281,0.869163,308490.0,237942.23
5,2016,0.435109,0.49052,304476.0,231358.47
6,2017,0.415195,0.317706,302644.0,223251.74
7,2018,0.0,0.793416,307687.0,54229.78
8,2019,0.746148,0.834355,308121.0,357979.6


In [None]:
# Evaluation metrics

average_mean_absolute_error = sum(mae)/len(mae)
average_mean_squared_error = sum(mse)/len(mse)
average_sqrt = sum(sqr)/len(sqr)
print('Mean absolute error:', average_mean_absolute_error)
print('Mean squared error:', average_mean_squared_error)
print('Root mean squared error:', average_sqrt)