In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# no warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the long-format records (one row per Year / CountryCode) from the input CSV
file_path = './input_data/TreeCoverLoss_2001-2020_ByRegion.csv'
data = pd.read_csv(file_path)

# Reshape each metric to wide form: rows indexed by Year, one column per CountryCode
tree_cover_loss, gross_emissions = (
    data.pivot(index='Year', columns='CountryCode', values=metric_column)
    for metric_column in ('TreeCoverLoss_ha', 'GrossEmissions_Co2_all_gases_Mg')
)

# Forecast for each country
def forecast_metric(metric_data, metric_name, forecast_years=None, min_observations=3):
    """Forecast one metric per country with additive-trend exponential smoothing.

    Every column of ``metric_data`` is handled as an independent series:
    missing values are dropped, an ``ExponentialSmoothing`` model with an
    additive trend and no seasonal component is fitted, and one value is
    forecast for each entry of ``forecast_years``.

    Parameters
    ----------
    metric_data : pandas.DataFrame
        Wide frame with one column per country code.
    metric_name : str
        Name of the column that holds the forecast values in the result.
    forecast_years : sequence of int, optional
        Labels for the forecast steps. Defaults to 2021-2030. The labels are
        attached positionally to the first ``len(forecast_years)`` steps after
        the last retained observation of each series.
    min_observations : int, optional
        Columns with fewer non-missing values than this are skipped
        (default 3).

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``Year``, ``CountryCode`` and
        ``metric_name``. When no column has enough observations the frame is
        empty but still carries those three columns.
    """
    # The horizon is identical for every country, so build it once up front.
    if forecast_years is None:
        forecast_years = np.arange(2021, 2031)
    forecast_years = np.asarray(forecast_years)
    horizon = len(forecast_years)

    forecasts = []
    for country in metric_data.columns:
        series = metric_data[country].dropna()  # Drop missing values
        if len(series) < min_observations:
            # Too few data points to fit a trend; leave this country out.
            continue

        # NOTE(review): dropping NaNs treats the remaining points as evenly
        # spaced and assumes the last one is the year before the horizon --
        # confirm the input has no gaps or trailing missing years.
        model = ExponentialSmoothing(series, trend='add', seasonal=None, seasonal_periods=None)
        fit_model = model.fit()
        forecast = fit_model.forecast(horizon)

        # NOTE(review): the additive trend can push forecasts below zero,
        # which may not be meaningful for the metric -- decide whether the
        # caller should clip.
        forecasts.append(pd.DataFrame({
            'Year': forecast_years,
            'CountryCode': country,
            # Use the raw values so the rows line up with forecast_years by
            # position, independent of whatever index the forecast carries.
            metric_name: np.asarray(forecast),
        }))

    if not forecasts:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the expected columns so downstream merges still work.
        return pd.DataFrame(columns=['Year', 'CountryCode', metric_name])

    return pd.concat(forecasts, ignore_index=True)

# Run the same per-country forecast for both metrics
tree_cover_loss_df = forecast_metric(tree_cover_loss, 'TreeCoverLoss_ha')
gross_emissions_df = forecast_metric(gross_emissions, 'GrossEmissions_Co2_all_gases_Mg')

# Put the two metrics side by side, matching rows on (Year, CountryCode).
# merge defaults to an inner join, so only pairs present in both frames are kept.
forecasts_df = tree_cover_loss_df.merge(
    gross_emissions_df,
    on=['Year', 'CountryCode'],
)


In [3]:
# Show the merged forecast table (both metrics, keyed by Year and CountryCode)
forecasts_df

Unnamed: 0,Year,CountryCode,TreeCoverLoss_ha,GrossEmissions_Co2_all_gases_Mg
0,2021,ABW,0.080184,5.056906e+01
1,2022,ABW,-0.018594,5.054659e+01
2,2023,ABW,-0.117372,5.052413e+01
3,2024,ABW,-0.216149,5.050166e+01
4,2025,ABW,-0.314927,5.047920e+01
...,...,...,...,...
2085,2026,ZWE,17523.292619,5.582235e+06
2086,2027,ZWE,17898.216040,5.688493e+06
2087,2028,ZWE,18273.139462,5.794750e+06
2088,2029,ZWE,18648.062884,5.901008e+06


In [4]:
# Save the results to a new CSV file
output_file_path = './output_data/prediction_ByRegion_exp_smooth.csv'

# to_csv does not create missing parent folders, so make sure the output
# directory exists first (it may be absent on a fresh checkout).
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
forecasts_df.to_csv(output_file_path, index=False)

# Display the output file path
output_file_path

'./output_data/prediction_ByRegion_exp_smooth.csv'