![ds4a_colombia.svg](attachment:ds4a_colombia.svg)

# Impacto de la deforestación en el régimen de caudales de los ríos en Colombia (TEAM 28)

## Read individual files

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
# Open an initial 15x12 figure (80 dpi, white face, black edge).
figure(num=None, figsize=(15, 12), dpi=80, facecolor='w', edgecolor='k')

# Notebook-wide matplotlib defaults: larger font, wide and short figures, and
# a high threshold before matplotlib warns about the number of open figures.
plt.rcParams['font.size'] = 16
plt.rcParams['figure.figsize'] = (15, 3)
plt.rcParams['figure.max_open_warning'] = 2000

## Scenarios

In [None]:
# Columns kept from the scenario matrix, in display order.
scenario_columns = ['date', 'mc', 'v_flow_mean', 'v_loss_cover',
                    'v_rainfall_total', 'scenario']

# Read the scenario matrix and set 'v_flow_mean' to 0 on every row before
# narrowing the frame to the columns listed above.
scenarios = pd.read_excel('../data/matrix/Esc_Predicciones_longitudinal.xlsx')
scenarios = scenarios.assign(v_flow_mean=0)
scenarios = scenarios[scenario_columns]

scenarios.head(10)

In [None]:
# Plot the assumed total rainfall of every basin ('mc') / scenario pair as a
# time series, one figure per pair.
temp_df = scenarios.copy()

mcs = temp_df['mc'].unique()
escen = temp_df['scenario'].unique()

for i in mcs:
    for j in escen:

        # Rows of this basin/scenario pair, selected once and reused below.
        subset = temp_df[(temp_df['mc'] == i) & (temp_df['scenario'] == j)]

        # Not every basin necessarily has every scenario; plotting an empty
        # frame raises, so skip combinations without data.
        if subset.empty:
            continue

        # set_index returns a new frame, so its result has to be kept for the
        # date to actually become the index (and therefore the plot's x axis).
        subset = subset.set_index(keys = 'date', drop = True)

        df_v_loss_cover = subset[['v_loss_cover']]
        df_v_rainfall_total = subset[['v_rainfall_total']]

        df_v_rainfall_total.plot(title = 'Esc : ' + str(j) + ' - MC : ' + str(i))

## Model results

In [None]:
# Load the VAR model forecasts and reshape them into the column layout used
# by the other model result tables in this notebook.
VAR = pd.read_csv('VAR_forecast_2020_2021.csv')

# 'month_year' is split on "-" into a year part and a month part.
VAR[['year', 'month']] = VAR.month_year.str.split("-", expand=True)

# The year is still parsed through float, exactly as before, but is then
# stored as an integer. A float year in this table would upcast the 'year'
# column of the concatenated forecast to float (2020.0), unlike the integer
# years derived from the dates in the RandomForest table.
VAR = VAR.astype({'year': float, 'month': int})
VAR['year'] = VAR['year'].astype(int)

# Every forecast row is dated on the first day of its month.
VAR['date'] = pd.to_datetime(dict(year = VAR.year, month = VAR.month, day = 1))

# Keep the relevant columns and rename them in one step; reassigning instead
# of renaming in place avoids mutating a column subset of the original frame.
VAR = VAR[['date', 'year', 'month', 'basin', 'v_flow_mean_forecast',
           'v_loss_cover', 'v_rainfall_total', 'scenario']].rename(
    columns={'basin': 'mc',
             'v_loss_cover': 'v_loss_cover_assum',
             'v_rainfall_total': 'v_rainfall_total_assum',
             'v_flow_mean_forecast': 'v_flow_mean_pred'})

VAR['model_type'] = 'VAR'

# The scenario label is split on "_" into four parts; the 2nd and 4th parts
# are kept as the loss-cover and climate-change scenario columns, the other
# two are discarded.
VAR[['tmp1', 'loss_cover_scenario',
     'tmp2', 'climate_change_scenario',]] = VAR.scenario.str.split("_",expand=True)

VAR = VAR.drop(columns = ['tmp1', 'tmp2'])

VAR.head()

In [None]:
# Load the LSTM model forecasts and reshape them into the column layout used
# by the other model result tables in this notebook.
LSTM = pd.read_csv('LSTM_forecast_2020_2021.csv')

# 'month_year' is split on "-" into a year part and a month part.
LSTM[['year', 'month']] = LSTM.month_year.str.split("-", expand=True)

# The year is still parsed through float, exactly as before, but is then
# stored as an integer. A float year in this table would upcast the 'year'
# column of the concatenated forecast to float (2020.0), unlike the integer
# years derived from the dates in the RandomForest table.
LSTM = LSTM.astype({'year': float, 'month': int})
LSTM['year'] = LSTM['year'].astype(int)

# Every forecast row is dated on the first day of its month.
LSTM['date'] = pd.to_datetime(dict(year = LSTM.year, month = LSTM.month, day = 1))

# Keep the relevant columns and rename them in one step; reassigning instead
# of renaming in place avoids mutating a column subset of the original frame.
LSTM = LSTM[['date', 'year', 'month', 'basin', 'v_flow_mean_forecast',
             'v_loss_cover', 'v_rainfall_total', 'scenario']].rename(
    columns={'basin': 'mc',
             'v_loss_cover': 'v_loss_cover_assum',
             'v_rainfall_total': 'v_rainfall_total_assum',
             'v_flow_mean_forecast': 'v_flow_mean_pred'})

LSTM['model_type'] = 'LSTM'

# The scenario label is split on "_" into four parts; the 2nd and 4th parts
# are kept as the loss-cover and climate-change scenario columns, the other
# two are discarded.
LSTM[['tmp1', 'loss_cover_scenario',
      'tmp2', 'climate_change_scenario',]] = LSTM.scenario.str.split("_",expand=True)

LSTM = LSTM.drop(columns = ['tmp1', 'tmp2'])

LSTM.head()

In [None]:
# Load the Random Forest forecasts and bring them to the common column layout.
RFR = pd.read_csv('RFR_forecast_2020_2021.csv')
RFR['model_type'] = 'RandomForest'

# The scenario label is split on "_" into four parts; the 2nd and 4th parts
# become the loss-cover and climate-change scenario columns.
rfr_scenario_parts = RFR.scenario.str.split("_", expand=True)
RFR[['tmp1', 'loss_cover_scenario', 'tmp2',
     'climate_change_scenario']] = rfr_scenario_parts

# Calendar year and month are derived from the 'date' column.
rfr_dates = pd.DatetimeIndex(RFR['date'])
RFR['year'] = rfr_dates.year
RFR['month'] = rfr_dates.month

# Rename the model columns to the shared names and discard the two unused
# pieces of the scenario label.
RFR = RFR.rename(columns={
    'v_loss_cover': 'v_loss_cover_assum',
    'v_rainfall_total': 'v_rainfall_total_assum',
    'v_flow_mean_forecast': 'v_flow_mean_pred',
})
RFR = RFR.drop(columns=['tmp1', 'tmp2'])

# Final column order for this table.
rfr_columns = [
    'date', 'year', 'month', 'mc', 'v_flow_mean_pred', 'v_loss_cover_assum',
    'v_rainfall_total_assum', 'scenario', 'model_type', 'loss_cover_scenario',
    'climate_change_scenario',
]
RFR = RFR[rfr_columns]

RFR.head()

In [None]:
# Load the VECM forecasts and bring them to the common column naming.
VECM = pd.read_csv('VECM_forecast_2020_2021.csv')
VECM['model_type'] = 'VECM'

# The scenario label is split on "_" into four parts; the 2nd and 4th parts
# become the loss-cover and climate-change scenario columns.
vecm_scenario_parts = VECM.scenario.str.split("_", expand=True)
VECM[['tmp1', 'loss_cover_scenario',
      'tmp2', 'climate_change_scenario']] = vecm_scenario_parts

# Rename the model columns to the shared names and discard the two unused
# pieces of the scenario label.
VECM = VECM.rename(columns={
    'v_loss_cover': 'v_loss_cover_assum',
    'v_rainfall_total': 'v_rainfall_total_assum',
    'v_flow_mean_forecast': 'v_flow_mean_pred',
})
VECM = VECM.drop(columns=['tmp1', 'tmp2'])

VECM.head()

In [None]:
# Load the XGBoost forecasts and bring them to the common column naming.
XGB = pd.read_csv('XGB_forecast_2020_2021.csv')
XGB['model_type'] = 'XGBoost'

# The scenario label is split on "_" into four parts; the 2nd and 4th parts
# become the loss-cover and climate-change scenario columns.
xgb_scenario_parts = XGB.scenario.str.split("_", expand=True)
XGB[['tmp1', 'loss_cover_scenario',
     'tmp2', 'climate_change_scenario']] = xgb_scenario_parts

# Rename the model columns to the shared names and discard the two unused
# pieces of the scenario label.
XGB = XGB.rename(columns={
    'v_loss_cover': 'v_loss_cover_assum',
    'v_rainfall_total': 'v_rainfall_total_assum',
    'v_flow_mean_forecast': 'v_flow_mean_pred',
})
XGB = XGB.drop(columns=['tmp1', 'tmp2'])

XGB.head()

## Consolidate forecast

In [None]:
# Stack the per-model forecast tables row-wise into a single frame.
model_frames = [VAR, VECM, RFR, XGB, LSTM]
forecast = pd.concat(model_frames)

# Every row is flagged with model_optimum = 1.
forecast = forecast.assign(model_optimum=1)

# Column order of the consolidated forecast table.
forecast_columns = [
    'mc', 'date', 'year', 'month', 'model_type', 'climate_change_scenario',
    'loss_cover_scenario', 'model_optimum', 'v_flow_mean_pred',
    'v_loss_cover_assum', 'v_rainfall_total_assum',
]
forecast = forecast[forecast_columns]

forecast.head()

In [None]:
# Write the consolidated forecast to disk without the row index.
forecast.to_csv(path_or_buf='../model/forecast_2020_2021.csv', index=False)