In [1]:
import numpy as np
from netCDF4 import Dataset
import glob
import matplotlib.pyplot as plt
import scipy.stats as sp
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import cos, asin, sqrt, pi

#import warnings
#warnings.filterwarnings('ignore')

import sys
sys.path.insert(1, '../Modules/')
from ML_models_regressors import clf_LR, clf_SVR, clf_DT, clf_RF, clf_neigh, clf_AB, clf_MLP, clf_LGBM
from seasonal_forecasts import *

In [2]:
# Regressor used for the forecast and the short tag embedded in the output file name
clf = clf_LR
mod = "LR"

# CSV path handed to saver() once the forecast has been produced
output_file = f"Output/DDHWSF_Forecasts_{mod}_19792021.csv"

init = "May"  # forecast "initialisation" month
trg = "MJJ"  # forecast target period (presumably May-June-July -- confirm)


In [3]:
# Collect the solutions file(s), one per grid point, in sorted order #
files = glob.glob("Output/optimisation_output.csv")
files.sort()
print("Number of grid points: ", len(files))

Number of grid points:  1


In [4]:
### Open solutions and extract best solution ###

# Lists for best solutions (one entry per non-empty solutions file) #

nevals=[] # number of evaluations
cv_best=[] # best (lowest) cross-validation/training score
test_best=[] # test score corresponding to cv_best
sols_best=[] # predictors corresponding to cv_best

for file in files:
    sol_file_av = pd.read_csv(file, index_col=None, sep=' ', header=0)
    if sol_file_av.shape[0]>0:
        nevals.append(sol_file_av.shape[0])
        # Index label of the row with the lowest CV score; sort once and reuse the result
        best_idx = sol_file_av.sort_values(by=['CV'],ascending=True).index[0]
        # 'Sol' holds the solution as text ("[a b c ...]"): strip brackets/newlines, then parse to floats
        sol_text = sol_file_av.loc[best_idx, 'Sol'].replace('[', '').replace(']', '').replace('\n', '')
        sols_best.append(np.fromstring(sol_text, dtype=float, sep=' '))
        cv_best.append(sol_file_av.loc[best_idx, 'CV'])
        test_best.append(sol_file_av.loc[best_idx, 'Test'])
    else:
        # NOTE: nothing is appended for an empty file, so the lists above are
        # no longer aligned one-to-one with `files` when this branch is taken
        print ("Empty file - no solutions")


In [5]:
### Open HW target dataset ###

dataset=Dataset("Output/NumberHWdays_past2k_Cluj-Napoca.nc",'r')
target_past2k=dataset['NumberHWDays'][:]

# ERA5 target: entries 39..80 along the first axis (42 values).
# NOTE(review): presumably one value per year starting in 1940, i.e. 1979-2020 -- confirm
dataset=Dataset("Output/NumberHWdays_ERA5_Cluj-Napoca.nc",'r')
target_ERA5=dataset['NumberHWDays'][1979-1940:2021-1940]

#===============================#

pred_dataframe_era5 = pd.read_csv('Predictors_dataset_ERA5_weekly.csv', index_col=0)

pred_dataframe_past2k = pd.read_csv('Predictors_dataset_past2k_weekly.csv', index_col=0)

# past2k rows first, ERA5 rows appended after them
pred_dataframe=pd.concat([pred_dataframe_past2k,pred_dataframe_era5])

# Convert ERA5 predictors to past2k units.
# Only rows with index label '1979-01-01' or later are rescaled.
# The assignment goes through .loc on the frame itself: chained indexing
# (df[col][rows] = ...) may write to a temporary copy and leave the frame
# unchanged (SettingWithCopyWarning / pandas Copy-on-Write).
era5_rows = slice('1979-01-01', None)

# Soil moisture: past2k in kg/m2, ERA5 volumetric (m3/m3) -> multiply by 70
# NOTE(review): presumably 0.07 m layer depth * 1000 kg/m3 -- confirm
sm_cols = [f'smEurope_cluster{k}' for k in range(1, 6)]
pred_dataframe.loc[era5_rows, sm_cols] = pred_dataframe.loc[era5_rows, sm_cols].values*70

# SIC Arctic
# past2k - percentage , ERA5 - proportion -> multiply by 100
sic_cols = [f'sicArctic_cluster{k}' for k in range(1, 6)]
pred_dataframe.loc[era5_rows, sic_cols] = pred_dataframe.loc[era5_rows, sic_cols].values*100

# Length passed to saver(); equals the number of values in the target_ERA5 slice above.
# NOTE(review): the previous comment said "1950-2022", which does not match 42 -- confirm intent
l=42

In [6]:
# Flag forwarded to forecast(); change it here rather than in the call below
remove_co2=True

# Run the forecast with the best solution of the first (only) solutions file.
# The flag variable is passed through so that editing it above actually takes effect
# (the call previously hard-coded remove_co2=True and ignored the variable).
preds=forecast(target_past2k, target_ERA5, sols_best[0], clf, pred_dataframe, remove_co2=remove_co2)
print (preds)
# preds is (model name, array of predictions); only the array is written out
saver(output_file,preds[1],l)

('LinearRegression', array([ 4.38852912,  2.16353778,  6.80066067,  4.74404305, 11.90068475,
        8.14156729,  7.06632814,  3.87474861,  5.01801922, 14.31489484,
        4.79892366, 11.13270093,  4.65979454,  9.08541449,  3.94975005,
        4.74042149,  5.51727131,  6.2208489 , 10.7881366 ,  7.13534234,
        9.48664615, 11.74181037,  4.61538962,  5.62371493, 15.27600022,
        9.5567436 ,  9.62906527, 10.91329812, 15.05162398, 13.55527152,
       12.74272923,  4.85176735, 13.55220792,  9.7723232 ,  7.08502428,
       12.1406066 , 12.66468034, 11.70345232, 13.68363235, 11.86235011,
       14.67341543, 21.39269134]))
Saved predictions with metadata to Output/DDHWSF_Forecasts_LR_19792021.csv
