In [None]:
#Machine Learning approaches for statistical downscaling and prediction in climate.
#Amir Ebrahimi
#Amirreza70ebrahimi@gmail.com
#https://github.com/amirrez92
#https://www.linkedin.com/in/amir-ebrahimi1992/
#https://www.researchgate.net/profile/Amir-Ebrahimi-9

In [None]:
#First, import the essential libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn import metrics

In [None]:
#Choose regressors.
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [None]:
#Load the combined table of observations and NCEP predictors from a single .csv file.
dat = pd.read_csv(filepath_or_buffer="datase.csv")

In [None]:
#Pairwise correlation between all columns of dat (observations and NCEPs).
corr = dat.corr()

#Show the correlation matrix as an annotated heatmap on a 15x12 figure.
fig = plt.figure(figsize=(15, 12))
r = sns.heatmap(data=corr, annot=True, cmap='Purples')
r.set_title("Correlation ")

#Also print the coefficients as plain text.
print(corr)

In [None]:
#Pick the NCEP predictor columns chosen from the correlation analysis as X,
#and the observation column (MT) as Y. Both stay DataFrames (Y has one column).
X = dat.loc[:, ['a', 'b', 's', 't', 'z']]
Y = dat.loc[:, ['MT']]

In [None]:
#Define the calibration period (train) and the validation period (test) by row position.
#The first N_TRAIN rows (positions 0..N_TRAIN-1, i.e. days 1 to 2922) form the calibration set;
#every row from position N_TRAIN onward (day 2923 to the end) forms the validation set.
#Change N_TRAIN to move the boundary between the two periods.
N_TRAIN = 2922

#.values converts each slice to a NumPy array; Y_* keep a single column (2-D shape).
X_train = X.iloc[:N_TRAIN].values
Y_train = Y.iloc[:N_TRAIN].values

X_test = X.iloc[N_TRAIN:].values
Y_test = Y.iloc[N_TRAIN:].values

In [None]:
#Display the calibration-period (train) predictor array.
X_train

In [None]:
#Display the validation-period (test) predictor array.
X_test

In [None]:
#Display the calibration-period (train) observations (MT).
Y_train

In [None]:
#Display the validation-period (test) observations (MT).
Y_test

In [None]:
#Fit every regressor on the calibration (train) period only.
#Fitting on the full X/Y would let the models see the validation rows, so the
#test-period metrics would no longer measure out-of-sample skill.
#NOTE: the fitted models therefore use only the calibration rows; refit on the
#full record in a separate step if that is wanted for the future projection.

#Linear regression keeps the 2-D target, so its predictions stay column-shaped.
lrregressor = LinearRegression()
lrregressor.fit(X_train, Y_train)

#Random forest: n_estimators is the number of trees; random_state fixes the seed
#so repeated runs give the same forest (any integer works, e.g. 42 or 26).
#Y_train.ravel() flattens the (n, 1) target column to 1-D, which these
#estimators expect (a column vector triggers a DataConversionWarning).
rfregressor = RandomForestRegressor(n_estimators = 10, random_state = 42)
rfregressor.fit(X_train, Y_train.ravel())

#Support vector regression with an RBF kernel and with a polynomial kernel.
svmrbfregressor = SVR(kernel='rbf')
svmrbfregressor.fit(X_train, Y_train.ravel())
svmpolregressor = SVR(kernel='poly')
svmpolregressor.fit(X_train, Y_train.ravel())

In [None]:
#Statistical downscaling: run each fitted regressor on the calibration (train)
#predictors first and then on the validation (test) predictors.
prlrtrain, prlrtest = (lrregressor.predict(split) for split in (X_train, X_test))
predrftrain, predrftest = (rfregressor.predict(split) for split in (X_train, X_test))
predsvmrbftrain, predsvmrbftest = (svmrbfregressor.predict(split) for split in (X_train, X_test))
predsvmpoltrain, predsvmpoltest = (svmpolregressor.predict(split) for split in (X_train, X_test))

In [None]:
#Evaluation metrics: linear regression, calibration (train) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_train, y_pred=prlrtrain))
#The MSE is computed once and reused for the RMSE.
mse_lr_train = metrics.mean_squared_error(y_true=Y_train, y_pred=prlrtrain)
print('Mean Squared Error:', mse_lr_train)
print('Root Mean Squared Error:', np.sqrt(mse_lr_train))
print('R^2:', metrics.r2_score(y_true=Y_train, y_pred=prlrtrain))

In [None]:
#Evaluation metrics: linear regression, validation (test) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_test, y_pred=prlrtest))
#The MSE is computed once and reused for the RMSE.
mse_lr_test = metrics.mean_squared_error(y_true=Y_test, y_pred=prlrtest)
print('Mean Squared Error:', mse_lr_test)
print('Root Mean Squared Error:', np.sqrt(mse_lr_test))
print('R^2:', metrics.r2_score(y_true=Y_test, y_pred=prlrtest))

In [None]:
#Evaluation metrics: random forest, calibration (train) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_train, y_pred=predrftrain))
#The MSE is computed once and reused for the RMSE.
mse_rf_train = metrics.mean_squared_error(y_true=Y_train, y_pred=predrftrain)
print('Mean Squared Error:', mse_rf_train)
print('Root Mean Squared Error:', np.sqrt(mse_rf_train))
print('R^2:', metrics.r2_score(y_true=Y_train, y_pred=predrftrain))

In [None]:
#Evaluation metrics: random forest, validation (test) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_test, y_pred=predrftest))
#The MSE is computed once and reused for the RMSE.
mse_rf_test = metrics.mean_squared_error(y_true=Y_test, y_pred=predrftest)
print('Mean Squared Error:', mse_rf_test)
print('Root Mean Squared Error:', np.sqrt(mse_rf_test))
print('R^2:', metrics.r2_score(y_true=Y_test, y_pred=predrftest))

In [None]:
#Evaluation metrics: SVR with RBF kernel, calibration (train) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_train, y_pred=predsvmrbftrain))
#The MSE is computed once and reused for the RMSE.
mse_svmrbf_train = metrics.mean_squared_error(y_true=Y_train, y_pred=predsvmrbftrain)
print('Mean Squared Error:', mse_svmrbf_train)
print('Root Mean Squared Error:', np.sqrt(mse_svmrbf_train))
print('R^2:', metrics.r2_score(y_true=Y_train, y_pred=predsvmrbftrain))

In [None]:
#Evaluation metrics: SVR with RBF kernel, validation (test) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_test, y_pred=predsvmrbftest))
#The MSE is computed once and reused for the RMSE.
mse_svmrbf_test = metrics.mean_squared_error(y_true=Y_test, y_pred=predsvmrbftest)
print('Mean Squared Error:', mse_svmrbf_test)
print('Root Mean Squared Error:', np.sqrt(mse_svmrbf_test))
print('R^2:', metrics.r2_score(y_true=Y_test, y_pred=predsvmrbftest))

In [None]:
#Evaluation metrics: SVR with polynomial kernel, calibration (train) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_train, y_pred=predsvmpoltrain))
#The MSE is computed once and reused for the RMSE.
mse_svmpol_train = metrics.mean_squared_error(y_true=Y_train, y_pred=predsvmpoltrain)
print('Mean Squared Error:', mse_svmpol_train)
print('Root Mean Squared Error:', np.sqrt(mse_svmpol_train))
print('R^2:', metrics.r2_score(y_true=Y_train, y_pred=predsvmpoltrain))

In [None]:
#Evaluation metrics: SVR with polynomial kernel, validation (test) period.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true=Y_test, y_pred=predsvmpoltest))
#The MSE is computed once and reused for the RMSE.
mse_svmpol_test = metrics.mean_squared_error(y_true=Y_test, y_pred=predsvmpoltest)
print('Mean Squared Error:', mse_svmpol_test)
print('Root Mean Squared Error:', np.sqrt(mse_svmpol_test))
print('R^2:', metrics.r2_score(y_true=Y_test, y_pred=predsvmpoltest))

In [None]:
#Load the RCP scenario table. In the CanESM2 model, RCPs cover 2006 to 2100.
df = pd.read_csv(filepath_or_buffer="rcp26.csv")

In [None]:
#Select the same predictor columns that were used to build X for the regressors.
X_fut = df.loc[:, ['a', 'b', 's', 't', 'z']]
#NOTE(review): Y_fut is not used by any cell visible here, and this line raises a
#KeyError if the scenario file has no MT column - confirm it is still needed.
Y_fut = df.loc[:, 'MT']

In [None]:
#Project MT for the scenario period with the chosen regressor (random forest here).
future = rfregressor.predict(X=X_fut)

In [None]:
#Wrap the predicted values in a DataFrame so they can be exported.
datafuture = pd.DataFrame(future)

In [None]:
#Write the predicted MT values to a .csv file in the working directory.
datafuture.to_csv(path_or_buf='predictedMT.csv')