<a href="https://colab.research.google.com/github/ge43jef/GEEHYDRO/blob/block3/kernel_function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scikit-Learn Regression 2
[Support Vector Regression](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR)

In this lab, we will use the FLUXNET dataset and scikit-learn to implement Support Vector Regression.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import learning_curve

In [None]:
# Daily FLUXNET2015 subset for site US-Ne1 (2001-2013).
data = pd.read_csv('FLX_US-Ne1_FLUXNET2015_SUBSET_DD_2001-2013_1-4.csv' , delimiter=",", skipinitialspace=True,  parse_dates=True)

# FLUXNET2015 files encode missing values as -9999. Left in place, those
# sentinels would be treated as real measurements by the regression.
MISSING_VALUE = -9999

# Six meteorological predictors followed by the regression target ("lh").
# The target must stay the LAST column: X / y below are split by position.
meteo = (
    pd.DataFrame(
        {"sw": data.SW_IN_F, "lw": data.LW_IN_F, "tmp": data.TA_F,
         "pre": data.PA_F, "u10": data.WS_F, "vpd": data.VPD_F, "lh": data.LE_CORR})
    .replace(MISSING_VALUE, np.nan)
    .dropna()                      # keep only days where every variable is available
    .reset_index(drop=True)
)

data_all = np.array(meteo)
X = data_all[:, :-1]   # all predictor columns
y = data_all[:, -1]    # target column ("lh")

In [None]:
# 5-fold cross-validation of an SVR with scikit-learn's default kernel.
# KFold without shuffling splits the samples into consecutive blocks.
# NOTE(review): SVR is sensitive to feature scale and the features are used
# unscaled here - consider standardising them before fitting.
kf = KFold(n_splits=5)
fold_scores = []  # R^2 on the held-out part of every fold
for train, test in kf.split(X):
    X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
    kernel_model = SVR(C=1.0, epsilon=0.2)
    kernel_model.fit(X_train , y_train)
    y_pred = kernel_model.predict(X_test)
    fold_scores.append(r2_score(y_test, y_pred))

# After the loop kernel_model / X_test / y_test / y_pred belong to the LAST
# fold; the cross-validated performance is the average over all folds.
print("R^2 per fold:", np.round(fold_scores, 3))
print("mean R^2 over folds =", np.mean(fold_scores))

In [None]:
# Predictions of the most recently fitted model on its held-out samples.
y_pred = kernel_model.predict(X_test)

# Skill of the predictions themselves (not of a line fitted through them).
r2 = r2_score(y_test, y_pred)
# np.sqrt(MSE) instead of mean_squared_error(..., squared=False): the
# `squared` argument is deprecated/removed in recent scikit-learn releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Least-squares line "prediction = slope * truth + intercept". It was fitted
# with the true values as x, so it must also be evaluated on true values.
slope, intercept = np.polyfit(y_test, y_pred, 1)
line_x = np.array([y_test.min(), y_test.max()])
line_y = slope * line_x + intercept

fig, ax = plt.subplots(2, 1, figsize=(6, 12), sharey=True)

# Top panel: observed vs. predicted series over the test samples.
# NOTE(review): the plotted target is LE_CORR - confirm the unit in the label.
ax[0].plot(y_test, marker='x', c='r', label='True Value')
ax[0].plot(y_pred, c='b', label='Our Prediction')
ax[0].set(xlabel="time (day)", ylabel="evaporation rate (mm/d)")
ax[0].legend()

# Bottom panel: predicted against true values with the fitted line.
ax[1].scatter(y_test, y_pred, c='b')
ax[1].plot(line_x, line_y, "r--", lw=2)
text = (f"$y={slope:0.3f}\\;x{intercept:+0.3f}$\n"
        f"$R^2 = {r2:0.3f}$\n"
        f"$RMSE = {rmse:0.3f} $ ")
ax[1].text(0.05, 0.95, text, transform=ax[1].transAxes,
           fontsize=14, verticalalignment='top')
ax[1].set(xlabel='True Value', ylabel='Predict Value')

print("coefficient of determination R^2 =", kernel_model.score(X_test, y_test))

### Now let's try different kernel functions

#### Linear kernel function

In [None]:
# 5-fold cross-validation of an SVR with a linear kernel.
# NOTE(review): SVR is sensitive to feature scale and the features are used
# unscaled here - consider standardising them before fitting.
kf = KFold(n_splits=5)
fold_scores = []  # R^2 on the held-out part of every fold
for train, test in kf.split(X):
    X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
    kernel_model = SVR( kernel = 'linear')
    kernel_model.fit(X_train , y_train)
    y_pred = kernel_model.predict(X_test)
    fold_scores.append(r2_score(y_test, y_pred))

print("R^2 per fold:", np.round(fold_scores, 3))
print("mean R^2 over folds =", np.mean(fold_scores))

# Everything below refers to the LAST fold (the values left by the loop).

# Skill of the predictions themselves (not of a line fitted through them).
r2 = r2_score(y_test, y_pred)
# np.sqrt(MSE) instead of mean_squared_error(..., squared=False): the
# `squared` argument is deprecated/removed in recent scikit-learn releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Least-squares line "prediction = slope * truth + intercept". It was fitted
# with the true values as x, so it must also be evaluated on true values.
slope, intercept = np.polyfit(y_test, y_pred, 1)
line_x = np.array([y_test.min(), y_test.max()])
line_y = slope * line_x + intercept

fig, ax = plt.subplots(2, 1, figsize=(6, 12), sharey=True)

# Top panel: observed vs. predicted series over the test samples.
# NOTE(review): the plotted target is LE_CORR - confirm the unit in the label.
ax[0].plot(y_test, marker='x', c='r', label='True Value')
ax[0].plot(y_pred, c='b', label='Our Prediction')
ax[0].set(xlabel="time (day)", ylabel="evaporation rate (mm/d)")
ax[0].legend()

# Bottom panel: predicted against true values with the fitted line.
ax[1].scatter(y_test, y_pred, c='b')
ax[1].plot(line_x, line_y, "r--", lw=2)
text = (f"$y={slope:0.3f}\\;x{intercept:+0.3f}$\n"
        f"$R^2 = {r2:0.3f}$\n"
        f"$RMSE = {rmse:0.3f} $ ")
ax[1].text(0.05, 0.95, text, transform=ax[1].transAxes,
           fontsize=14, verticalalignment='top')
ax[1].set(xlabel='True Value', ylabel='Predict Value')

print("coefficient of determination R^2 =", kernel_model.score(X_test, y_test))

#### Poly kernel function

In [None]:
# 5-fold cross-validation of an SVR with a polynomial kernel.
# NOTE(review): SVR is sensitive to feature scale and the features are used
# unscaled here - consider standardising them before fitting.
kf = KFold(n_splits=5)
fold_scores = []  # R^2 on the held-out part of every fold
for train, test in kf.split(X):
    X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
    kernel_model = SVR( kernel = 'poly')
    kernel_model.fit(X_train , y_train)
    y_pred = kernel_model.predict(X_test)
    fold_scores.append(r2_score(y_test, y_pred))

print("R^2 per fold:", np.round(fold_scores, 3))
print("mean R^2 over folds =", np.mean(fold_scores))

# Everything below refers to the LAST fold (the values left by the loop).

# Skill of the predictions themselves (not of a line fitted through them).
r2 = r2_score(y_test, y_pred)
# np.sqrt(MSE) instead of mean_squared_error(..., squared=False): the
# `squared` argument is deprecated/removed in recent scikit-learn releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Least-squares line "prediction = slope * truth + intercept". It was fitted
# with the true values as x, so it must also be evaluated on true values.
slope, intercept = np.polyfit(y_test, y_pred, 1)
line_x = np.array([y_test.min(), y_test.max()])
line_y = slope * line_x + intercept

fig, ax = plt.subplots(2, 1, figsize=(6, 12), sharey=True)

# Top panel: observed vs. predicted series over the test samples.
# NOTE(review): the plotted target is LE_CORR - confirm the unit in the label.
ax[0].plot(y_test, marker='x', c='r', label='True Value')
ax[0].plot(y_pred, c='b', label='Our Prediction')
ax[0].set(xlabel="time (day)", ylabel="evaporation rate (mm/d)")
ax[0].legend()

# Bottom panel: predicted against true values with the fitted line.
ax[1].scatter(y_test, y_pred, c='b')
ax[1].plot(line_x, line_y, "r--", lw=2)
text = (f"$y={slope:0.3f}\\;x{intercept:+0.3f}$\n"
        f"$R^2 = {r2:0.3f}$\n"
        f"$RMSE = {rmse:0.3f} $ ")
ax[1].text(0.05, 0.95, text, transform=ax[1].transAxes,
           fontsize=14, verticalalignment='top')
ax[1].set(xlabel='True Value', ylabel='Predict Value')

print("coefficient of determination R^2 =", kernel_model.score(X_test, y_test))

## Please try plotting a learning curve here, following the Jupyter notebook regression_linear_polynomial.ipynb