In [2]:
import numpy as np
import pandas as pd
import sklearn
import sklearn.svm as svm
from operator import add
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

In [3]:
def is_number(s):
    """Return True if *s* can be interpreted as a number, otherwise False.

    Two checks are tried in order:

    1. ``float(s)`` -- accepts ints, floats and numeric strings such as
       ``"3.14"``, ``"1e5"``, ``"nan"`` or ``"inf"``.
    2. ``unicodedata.numeric(s)`` -- accepts a single Unicode character
       that carries a numeric value (for example the vulgar fraction
       one half).

    Inputs that neither check can handle (``None``, lists, ...) yield
    False instead of raising.
    """
    try:
        float(s)
        return True
    except OverflowError:
        # Only numeric objects (e.g. a huge int) overflow float();
        # strings such as "1e999" convert to inf instead.
        return True
    except (TypeError, ValueError):
        # TypeError: s is not a str/bytes/number (e.g. None).
        # ValueError: s is a string that float() cannot parse.
        pass

    import unicodedata

    try:
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        # TypeError: s is not a single-character str.
        # ValueError: the character has no numeric value.
        pass
    return False

    
def plot_(h_axis,v1_axis,v2_axis, title="Daily Delivered (kWh)"):
    """Draw actual and predicted series on one wide figure and show it.

    Parameters
    ----------
    h_axis : sliceable sequence
        Values for the horizontal axis; every 92nd element is used as a
        tick position.
    v1_axis : sequence
        Actual values, drawn in blue with the legend label "actual".
    v2_axis : sequence
        Predicted values, drawn in red with the legend label "predicted".
    title : str
        Accepted for interface compatibility; the title is currently not
        drawn.

    Notes
    -----
    Updates the global matplotlib ``font.size`` rcParam to 15 and displays
    the figure via ``plt.show()``. Returns None.
    """
    plt.rcParams.update({'font.size': 15})
    plt.figure(figsize=(20,3))
    plt.plot(h_axis, v1_axis, color="blue",marker="o", label="actual")
    plt.plot(h_axis,v2_axis, color="red",marker="o",label="predicted")
    plt.xticks(h_axis[0::92])
    plt.legend(loc='upper right')
    plt.ylabel('Normalized \nEnergy Consumption')
    # tight_layout acts on the current figure, so it must run after the
    # figure exists and its artists are in place; calling it first would
    # create (and lay out) a stray empty figure instead.
    plt.tight_layout()
    plt.show()
        
# split into train and test sets
def split_(ds, dimension=2, train_size=365):
    """Split *ds* into a leading training part and a trailing test part.

    Parameters
    ----------
    ds : sliceable
        Data to split. With ``dimension == 1`` it is sliced along its only
        axis (``ds[a:b]``); otherwise it is sliced along the first of two
        axes (``ds[a:b, :]``), so it must support 2-D indexing.
    dimension : int
        ``1`` selects 1-D slicing; any other value selects 2-D slicing.
    train_size : int
        Number of leading elements/rows placed in the training part.
        Defaults to 365, the value that used to be hard-coded.

    Returns
    -------
    tuple
        ``(train, test)`` where ``train`` holds the first ``train_size``
        entries and ``test`` holds the remainder.
    """
    if dimension == 1:
        return ds[:train_size], ds[train_size:]
    return ds[:train_size, :], ds[train_size:, :]


def evaluation_(y, yhat):
    """Print MSE, RMSE and R-squared of predictions *yhat* against *y*.

    The scores come from ``sklearn.metrics`` (``mean_squared_error`` and
    ``r2_score``); RMSE is the square root of the MSE. Nothing is
    returned -- the results are only written to standard output.
    """
    mean_sq_err = metrics.mean_squared_error(y, yhat)
    # All scores are computed up front so nothing is printed if one fails.
    report = (
        ("MSE:", mean_sq_err),
        ("RMSE:", np.sqrt(mean_sq_err)),
        ("R-Squared:", metrics.r2_score(y, yhat)),
    )

    print("Results of sklearn.metrics:")
    for label, value in report:
        print(label, value)
    
    
def svm_model(ds, X,Y,kernel,trainig_percentage=0.76):
    """Fit an SVR, report its errors, then plot and evaluate all predictions.

    Parameters
    ----------
    ds : mapping-like
        Indexed with ``"time"`` and ``"dailyDelivered"``; used only to
        supply the horizontal axis and the actual values for the plot.
    X : array-like
        Features. Split with ``split_`` using its default 2-D slicing and
        also passed whole to ``predict``.
    Y : array-like
        Targets. Split with ``split_(Y, 1)``.
    kernel : str
        ``'poly'`` or ``'linear'`` select those kernels; any other value
        falls back to the RBF kernel.
    trainig_percentage : float
        Not used by this function; the split point is decided by
        ``split_``.

    Side effects
    ------------
    Prints the mean squared error on the training part (labelled
    "pre-covid") and on the test part (labelled "post-covid"), the mean
    signed residual on the test part, then calls ``plot_`` and
    ``evaluation_`` on predictions for the whole of ``X``. Returns None.
    """
    def _banner(heading, value):
        # Heading, separator line, then the value -- one report section.
        print(heading)
        print("**********************************")
        print(value)

    # Hyper-parameters common to every kernel choice.
    shared = {"C": 100, "epsilon": .1}
    if kernel == 'poly':
        regressor = svm.SVR(kernel='poly', gamma='auto', degree=3, coef0=1, **shared)
    elif kernel == 'linear':
        regressor = svm.SVR(kernel='linear', gamma=0.1, **shared)
    else:  # default: rbf
        regressor = svm.SVR(kernel='rbf', gamma=0.1, **shared)

    # Train on the leading (pre-covid) part, hold out the remainder.
    X_fit, X_holdout = split_(X)
    Y_fit, Y_holdout = split_(Y, 1)
    regressor.fit(X_fit, Y_fit)

    # Mean squared error on the training (pre-covid) part.
    fit_pred = regressor.predict(X_fit)
    fit_mse = np.square(Y_fit - fit_pred).mean(axis=None).ravel()
    _banner("prediction error on pre-covid data", fit_mse)

    # Mean squared error on the held-out (post-covid) part.
    holdout_pred = regressor.predict(X_holdout)
    holdout_residual = Y_holdout - holdout_pred
    holdout_mse = np.square(holdout_residual).mean(axis=None).ravel()
    _banner("prediction error on post-covid data", holdout_mse)

    # Mean signed residual: positive means actual exceeds predicted.
    _banner(
        "Error sign (actual-prediction) for post covid data: If Positive--> actual is greater than predicted",
        holdout_residual.mean(axis=None),
    )

    # Predict over the full data set for the plot and the final metrics.
    yhat = regressor.predict(X)
    plot_(ds["time"], ds["dailyDelivered"], yhat.flatten())

    evaluation_(Y, yhat)