# SVR RBF Regression

In [1]:
## load modules
import sys
sys.path.append("..")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from modeling.functions import get_features, modelling_fc 
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler



# Random seed constant for reproducibility.
# NOTE(review): RSEED is not referenced by any cell visible in this notebook —
# confirm whether it should be passed to the model/helper or removed.
RSEED = 42



In [2]:
## read data
# Load the wind data set (including engineered features) and index it by timestamp.
data = pd.read_csv(
    '../data/GEFCom2014Data/Wind/raw_data_incl_features.csv',
    parse_dates=['TIMESTAMP'],
    index_col='TIMESTAMP',
)

# Fill gaps by linear interpolation on the numeric columns only.
# WD100CARD / WD10CARD are still un-encoded at this point (presumably string
# labels); calling DataFrame.interpolate on object-dtype columns is deprecated
# since pandas 2.1. Assigning the result back instead of using inplace=True
# also keeps this cell safe to re-run.
numeric_cols = data.select_dtypes(include='number').columns
data[numeric_cols] = data[numeric_cols].interpolate(method='linear')

# One-hot encode the cardinal wind-direction columns; drop_first=True omits the
# first level of each column to avoid perfectly collinear dummies.
data = pd.get_dummies(data, columns=['WD100CARD', 'WD10CARD'], drop_first=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 175440 entries, 2012-01-01 01:00:00 to 2014-01-01 00:00:00
Data columns (total 46 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   ZONEID         175440 non-null  int64  
 1   TARGETVAR      175440 non-null  float64
 2   U10            175440 non-null  float64
 3   V10            175440 non-null  float64
 4   U100           175440 non-null  float64
 5   V100           175440 non-null  float64
 6   HOUR           175440 non-null  int64  
 7   MONTH          175440 non-null  int64  
 8   WEEKDAY        175440 non-null  int64  
 9   IS_HOLIDAY     175440 non-null  int64  
 10  WS10           175440 non-null  float64
 11  WS100          175440 non-null  float64
 12  WD10           175440 non-null  float64
 13  WD100          175440 non-null  float64
 14  U100NORM       175440 non-null  float64
 15  V100NORM       175440 non-null  float64
 16  WD100CARD_ENE  175440 non-null  uint8  


In [11]:
# train-test-split and get features
# Chronological split on the DatetimeIndex: everything up to and including
# 2013-07-01 00:00 is training data, everything from 2013-07-01 01:00 onwards
# is test data (label-based slices include their endpoints).
data_train = data.loc[:'2013-07-01 00:00:00']
data_test = data.loc['2013-07-01 01:00:00':]

# Feature lookup produced by the project helper from the full frame.
# NOTE(review): presumably only column names are derived here — confirm in
# modeling.functions that no test-set statistics leak into the features.
feature_dict = get_features(data)

In [4]:
# Estimator and feature scaler handed to the modelling helper.
model = SVR(kernel='rbf')
scaler = MinMaxScaler()

# Candidate values for the SVR parameter C that the helper searches over.
# NOTE(review): the recorded run further down reports "1 candidates" per zone,
# so that output was produced with a different grid — re-run to refresh it.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10]}

In [8]:
# Fit and evaluate the SVR through the project helper; the recorded output shows
# one 5-fold search per ZONEID. The returned table is kept in `results`.
# NOTE(review): the meaning of log / infotext_mlflow / zone_params is defined in
# modeling.functions — presumably MLflow logging is switched off here; confirm.
results = modelling_fc(
    data_train,
    data_test,
    feature_dict,
    model=model,
    scaler=scaler,
    param_grid=param_grid,
    zone_params=None,
    n_jobs=3,
    print_scores=True,
    log=False,
    infotext_mlflow=None,
)

Scaler: MinMaxScaler
Scaled X_train min/max: 0.0, 1.0
Scaled X_test min/max: -0.0, 1.0

ZONEID 1
Fitting 5 folds for each of 1 candidates, totalling 5 fits




[CV] END ............................................C=0.001; total time=   4.6s
[CV] END ............................................C=0.001; total time=   4.7s
[CV] END ............................................C=0.001; total time=   4.7s
[CV] END ............................................C=0.001; total time=   3.6s
[CV] END ............................................C=0.001; total time=   3.7s
Scaler: MinMaxScaler
Scaled X_train min/max: 0.0, 1.0
Scaled X_test min/max: -0.01, 1.0

ZONEID 2
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ............................................C=0.001; total time=   4.0s
[CV] END ............................................C=0.001; total time=   4.0s
[CV] END ............................................C=0.001; total time=   4.0s
[CV] END ............................................C=0.001; total time=   3.5s
[CV] END ............................................C=0.001; total time=   3.5s
Scaler: MinMaxScaler
Scaled X_tra

In [9]:
# Write the results table to ../results/, using the MODEL value of the first
# row as the file name.
model_name = results['MODEL'].iloc[0]
results.to_csv(f'../results/{model_name}.csv')