# SVR RBF Regression

In [1]:
## load modules
import sys
from pathlib import Path

sys.path.append("..")

import pandas as pd

from modeling.functions import get_features, modelling_fc
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler

# Random seed constant for reproducibility; pass it as `random_state` to any
# estimator that draws random numbers.
RSEED = 42



In [2]:
## read data
# Load the feature-enriched wind data set, using TIMESTAMP as a datetime index.
data = pd.read_csv(
    '../data/GEFCom2014Data/Wind/raw_data_incl_features.csv',
    parse_dates=['TIMESTAMP'],
    index_col='TIMESTAMP',
)

# Fill gaps by linear interpolation.
# NOTE(review): this runs over the whole frame, i.e. across all ZONEID rows and
# before the train/test split -- confirm that this is intended.
data = data.interpolate(method='linear')

# One-hot encode the two cardinal wind-direction columns; the first level of
# each is dropped.
data = pd.get_dummies(data, columns=['WD100CARD', 'WD10CARD'], drop_first=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 175440 entries, 2012-01-01 01:00:00 to 2014-01-01 00:00:00
Data columns (total 46 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   ZONEID         175440 non-null  int64  
 1   TARGETVAR      175440 non-null  float64
 2   U10            175440 non-null  float64
 3   V10            175440 non-null  float64
 4   U100           175440 non-null  float64
 5   V100           175440 non-null  float64
 6   HOUR           175440 non-null  int64  
 7   MONTH          175440 non-null  int64  
 8   WEEKDAY        175440 non-null  int64  
 9   IS_HOLIDAY     175440 non-null  int64  
 10  WS10           175440 non-null  float64
 11  WS100          175440 non-null  float64
 12  WD10           175440 non-null  float64
 13  WD100          175440 non-null  float64
 14  U100NORM       175440 non-null  float64
 15  V100NORM       175440 non-null  float64
 16  WD100CARD_ENE  175440 non-null  uint8  


In [3]:
# Chronological hold-out split on the timestamp index: rows up to and including
# 2013-07-01 00:00:00 form the training set, rows from 2013-07-01 01:00:00
# onwards form the test set (label slices include both end points).
data_train = data.loc[:'2013-07-01 00:00:00']
data_test = data.loc['2013-07-01 01:00:00':]

# Feature definitions come from the project helper in modeling.functions.
feature_dict = get_features(data)

In [4]:
# Candidate values for the SVR regularisation parameter C.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10]}

# RBF-kernel support vector regressor and the scaler handed to modelling_fc.
model = SVR(kernel='rbf')
scaler = MinMaxScaler()

In [5]:
# Run the project's modelling helper for the RBF SVR configured above.
# NOTE(review): `log=False` / `infotext_mlflow=None` presumably switch off
# MLflow tracking -- confirm in modeling.functions.
results = modelling_fc(
    data_train,
    data_test,
    feature_dict,
    model=model,
    scaler=scaler,
    print_scores=True,
    log=False,
    infotext_mlflow=None,
    param_grid=param_grid,
    zone_params=None,
    n_jobs=3,
)

Total number of fits: 2000
feature combination: all

Scaler: MinMaxScaler
Scaled X_train min/max: 0.0, 1.0
Scaled X_test min/max: -0.0, 1.0

ZONEID 1
Fitting 5 folds for each of 5 candidates, totalling 25 fits




[CV] END ............................................C=0.001; total time=   4.1s
[CV] END ............................................C=0.001; total time=   4.2s
[CV] END ............................................C=0.001; total time=   4.2s
[CV] END .............................................C=0.01; total time=   3.5s
[CV] END ............................................C=0.001; total time=   4.3s
[CV] END ............................................C=0.001; total time=   4.5s
[CV] END .............................................C=0.01; total time=   3.8s
[CV] END .............................................C=0.01; total time=   3.5s
[CV] END .............................................C=0.01; total time=   3.5s
[CV] END ..............................................C=0.1; total time=   2.7s
[CV] END .............................................C=0.01; total time=   3.3s
[CV] END ..............................................C=0.1; total time=   2.8s
[CV] END ...................

In [6]:
# Persist the results table under the name stored in its MODEL column.
# to_csv does not create missing parent directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails only after the long fit.
Path('../results').mkdir(parents=True, exist_ok=True)
results.to_csv(f'../results/{results.MODEL.iloc[0]}.csv')

# SVR Linear

In [7]:
from sklearn.svm import LinearSVR

# Candidate values for the regularisation parameter C.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10]}

# LinearSVR uses a pseudo random number generator in its solver, so it is
# seeded with the notebook-wide RSEED to make the reported scores reproducible.
# max_iter is raised well above the library default.
model = LinearSVR(max_iter=100000, random_state=RSEED)
scaler = MinMaxScaler()

In [8]:
# Run the project's modelling helper for the LinearSVR configured above.
# NOTE(review): `log=False` / `infotext_mlflow=None` presumably switch off
# MLflow tracking -- confirm in modeling.functions.
results = modelling_fc(
    data_train,
    data_test,
    feature_dict,
    model=model,
    scaler=scaler,
    print_scores=True,
    log=False,
    infotext_mlflow=None,
    param_grid=param_grid,
    zone_params=None,
    n_jobs=3,
)

Total number of fits: 2000
feature combination: all

Scaler: MinMaxScaler
Scaled X_train min/max: 0.0, 1.0
Scaled X_test min/max: -0.0, 1.0

ZONEID 1
Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] END ............................................C=0.001; total time=   0.0s
[CV] END ............................................C=0.001; total time=   0.0s
[CV] END ............................................C=0.001; total time=   0.0s
[CV] END ............................................C=0.001; total time=   0.0s
[CV] END ............................................C=0.001; total time=   0.0s
[CV] END .............................................C=0.01; total time=   0.0s
[CV] END .............................................C=0.01; total time=   0.0s
[CV] END .............................................C=0.01; total time=   0.0s
[CV] END .............................................C=0.01; total time=   0.0s
[CV] END .............................................C=0.01;

In [9]:
# Persist the results table under the name stored in its MODEL column.
# to_csv does not create missing parent directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails only after the long fit.
Path('../results').mkdir(parents=True, exist_ok=True)
results.to_csv(f'../results/{results.MODEL.iloc[0]}.csv')

In [10]:
# Display the results table returned by the most recent modelling_fc call
# (the LinearSVR run).
results

Unnamed: 0_level_0,BEST_PARAMS,CV,MODEL,FC,TESTSCORE,TRAINSCORE
ZONE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ZONE1,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.185621,LinearSVR,no_comp_plus_100Norm,0.196151,0.181768
ZONE2,"{'C': 1, 'dual': True, 'epsilon': 0.0, 'fit_in...",0.146374,LinearSVR,no_deg_norm,0.175274,0.144052
ZONE3,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.154066,LinearSVR,no_deg,0.153906,0.151082
ZONE4,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.179809,LinearSVR,no_deg_norm,0.173779,0.177577
ZONE5,"{'C': 1, 'dual': True, 'epsilon': 0.0, 'fit_in...",0.180844,LinearSVR,no_deg,0.177918,0.177672
ZONE6,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.181829,LinearSVR,all,0.191555,0.178882
ZONE7,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.139328,LinearSVR,no_deg_norm,0.148585,0.135304
ZONE8,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.163255,LinearSVR,no_deg_norm,0.19347,0.160554
ZONE9,"{'C': 10, 'dual': True, 'epsilon': 0.0, 'fit_i...",0.165618,LinearSVR,no_comp_plus_100Norm,0.163529,0.164977
ZONE10,"{'C': 1, 'dual': True, 'epsilon': 0.0, 'fit_in...",0.200264,LinearSVR,no_deg_norm,0.212425,0.198059
