In [1]:
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn.preprocessing import Normalizer, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.ensemble import IsolationForest
from commons import mean_absolute_percentage_error
from joblib import dump, load

In [2]:
# Load the regression dataset; the comma separator is pandas' default, so it is left implicit.
data = pd.read_csv('notebooks/datasets/reg_seven.csv')

In [3]:
# Peek at the last five rows to sanity-check the load.
data.tail()

Unnamed: 0,difficulty30rsi,difficulty90var,fee_to_reward90rsiUSD,hashrate30var,median_transaction_fee7rocUSD,mining_profitability,price30smaUSD,price3wmaUSD,price7wmaUSD,sentinusd90emaUSD,size90trx,top100cap,transactionvalueUSD,priceUSD
2461,58.535,1.008768e+23,48.628,5.188934e+37,-51.183,0.137,7309.0,7271.0,7311.0,8952225768,-0.057,15.143,19540.0,7263.0
2462,58.535,1.001958e+23,48.386,5.055737e+37,-44.109,0.145,7302.0,7315.0,7323.0,8866101420,-0.057,15.156,16883.0,7361.0
2463,58.535,9.950605e+22,48.278,5.1785539999999995e+37,0.51,0.144,7298.0,7399.0,7356.0,8763231772,-0.058,15.163,14961.0,7469.0
2464,58.535,9.880747e+22,48.427,5.196838999999999e+37,-48.673,0.152,7288.0,7411.0,7363.0,8730840664,-0.059,15.171,22475.0,7388.0
2465,58.535,9.81001e+22,48.495,4.726052e+37,-33.718,0.135,7286.0,7354.0,7350.0,8654833019,-0.06,15.162,17976.0,7293.0


In [4]:
# (rows, columns) of the loaded frame.
data.shape

(2466, 14)

In [5]:
# Column labels; the final column is the one split off below as the regression target.
data.columns

Index(['difficulty30rsi', 'difficulty90var', 'fee_to_reward90rsiUSD',
       'hashrate30var', 'median_transaction_fee7rocUSD',
       'mining_profitability', 'price30smaUSD', 'price3wmaUSD', 'price7wmaUSD',
       'sentinusd90emaUSD', 'size90trx', 'top100cap', 'transactionvalueUSD',
       'priceUSD'],
      dtype='object')

In [6]:
# Position of the last column, i.e. the number of feature columns that precede the target.
length = len(data.columns) - 1

In [7]:
# Feature matrix: every column except the last one.
X=data.iloc[:,:length]

In [8]:
# Target: the last column, selected with a slice so it stays a one-column DataFrame
# (it is flattened to 1-D after the train/test split).
y=data.iloc[:,length:]

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the shuffled split reproducible.
# NOTE(review): the rows look chronologically ordered (moving-average price features), so a
# shuffled split may leak future information into training — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=7,
)

In [10]:
# Flatten the training target to a 1-D array, the shape the regressor's fit expects.
y_train = np.asarray(y_train).ravel()

In [11]:
# Flatten the test target to a 1-D array so it lines up with the 1-D predictions.
y_test = np.asarray(y_test).ravel()

In [12]:
# Start with an empty list of pipeline steps.
estimators = list()

In [13]:
# Register the standardisation step as a (name, estimator) tuple, the form Pipeline documents.
# The list is assigned in place rather than appended to, so re-running this cell cannot add a
# second 'standard' step (Pipeline raises ValueError when step names are not unique).
estimators[:] = [('standard', StandardScaler())]

In [14]:
# Wrap the collected preprocessing steps in a single Pipeline object.
scaling = Pipeline(steps=estimators)

In [15]:
# Fit the scaling pipeline on the training features only and replace X_train with the
# scaled result. X_train is overwritten, so re-running this cell refits on already-scaled data.
X_train=scaling.fit_transform(X_train)

In [16]:
# Apply the already-fitted scaling to the test features (transform only, no refit).
# X_test is overwritten, so re-running this cell would scale it a second time.
X_test=scaling.transform(X_test)

In [28]:
# RBF-kernel support vector regressor with C=10000; verbose=True lets libsvm print its
# training log. All other hyper-parameters stay at their defaults.
svr = SVR(
    kernel='rbf',
    C=10000,
    verbose=True,
)

In [29]:
# Train on the scaled training features; the cell output is the fitted estimator's repr.
svr.fit(X_train,y_train)

[LibSVM]

SVR(C=10000, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=True)

In [30]:
# R^2 on the training set, for comparison with the test-set score in the next cell.
svr.score(X_train,y_train)

0.9995155951691477

In [31]:
svr.score(X_test,y_test) # R^2 on the held-out test set

0.9985772643749331

In [32]:
# Predictions for the scaled test features; reused by every metric cell below.
y_pred=svr.predict(X_test)

In [33]:
# Side-by-side view of the actual and predicted targets on the test set.
pd.DataFrame({'y_test': y_test, 'y_pred': y_pred})

Unnamed: 0,y_test,y_pred
0,6912.000,6922.510344
1,6233.000,6265.023442
2,235.065,235.892172
3,609.126,614.296264
4,453.641,461.248239
5,852.382,831.615338
6,342.284,338.982883
7,344.899,346.261751
8,644.487,623.170023
9,3804.000,3780.662482


In [34]:
mean_absolute_error(y_test,y_pred) # MAE: mean absolute error on the test set

58.10107452336661

In [35]:
mean_squared_error(y_test,y_pred) # MSE: mean squared error on the test set

20412.569449136645

In [36]:
np.sqrt(mean_squared_error(y_test,y_pred)) # RMSE: square root of the test-set MSE

142.87256366824474

In [37]:
# MAPE computed by the project helper imported from `commons`.
# NOTE(review): the result is presumably expressed in percent — confirm against the helper.
mean_absolute_percentage_error(y_test,y_pred) # MAPE

2.999104948099392

In [38]:
# The SVR was trained on standardised features, so it is only usable together with the
# scaler fitted above. Persist that scaling pipeline next to the model; otherwise the
# train-set statistics are lost once the kernel shuts down.
# NOTE(review): the scaler file name is a new artifact — align it with the project's naming.
dump(scaling,'notebooks/trained_models/SVM_reg_seven_new_scaler.joblib')
# Persist the trained regressor (unchanged path; kept last so the cell output stays the same).
dump(svr,'notebooks/trained_models/SVM_reg_seven_new.joblib')

['notebooks/trained_models/SVM_reg_seven_new.joblib']