In [1]:
#modules to load saved models
from joblib import dump, load
from tensorflow.keras.models import load_model

#data preprocessing modules
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

#regression metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from commons import mean_absolute_percentage_error #keep commons.py in notebooks folder

#classification metrics
from sklearn.metrics import accuracy_score, recall_score, precision_score, roc_auc_score, f1_score

#modules for plots if required
import matplotlib.pyplot as plt
%matplotlib inline
from keras.utils import plot_model
from IPython.display import SVG
from keras.utils import model_to_dot 

Using TensorFlow backend.


# Regression

In [2]:
#file path
data=pd.read_csv('datasets/reg_ninety.csv',sep=',')

# 'datasets/reg_interval1.csv'
# 'datasets/reg_interval2.csv'
# 'datasets/reg_interval3.csv'
# 'datasets/reg_seven.csv'
# 'datasets/reg_thirty.csv'
# 'datasets/reg_ninety.csv'

In [3]:
#visualize the data
data.head(3)

Unnamed: 0,difficulty30ema,difficulty30rsi,difficulty30sma,difficulty30wma,difficulty3ema,difficulty3sma,difficulty3wma,difficulty7ema,difficulty7sma,difficulty7wma,...,median_transaction_feeUSD,mining_profitability,price90emaUSD,price90wmaUSD,sentinusd90smaUSD,size90trx,top100cap,transactions,transactionvalueUSD,priceUSD
0,5385392.0,97.889,5182950.0,5652157.0,6690038.0,6695826.0,6695826.0,6526779.0,6695826.0,6695826.0,...,0.0474,7220.0,39.444,43.032,54550954,0.334,19.962,52572,2592.0,89.829
1,5469936.0,97.889,5260549.0,5749761.0,6692932.0,6695826.0,6695826.0,6569041.0,6695826.0,6695826.0,...,0.0539,7990.0,40.945,44.686,57359476,0.331,20.024,63095,4400.0,85.608
2,5549026.0,97.889,5338147.0,5842360.0,6694379.0,6695826.0,6695826.0,6600737.0,6695826.0,6695826.0,...,0.06,8852.0,42.682,46.586,60260338,0.329,19.987,63766,4478.0,83.204


In [4]:
#create train-test splits for SVM
length=data.shape[1]-1
X=data.iloc[:,:length]
y=data.iloc[:,length:]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, shuffle=True, random_state=7)
y_train=np.ravel(y_train)
y_test=np.ravel(y_test)
estimators=[]
estimators.append(['standard', StandardScaler()])
scaling=Pipeline(estimators)
X_train=scaling.fit_transform(X_train)
X_test=scaling.transform(X_test)

In [5]:
#load saved model
SVM=load('trained_models/SVM_reg_ninety.joblib')

# 'trained_models/SVM_reg_interval1.joblib'
# 'trained_models/SVM_reg_interval2.joblib'
# 'trained_models/SVM_reg_interval3.joblib'
# 'trained_models/SVM_reg_seven.joblib'
# 'trained_models/SVM_reg_thirty.joblib'
# 'trained_models/SVM_reg_ninety.joblib'

In [6]:
#show model properties
SVM.get_params()

{'C': 500000,
 'cache_size': 200,
 'coef0': 0.0,
 'degree': 3,
 'epsilon': 0.1,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': True}

In [7]:
#make predictions
y_pred=SVM.predict(X_test)

In [8]:
#show predictions in tabular format
combine=zip(y_test,y_pred)
pd.DataFrame(combine,columns=['y_test','y_pred'])

Unnamed: 0,y_test,y_pred
0,1014.000,1078.368491
1,6767.000,6821.338126
2,69.751,51.602856
3,585.950,566.610420
4,852.382,786.980917
5,835.689,811.607507
6,224.866,252.192401
7,852.448,741.703594
8,638.456,653.968304
9,106.115,107.836498


In [9]:
#calculate metrics
mae=mean_absolute_error(y_test,y_pred)
mape=mean_absolute_percentage_error(y_test, y_pred)
rmse=np.sqrt(mean_squared_error(y_test,y_pred))
r2=abs(r2_score(y_test, y_pred))

In [10]:
#show metrics in tabular format
metrics=[mae,mape,rmse,r2]
metrics_labels=['MAE','MAPE','RMSE','R^2']
pd.DataFrame(zip( metrics_labels,metrics))

Unnamed: 0,0,1
0,MAE,98.028277
1,MAPE,4.967171
2,RMSE,203.109909
3,R^2,0.99586


# Classification

In [11]:
#file path
data=pd.read_csv('datasets/cls_ninety.csv',sep=',')

# 'datasets/cls_interval1.csv'
# 'datasets/cls_interval2.csv'
# 'datasets/cls_interval3.csv'
# 'datasets/cls_seven.csv'
# 'datasets/cls_thirty.csv'
# 'datasets/cls_ninety.csv'

In [12]:
#visualize the data
data.head(3)

Unnamed: 0,activeaddresses,confirmationtime,difficulty30mom,difficulty3var,difficulty7mom,difficulty90var,difficulty90wma,fee_to_reward30stdUSD,fee_to_rewardUSD,hashrate3std,...,top100cap30roc,top100cap30rsi,top100cap30trx,top100cap7trx,top100cap90mom,top100cap90roc,top100cap90rsi,transactions,transactionvalueUSD,category
0,75756,7.273,2327950.0,-0.047,0.0,1168546000000.0,4430808.0,1.147,0.867,9120894000000.0,...,3.6,65.405,0.011,0.191,0.426,2.183,50.301,52572,2592.0,0
1,91875,7.956,2327950.0,-0.031,0.0,1243755000000.0,4492115.0,1.145,1.338,9102326000000.0,...,3.498,66.711,0.014,0.209,0.427,2.179,50.893,63095,4400.0,0
2,107142,8.229,2327950.0,-0.039,0.0,1315554000000.0,4552514.0,1.151,1.596,4382927000000.0,...,3.051,65.191,0.017,0.216,0.366,1.864,50.53,63766,4478.0,0


In [13]:
#create train-test splits for SVM
length=data.shape[1]-1
X=data.iloc[:,:length]
y=data.iloc[:,length:]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, shuffle=False, random_state=7)
y_train=np.ravel(y_train)
y_test=np.ravel(y_test)
estimators=[]
estimators.append(['standard', StandardScaler()])
scaling=Pipeline(estimators)
X_train=scaling.fit_transform(X_train)
X_test=scaling.transform(X_test)

In [14]:
#load saved model
SVM=load('trained_models/SVM_cls_ninety.joblib')

# 'trained_models/SVM_cls_interval1.joblib'
# 'trained_models/SVM_cls_interval2.joblib'
# 'trained_models/SVM_cls_interval3.joblib'
# 'trained_models/SVM_cls_seven.joblib'
# 'trained_models/SVM_cls_thirty.joblib'
# 'trained_models/SVM_cls_ninety.joblib'

In [15]:
#show model properties
SVM.get_params()

{'C': 1000,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': True}

In [16]:
#make predictions
y_pred=SVM.predict(X_test)

In [17]:
#show predictions in tabular format
combine=zip(y_test,y_pred)
pd.DataFrame(combine,columns=['y_test','y_pred'])

Unnamed: 0,y_test,y_pred
0,0,1
1,0,1
2,1,1
3,1,1
4,1,1
5,1,1
6,1,1
7,0,1
8,0,1
9,0,1


In [18]:
#calculate metrics
accuracy=accuracy_score(y_test,y_pred)
f1=f1_score(y_test,y_pred, average='binary') # try average='weighted' or 'binary' take higher
auc=roc_auc_score(y_test,y_pred)
recall=recall_score(y_test,y_pred)
precision=precision_score(y_test,y_pred)

In [19]:
#show metrics in tabular format
metrics=[accuracy, f1, auc, recall, precision]
metrics_labels=['Accuracy','F1-score','AUC','Recall','Precision']
pd.DataFrame(zip( metrics_labels,metrics))

Unnamed: 0,0,1
0,Accuracy,0.536383
1,F1-score,0.655332
2,AUC,0.566281
3,Recall,0.954955
4,Precision,0.498824
