In [20]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import read_csv, set_option
from pandas.plotting import scatter_matrix
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier

from mpl_toolkits.mplot3d import Axes3D

import re
from collections import OrderedDict
from time import time
import sqlite3       

from scipy.linalg import svd   
from scipy import stats
from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE

import warnings
warnings.filterwarnings('ignore')

from IPython.html.widgets import interactive, fixed

In [21]:
dataset = pd.read_csv("https://raw.githubusercontent.com/tatsath/fin-ml/master/Chapter%206%20-%20Sup.%20Learning%20-%20Classification%20models/CaseStudy3%20-%20Bitcoin%20Trading%20Strategy/BitstampData_sample.csv")

In [22]:
# describe data
set_option('precision', 3)
dataset.describe()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
count,500000.0,24521.0,24521.0,24521.0,24521.0,24521.0,24521.0,24521.0
mean,1340000000.0,9.822,9.825,9.818,9.821,21.022,206.749,9.822
std,8660000.0,2.666,2.667,2.665,2.666,55.478,547.135,2.666
min,1325000000.0,3.8,3.8,3.8,3.8,0.0,0.0,3.8
25%,1333000000.0,7.2,7.2,7.2,7.2,2.17,21.02,7.2
50%,1340000000.0,10.8,10.8,10.79,10.79,7.34,67.92,10.793
75%,1348000000.0,11.84,11.84,11.83,11.84,20.24,199.28,11.833
max,1355000000.0,16.41,16.41,15.49,16.0,2958.478,31212.195,16.387


In [23]:
dataset.isna().sum()

Timestamp                 0
Open                 475478
High                 475478
Low                  475478
Close                475478
Volume_(BTC)         475478
Volume_(Currency)    475478
Weighted_Price       475478
dtype: int64

In [25]:
dataset[dataset.columns.values] = dataset[dataset.columns.values].ffill()

In [26]:
dataset= dataset.drop(columns=["Timestamp"])

In [29]:
dataset["short_mavg"] = dataset["Close"].rolling(window=10,min_periods=1,center=False).mean()
dataset["long_mavg"] = dataset["Close"].rolling(window=10,min_periods=1,center=False).mean()

dataset["signal"] = np.where(dataset["short_mavg"] > dataset["long_mavg"],1.0,0.0)

In [30]:
#calculation of exponential moving average
def EMA(df, n):
    EMA = pd.Series(df['Close'].ewm(span=n, min_periods=n).mean(), name='EMA_' + str(n))
    return EMA
dataset['EMA10'] = EMA(dataset, 10)
dataset['EMA30'] = EMA(dataset, 30)
dataset['EMA200'] = EMA(dataset, 200)
dataset.head()

#calculation of rate of change
def ROC(df, n):  
    M = df.diff(n - 1)  
    N = df.shift(n - 1)  
    ROC = pd.Series(((M / N) * 100), name = 'ROC_' + str(n))   
    return ROC
dataset['ROC10'] = ROC(dataset['Close'], 10)
dataset['ROC30'] = ROC(dataset['Close'], 30)

#Calculation of price momentum
def MOM(df, n):   
    MOM = pd.Series(df.diff(n), name='Momentum_' + str(n))   
    return MOM
dataset['MOM10'] = MOM(dataset['Close'], 10)
dataset['MOM30'] = MOM(dataset['Close'], 30)

#calculation of relative strength index
def RSI(series, period):
 delta = series.diff().dropna()
 u = delta * 0
 d = u.copy()
 u[delta > 0] = delta[delta > 0]
 d[delta < 0] = -delta[delta < 0]
 u[u.index[period-1]] = np.mean( u[:period] ) #first value is sum of avg gains
 u = u.drop(u.index[:(period-1)])
 d[d.index[period-1]] = np.mean( d[:period] ) #first value is sum of avg losses
 d = d.drop(d.index[:(period-1)])
 rs = u.ewm(com=period-1, adjust=False).mean() / \
 d.ewm(com=period-1, adjust=False).mean()
 return 100 - 100 / (1 + rs)
dataset['RSI10'] = RSI(dataset['Close'], 10)
dataset['RSI30'] = RSI(dataset['Close'], 30)
dataset['RSI200'] = RSI(dataset['Close'], 200)

#calculation of stochastic osillator.

def STOK(close, low, high, n): 
 STOK = ((close - low.rolling(n).min()) / (high.rolling(n).max() - low.rolling(n).min())) * 100
 return STOK

def STOD(close, low, high, n):
 STOK = ((close - low.rolling(n).min()) / (high.rolling(n).max() - low.rolling(n).min())) * 100
 STOD = STOK.rolling(3).mean()
 return STOD

dataset['%K10'] = STOK(dataset['Close'], dataset['Low'], dataset['High'], 10)
dataset['%D10'] = STOD(dataset['Close'], dataset['Low'], dataset['High'], 10)
dataset['%K30'] = STOK(dataset['Close'], dataset['Low'], dataset['High'], 30)
dataset['%D30'] = STOD(dataset['Close'], dataset['Low'], dataset['High'], 30)
dataset['%K200'] = STOK(dataset['Close'], dataset['Low'], dataset['High'], 200)
dataset['%D200'] = STOD(dataset['Close'], dataset['Low'], dataset['High'], 200)

In [31]:
#Calculation of moving average
def MA(df, n):
    MA = pd.Series(df['Close'].rolling(n, min_periods=n).mean(), name='MA_' + str(n))
    return MA
dataset['MA21'] = MA(dataset, 10)
dataset['MA63'] = MA(dataset, 30)
dataset['MA252'] = MA(dataset, 200)
dataset.tail()

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,short_mavg,long_mavg,signal,...,RSI200,%K10,%D10,%K30,%D30,%K200,%D200,MA21,MA63,MA252
499994,13.34,13.34,13.34,13.34,3.9,52.026,13.34,13.34,13.34,0.0,...,44.067,,,,,10.0,10.0,13.34,13.34,13.38
499995,13.34,13.34,13.34,13.34,3.9,52.026,13.34,13.34,13.34,0.0,...,44.067,,,,,10.0,10.0,13.34,13.34,13.38
499996,13.34,13.34,13.34,13.34,3.9,52.026,13.34,13.34,13.34,0.0,...,44.067,,,,,10.0,10.0,13.34,13.34,13.38
499997,13.34,13.34,13.34,13.34,3.9,52.026,13.34,13.34,13.34,0.0,...,44.067,,,,,10.0,10.0,13.34,13.34,13.38
499998,13.34,13.34,13.34,13.34,3.9,52.026,13.34,13.34,13.34,0.0,...,44.067,,,,,10.0,10.0,13.34,13.34,13.379


In [32]:
dataset=dataset.drop(['High','Low','Open', 'Volume_(Currency)','short_mavg','long_mavg'], axis=1)
dataset = dataset.dropna(axis=0)
dataset.tail()


Unnamed: 0,Close,Volume_(BTC),Weighted_Price,signal,EMA10,EMA30,EMA200,ROC10,ROC30,MOM10,...,RSI200,%K10,%D10,%K30,%D30,%K200,%D200,MA21,MA63,MA252
499965,13.34,3.9,13.34,0.0,13.351,13.356,13.386,0.075,-0.299,0.01,...,44.067,11.111,11.11,11.111,11.111,10.0,10.0,13.362,13.345,13.388
499966,13.34,3.9,13.34,0.0,13.349,13.355,13.386,0.075,-0.224,0.01,...,44.067,11.111,11.11,11.111,11.111,10.0,10.0,13.363,13.343,13.387
499967,13.34,3.9,13.34,0.0,13.348,13.354,13.385,-0.596,-0.075,0.01,...,44.067,0.0,7.407,11.111,11.111,10.0,10.0,13.364,13.342,13.387
499968,13.34,3.9,13.34,0.0,13.346,13.353,13.385,-0.596,0.0,-0.08,...,44.067,0.0,3.704,11.111,11.111,10.0,10.0,13.356,13.342,13.387
499969,13.34,3.9,13.34,0.0,13.345,13.352,13.385,-0.596,0.075,-0.08,...,44.067,0.0,3.659e-13,11.111,11.111,10.0,10.0,13.348,13.342,13.387
