In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
plt.style.use('ggplot') 
import calendar
import quandl
import pickle
from datetime import datetime, timedelta
import re  
import psycopg2
import sys
api_key = '764-Cog1Q3xa6Rns5pmj'
quandl.ApiConfig.api_key = api_key
import sqlalchemy
from sqlalchemy.sql import select, and_, or_, not_, desc, asc
from sqlalchemy import Table, Column, Integer, DateTime, String,Float, ForeignKey
from sqlalchemy.orm import sessionmaker

In [2]:
fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 10
fig_size[1] = 10
plt.rcParams["figure.figsize"] = fig_size

In [3]:
def connect(user, password, db, host='localhost', port=5432):
    '''Returns a connection and a metadata object'''
    # We connect with the help of the PostgreSQL URL
    url = 'postgresql+psycopg2://{}:{}@{}:{}/{}'
    url = url.format(user, password, host, port, db)

    # The return value of create_engine() is our connection object
    con = sqlalchemy.create_engine(url, client_encoding='utf8')

    # We then bind the connection to MetaData()
    meta = sqlalchemy.MetaData(bind=con)

    return con, meta

CREATE TABLES 

In [4]:
con, meta = connect('postgres', '', 'robotdb')
    
tickers = Table('Ticker', meta,
    Column('date', DateTime, primary_key = True),
    Column('coin', String, primary_key = True),
    Column('price', Float),
    Column('volume', Float),
    Column('screen', String, primary_key = True)    
)

macd = Table('Macd', meta,
    Column('date', DateTime, primary_key = True),
    Column('coin', String, primary_key = True),
    Column('ema12', Float),
    Column('ema_26', Float),
    Column('macd_line', Float),
    Column('signal_line', Float),
    Column('histogram', Float),
    Column('screen', Integer, primary_key=True)
)

boillinger = Table('Boillinger', meta,
                       Column('date', DateTime, primary_key=True),
                       Column('coin', String, primary_key=True),
                       Column('upper_band', Float),
                       Column('lower_band', Float),
                       Column('sma20', Float),
                       Column('height', Float),
                       Column('screen', Integer, primary_key=True)
                       )

ema = Table('Ema', meta,
                Column('date', DateTime, primary_key=True),
                Column('coin', String, primary_key=True),
                Column('ema5', Float),
                Column('ema20', Float),
                Column('ema5_theta', Float),
                Column('ema20_theta', Float),
                Column('screen', Integer, primary_key=True)
                )

long_positions = Table('Long', meta,
                       Column('id_position', Integer, primary_key=True),
                       Column('coin', String, primary_key=True),
                       Column('strategy', String),
                       Column('size_position', Float),
                       Column('date_ask', DateTime),
                       Column('ask', Float),
                       Column('date_settlement', DateTime),
                       Column('settlement', Float),
                       Column('take_profit', Float),
                       Column('stop_loss', Float),
                       Column('exit_date', DateTime),
                       Column('exit_price', Float),
                       Column('log_return', Float),
                       Column('source', String),
                       Column('status', String)
                       )


mkt_trend = Table('Market_trend', meta,
                  Column('coin', String, primary_key=True),
                  Column('date', DateTime, primary_key=True),
                  Column('screen', Integer, primary_key=True),
                  Column('dif_current', Float),
                  Column('dif_base', Float),
                  Column('d_dif', Float),
                  Column('theta_current', Float),
                  Column('theta_base', Float),
                  Column('d_theta', Float),
                  Column('max_growth', Float),
                  Column('max_loss', Float),
                  Column('vote', Integer)
                  )


rsi = Table('Rsi', meta,
    Column('date', DateTime, primary_key = True),
    Column('coin', String, primary_key = True),
    Column('rsi', Float),
    Column('screen', Integer, primary_key=True)
)

balances = Table('Balance', meta,
                 Column('date', DateTime, primary_key=True),
                 Column('coin', String, primary_key=True),
                 Column('size_position', Float)
                 )


years = mdates.YearLocator()   # every year
months = mdates.MonthLocator()  # every month
yearsFmt = mdates.DateFormatter('%Y')

In [5]:
def get_result(models_result, model, confusion_matrix):
    success_rate = confusion_matrix[1][1]/(confusion_matrix[1][1]+confusion_matrix[0][1])
    models_result.append({"model": model,
                          "roc_auc": roc_auc,
                          "succ_trades": success_rate})
    return models_result

In [6]:
def get_macds(coin = None, date = '2019-12-31', screen = 1):
    if coin:
        s = select([macd])\
            .where(and_(macd.c.coin == coin, macd.c.date <= date, macd.c.screen==screen))\
            .order_by(desc(macd.c.date))
    else:
         s = select([macd])\
            .where(and_(macd.c.date <= date, macd.c.screen==screen))\
            .order_by(desc(macd.c.date))
            
    rows = con.execute(s)
    macd_df = pd.DataFrame(rows.fetchall()).iloc[::-1]
    if not macd_df.empty:
        macd_df.columns = rows.keys()
    return macd_df

def get_tickers(coin = None, date = '2019-12-31', screen = 1):
    if coin:
        s = select([tickers])\
            .where(and_(tickers.c.coin == coin, tickers.c.date <= date, tickers.c.screen == screen))\
            .order_by(
            desc(tickers.c.date))
    else:
        s = select([tickers])\
            .where(and_(tickers.c.date <= date, tickers.c.screen == screen))\
            .order_by(
            desc(tickers.c.date))
    rows = con.execute(s)
    tickers_df = pd.DataFrame(rows.fetchall()).iloc[::-1]
    if not tickers_df.empty:
        tickers_df.columns = rows.keys()
    return tickers_df

def get_emas(coin=None, date='2019-12-31', screen=1):
    if coin:
        s = select([ema])\
            .where(and_(ema.c.coin == coin, ema.c.date <= date, ema.c.screen == screen))\
            .order_by(ema.c.date.desc())
    else:
        s = select([ema])\
            .where(and_(ema.c.date <= date, ema.c.screen == screen))\
            .order_by(ema.c.date.desc())
    rows = con.execute(s)
    ema_df = pd.DataFrame(rows.fetchall()).iloc[::-1]
    if not ema_df.empty:
        ema_df.columns = rows.keys()
    return ema_df

def get_mkt_trend(coin=None, date='2019-12-31', screen=1):
    if coin:
        s = select([mkt_trend]).\
            where(and_(mkt_trend.c.coin == coin, mkt_trend.c.date <= date, mkt_trend.c.screen == screen)).\
            order_by(desc(mkt_trend.c.date))
    else:
        s = select([mkt_trend]).\
            where(and_(mkt_trend.c.date <= date, mkt_trend.c.screen == screen)).\
            order_by(desc(mkt_trend.c.date))
    rows = con.execute(s)
    mkt_trend_df = pd.DataFrame(rows.fetchall()).iloc[::-1]
    mkt_trend_df.columns = rows.keys()
    return mkt_trend_df

def get_rsis(coin=None, date='2019-12-31', screen=1):
    if coin:
        s = select([rsi]) \
            .where(and_(rsi.c.coin == coin,
                        rsi.c.date <= date,
                       rsi.c.screen == screen))\
            .order_by(desc(rsi.c.date))
    else:
        s = select([rsi]) \
            .where(and_(rsi.c.date <= date,
                        rsi.c.screen == screen
                       ))\
            .order_by(desc(rsi.c.date))
    rows = con.execute(s)
    rsi_df = pd.DataFrame(rows.fetchall()).iloc[::-1]
    if not rsi_df.empty:
        rsi_df.columns = rows.keys()
    return rsi_df

In [7]:
def manipulate_mkt_data():
    mkt_trend_df = get_mkt_trend()
    tickers_df_two = get_tickers()
    macds_df = get_macds()
    tickers_df = pd.merge(tickers_df_two, macds_df, how='inner', left_on=['date', 'coin'], right_on=['date', 'coin'])
    tickers_df['ema_dif'] = np.log(tickers_df['price']/tickers_df['ema12'])
    df = pd.merge(mkt_trend_df, tickers_df, how='inner', left_on=['date', 'coin'], right_on=['date', 'coin'])
    rsis_df = get_rsis()
    df = pd.merge(df, rsis_df, how='inner', left_on=['date', 'coin'], right_on=['date', 'coin'])
    return df
    
df = manipulate_mkt_data()

In [8]:
df.loc[df['max_growth'] >= 0.1, 'growth_sign'] = 1
df.loc[df['max_growth'] < 0.1, 'growth_sign'] = 0
df.loc[df['max_loss'] <= -0.05, 'loss_sign'] = 1
df.loc[df['max_loss'] > -0.05, 'loss_sign'] = 0

In [9]:
full_data_set = df[['coin', 'date', 'dif_base', 'dif_current', 'd_dif',
                    'theta_base', 'theta_current','d_theta',
                    'rsi','ema_dif', 
                    'max_growth', 'max_loss', 'loss_sign','growth_sign']]

dates_ = full_data_set[['date']]
shifted_parms = full_data_set[['coin', 'dif_current', 'theta_current']].shift(2)
shifted_parms['date'] = dates_
full_data_set = pd.merge(full_data_set, shifted_parms, how='inner', left_on=['date', 'coin'], right_on=['date', 'coin'])
full_data_set.head()
full_data_set['dif_current'] = full_data_set['dif_current_x']
full_data_set['theta_current'] = full_data_set['theta_current_x']
full_data_set['dif_base_p'] = full_data_set['dif_current_y']
full_data_set['theta_base_p'] = full_data_set['theta_current_y']

In [10]:
df_model = full_data_set.sort_values(['date'])

In [11]:
def get_strength_index():
    tickers_df_two = get_tickers()
    for c in tickers_df_two.coin.unique():
        tickers_df_two_c = tickers_df_two[tickers_df_two['coin'] == c]
        print(c)
        # 6 * 4h = 24h
        delta_t = 1
        si_np = [np.nan]
        for index in range(len(tickers_df_two_c)):
            base_price = tickers_df_two_c.iloc[index].price
            if index >= (len(tickers_df_two_c)-delta_t):
                break
            current_price = tickers_df_two_c.iloc[index+1].price
            volume = tickers_df_two_c.iloc[index+1].volume
            si = (current_price - base_price) * volume
            si_np.append(si)
        tickers_df_two_c['strength'] = si_np
    return tickers_df_two_c.dropna()

df_model = pd.merge(df_model, get_strength_index(), how='inner',
                    left_on=['date', 'coin'], right_on=['date', 'coin'])
df_model['ema_s'] = df_model.drop(['date', 'coin'], axis=1).strength.\
                    ewm(span=2, min_periods=2, adjust=True, ignore_na=False).mean()
    

USDT_BTC


In [12]:
from sklearn.preprocessing import StandardScaler

df_model['strength'] = StandardScaler().fit_transform(df_model['strength'].reshape(-1, 1))

In [13]:
df_model = df_model[['date', 'dif_base', 'dif_current', 'd_dif', 'theta_base', 'theta_current', 'rsi',
                     'strength','d_theta', 'ema_dif', 'dif_base_p', 'theta_base_p',
                     'max_growth', 'max_loss', 'loss_sign','growth_sign']].dropna()
df_model.head()

Unnamed: 0,date,dif_base,dif_current,d_dif,theta_base,theta_current,rsi,strength,d_theta,ema_dif,dif_base_p,theta_base_p,max_growth,max_loss,loss_sign,growth_sign
0,2017-04-10 17:10:00,0.006839,0.006634,-0.030053,0.104919,0.011985,0.588285,0.0261,-0.885771,-0.000525,0.006599,-0.058632,0.007547,-0.00565,0.0,0.0
1,2017-04-10 21:10:00,0.006634,0.006448,-0.02804,0.011985,-0.179209,0.574948,0.027566,-15.953077,0.002439,0.006839,0.104919,0.00358,-0.009233,0.0,0.0
2,2017-04-11 01:10:00,0.006448,0.005566,-0.13671,-0.179209,0.100103,0.556087,0.026174,-1.558582,-0.005664,0.006634,0.011985,0.010713,-0.000406,0.0,0.0
3,2017-04-11 05:10:00,0.005566,0.005073,-0.088695,0.100103,-0.219351,0.512042,0.027169,-3.191264,-0.003687,0.006448,-0.179209,0.020385,-0.001018,0.0,0.0
4,2017-04-11 09:10:00,0.005073,0.005476,0.07953,-0.219351,0.043722,0.558849,0.027945,-1.199325,0.004088,0.005566,0.100103,0.016671,-6e-05,0.0,0.0


In [14]:
df_model.to_pickle("historical_mkt.pkl")

In [15]:
df_model = pd.read_pickle("historical_mkt.pkl")

In [16]:
df_model.head()

Unnamed: 0,date,dif_base,dif_current,d_dif,theta_base,theta_current,rsi,strength,d_theta,ema_dif,dif_base_p,theta_base_p,max_growth,max_loss,loss_sign,growth_sign
0,2017-04-10 17:10:00,0.006839,0.006634,-0.030053,0.104919,0.011985,0.588285,0.0261,-0.885771,-0.000525,0.006599,-0.058632,0.007547,-0.00565,0.0,0.0
1,2017-04-10 21:10:00,0.006634,0.006448,-0.02804,0.011985,-0.179209,0.574948,0.027566,-15.953077,0.002439,0.006839,0.104919,0.00358,-0.009233,0.0,0.0
2,2017-04-11 01:10:00,0.006448,0.005566,-0.13671,-0.179209,0.100103,0.556087,0.026174,-1.558582,-0.005664,0.006634,0.011985,0.010713,-0.000406,0.0,0.0
3,2017-04-11 05:10:00,0.005566,0.005073,-0.088695,0.100103,-0.219351,0.512042,0.027169,-3.191264,-0.003687,0.006448,-0.179209,0.020385,-0.001018,0.0,0.0
4,2017-04-11 09:10:00,0.005073,0.005476,0.07953,-0.219351,0.043722,0.558849,0.027945,-1.199325,0.004088,0.005566,0.100103,0.016671,-6e-05,0.0,0.0


In [50]:
#GOOD FOR KNN
# x_features = [ 'dif_current',
#              'theta_current',
#               'rsi', 'ema_dif']
#GOOD FOR SVC
x_features = ['dif_current', 'dif_base', 'd_dif',
             'theta_current', 'theta_base', 'd_theta',
              'strength', 'rsi', 'ema_dif']
y_features = ['growth_sign']

In [51]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import GaussianNB

models_result = []

In [52]:
X = df_model[x_features]
y = df_model[y_features].growth_sign.values

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_sample(X_train,Y_train)

Random Forest Classifier SMOTE, With GridSearch

In [57]:
model = 'RF SMOTE GS'

param_grid = {'max_depth': np.arange(1, 20), 'min_samples_leaf':np.arange(1, 8)}

rf_os_cv = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='roc_auc')
rf_os_cv.fit(X_train_os, Y_train_os)
print(rf_os_cv.best_params_)

{'max_depth': 14, 'min_samples_leaf': 1}


In [58]:
Y_predict = rf_os_cv.predict(X_test)

In [59]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.560372060372


In [60]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

KNN Classifier With Balanced Dataset and GridSearchCV

In [24]:
model = 'KNN SMOTE GS'
param_grid = {'n_neighbors': np.arange(1, 20)}

knn_cv = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='roc_auc')
knn_cv.fit(X_train_os, Y_train_os)
print(knn_cv.best_params_)

{'n_neighbors': 9}


In [25]:
Y_predict = knn_cv.predict(X_test)

In [26]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.532116532117


In [27]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [28]:
model = 'SVC SMOTE GS'

kernels = ['linear', 'poly', 'rbf', 'sigmoid']
Cs = [0.01, 0.1, 1, 10, 100]
gammas = [0.001, 0.01, 0.1, 1]
param_grid = {'C': Cs, 'gamma' : gammas, 'kernel': kernels}

svm_greed = GridSearchCV(svm.SVC(random_state=42), param_grid, cv=5, scoring='precision')
svm_greed.fit(X_train_os, Y_train_os)
svm_greed.best_params_

{'C': 100, 'gamma': 1, 'kernel': 'rbf'}

In [29]:
Y_predict = svm_greed.predict(X_test)

In [30]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.612144612145


In [31]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [32]:
model = 'AdaBoost SMOTE GS'

param_grid = {'n_estimators': np.arange(1, 32)}

ada_greed = GridSearchCV(AdaBoostClassifier(random_state=42), param_grid, cv=5, scoring='precision')
ada_greed.fit(X_train_os, Y_train_os)
ada_greed.best_params_

{'n_estimators': 27}

In [33]:
Y_predict = ada_greed.predict(X_test)

In [34]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.457002457002


In [35]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [36]:
model = 'ExtraTree SMOTE GS'

param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': np.arange(1,15),
              'min_samples_leaf':np.arange(1, 8),
             'n_estimators': np.arange(1, 5)}

extra_model = GridSearchCV(ExtraTreesClassifier(random_state=42),param_grid, cv=5,scoring='precision')
extra_model.fit(X_train_os, Y_train_os)
extra_model.best_params_

{'criterion': 'entropy',
 'max_depth': 14,
 'min_samples_leaf': 1,
 'n_estimators': 2}

In [37]:
Y_predict = extra_model.predict(X_test)

In [38]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.485257985258


In [39]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [40]:
model = 'Dummy'

dummy = DummyClassifier(random_state=42)
dummy.fit(X_train_os, Y_train_os)

DummyClassifier(constant=None, random_state=42, strategy='stratified')

In [41]:
Y_predict = dummy.predict(X_test)

In [42]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.473499473499


In [43]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [44]:
model = 'Naive Bayes'

gnb = GaussianNB()
y_pred = gnb.fit(X_train_os, Y_train_os)

In [45]:
Y_predict = gnb.predict(X_test)

In [46]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.506318006318


In [47]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [61]:
pd.DataFrame(models_result)

Unnamed: 0,model,roc_auc,succ_trades
0,RF SMOTE GS,0.487715,0.0
1,RF SMOTE GS,0.560372,0.1


In [None]:
extra_model.best_params_

In [None]:
model = 'ExtraTree SMOTE'

extra_model = ExtraTreesClassifier(random_state=42,
                                   criterion= 'entropy',
                                   max_depth= 14,
                                   min_samples_leaf= 1,
                                   n_estimators= 2 )
extra_model.fit(X_train_os, Y_train_os)

In [None]:
Y_predict = extra_model.predict(X_test)

In [None]:
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

In [None]:
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [None]:
# # save the model to disk
# filename = 'finalized_model.sav'
# pickle.dump(extra_model, open(filename, 'wb'))
# loaded_model = pickle.load(open(filename, 'rb'))
# result = loaded_model.score(X_test, Y_test)
# print(result)