In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
plt.style.use('ggplot') 
import calendar
import quandl
import pickle
from datetime import datetime, timedelta
import re  
import psycopg2
import sys
# SECURITY: credentials should not live in notebook source. The key is now
# taken from the QUANDL_API_KEY environment variable when it is set; the
# literal is kept only as a fallback so existing runs keep working.
# NOTE(review): this key is committed in plain text -- rotate it and then
# drop the fallback value.
import os
api_key = os.environ.get('QUANDL_API_KEY', '764-Cog1Q3xa6Rns5pmj')
quandl.ApiConfig.api_key = api_key
import sqlalchemy
from sqlalchemy.sql import select, and_, or_, not_, desc, asc
from sqlalchemy import Table, Column, Integer, DateTime, String,Float, ForeignKey
from sqlalchemy.orm import sessionmaker

In [2]:
# Default figure size for every plot in this notebook: 10 wide by 10 high.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 10, 10
plt.rcParams["figure.figsize"] = fig_size

In [3]:
def connect(user, password, db, host='localhost', port=5432):
    '''Return a SQLAlchemy engine and a MetaData object bound to it.

    The user name and password are percent-encoded before being placed in
    the connection URL, so credentials containing characters such as ``@``,
    ``:`` or ``/`` no longer corrupt the URL. Plain alphanumeric credentials
    produce exactly the same URL as before.

    Parameters
    ----------
    user, password : str
        Database credentials.
    db : str
        Name of the database to connect to.
    host : str, optional
        Database host (default ``'localhost'``).
    port : int, optional
        Database port (default ``5432``).

    Returns
    -------
    (con, meta)
        ``con`` is the object returned by ``sqlalchemy.create_engine`` and
        ``meta`` is a ``sqlalchemy.MetaData`` bound to it.
    '''
    from urllib.parse import quote

    # PostgreSQL URL understood by SQLAlchemy's psycopg2 dialect.
    url = 'postgresql+psycopg2://{}:{}@{}:{}/{}'.format(
        quote(str(user), safe=''),
        quote(str(password), safe=''),
        host, port, db)

    # The return value of create_engine() is our connection object.
    con = sqlalchemy.create_engine(url, client_encoding='utf8')

    # Bind the connection to MetaData() so tables can be declared against it.
    meta = sqlalchemy.MetaData(bind=con)

    return con, meta

CREATE TABLES 

In [4]:
# Connect to the local PostgreSQL database; `meta` is the MetaData object
# that every table declared in this cell is registered on.
con, meta = connect('postgres', '', 'robotdb')

# Ticker table, keyed by (date, coin, screen).
# `screen` is declared as Integer, matching the other tables in this cell;
# get_tickers() filters it against an integer (`screen == 1` by default).
tickers = Table('Ticker', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('price', Float),
    Column('volume', Float),
    Column('screen', Integer, primary_key=True)
)

# 'Macd' table; rows are keyed by (date, coin, screen).
macd = Table(
    'Macd', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('ema12', Float),
    Column('ema_26', Float),
    Column('macd_line', Float),
    Column('signal_line', Float),
    Column('histogram', Float),
    Column('screen', Integer, primary_key=True),
)

# 'Boillinger' table (name kept as stored in the database); rows are keyed
# by (date, coin, screen).
boillinger = Table(
    'Boillinger', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('upper_band', Float),
    Column('lower_band', Float),
    Column('sma20', Float),
    Column('height', Float),
    Column('screen', Integer, primary_key=True),
)

# 'Ema' table; rows are keyed by (date, coin, screen).
ema = Table(
    'Ema', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('ema5', Float),
    Column('ema20', Float),
    Column('ema5_theta', Float),
    Column('ema20_theta', Float),
    Column('screen', Integer, primary_key=True),
)

# 'Long' table; rows are keyed by (id_position, coin).
long_positions = Table(
    'Long', meta,
    Column('id_position', Integer, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('strategy', String),
    Column('size_position', Float),
    Column('date_ask', DateTime),
    Column('ask', Float),
    Column('date_settlement', DateTime),
    Column('settlement', Float),
    Column('take_profit', Float),
    Column('stop_loss', Float),
    Column('exit_date', DateTime),
    Column('exit_price', Float),
    Column('log_return', Float),
    Column('source', String),
    Column('status', String),
)


# 'Market_trend' table; rows are keyed by (coin, date, screen).
# `max_growth` is the column the notebook later thresholds into the
# `growth_sign` target.
mkt_trend = Table(
    'Market_trend', meta,
    Column('coin', String, primary_key=True),
    Column('date', DateTime, primary_key=True),
    Column('screen', Integer, primary_key=True),
    Column('dif_current', Float),
    Column('dif_base', Float),
    Column('d_dif', Float),
    Column('theta_current', Float),
    Column('theta_base', Float),
    Column('d_theta', Float),
    Column('max_growth', Float),
    Column('vote', Integer),
)


# 'Rsi' table; rows are keyed by (date, coin, screen).
rsi = Table(
    'Rsi', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('rsi', Float),
    Column('screen', Integer, primary_key=True),
)

# 'Balance' table; rows are keyed by (date, coin).
balances = Table(
    'Balance', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('size_position', Float),
)


# Matplotlib date helpers: a locator per year, a locator per month, and a
# formatter that renders dates as a four-digit year.
years, months = mdates.YearLocator(), mdates.MonthLocator()
yearsFmt = mdates.DateFormatter('%Y')

In [5]:
def get_result(models_result, model, confusion_matrix, roc_auc=None):
    """Append one model's evaluation summary to ``models_result``.

    Parameters
    ----------
    models_result : list
        List of result dicts; it is appended to in place and also returned.
    model : str
        Label stored under the ``"model"`` key.
    confusion_matrix : 2x2 indexable
        Read as ``confusion_matrix[i][j]``. NOTE(review): presumably the
        output of ``sklearn.metrics.confusion_matrix`` (rows = true class,
        columns = predicted class) -- confirm against callers.
    roc_auc : float, optional
        Value stored under the ``"roc_auc"`` key. When omitted, the
        module-level ``roc_auc`` variable is used, which is what this
        function always did before the parameter existed.

    Returns
    -------
    list
        ``models_result`` itself, with one dict appended.

    Raises
    ------
    NameError
        If ``roc_auc`` is not passed and no global ``roc_auc`` exists.
    """
    if roc_auc is None:
        # Backward-compatible fallback to the notebook-global value.
        try:
            roc_auc = globals()['roc_auc']
        except KeyError:
            raise NameError("name 'roc_auc' is not defined") from None

    cell_11 = confusion_matrix[1][1]
    cell_01 = confusion_matrix[0][1]
    cell_10 = confusion_matrix[1][0]

    predicted_positive = cell_11 + cell_01
    denominator = predicted_positive + cell_10
    nan = float('nan')

    # Guard the ratios: a model that never predicts the positive class would
    # otherwise divide by zero.
    succ_trades = predicted_positive / denominator if denominator else nan
    precision = cell_11 / predicted_positive if predicted_positive else nan

    models_result.append({"model": model,
                          "roc_auc": roc_auc,
                          "succ_trades": succ_trades,
                          "precision": precision})
    return models_result

In [6]:
def get_macds(coin=None, date='2019-12-31', screen=1):
    """Load rows of the ``macd`` table up to ``date`` for one ``screen``.

    When ``coin`` is truthy the query is additionally restricted to that
    coin. The query orders rows by date descending and the result is then
    reversed, so the returned DataFrame runs from oldest to newest. Column
    names are assigned only when at least one row came back.
    """
    filters = [macd.c.date <= date, macd.c.screen == screen]
    if coin:
        filters.insert(0, macd.c.coin == coin)
    query = select([macd]).where(and_(*filters)).order_by(macd.c.date.desc())

    result = con.execute(query)
    frame = pd.DataFrame(result.fetchall()).iloc[::-1]
    if not frame.empty:
        frame.columns = result.keys()
    return frame

def get_tickers(coin=None, date='2019-12-31', screen=1):
    """Load rows of the ``tickers`` table up to ``date`` for one ``screen``.

    When ``coin`` is truthy the query is additionally restricted to that
    coin. Rows are fetched newest first and reversed, so the returned
    DataFrame is in ascending date order. Column names are assigned only
    when the result is non-empty.
    """
    filters = [tickers.c.date <= date, tickers.c.screen == screen]
    if coin:
        filters.insert(0, tickers.c.coin == coin)
    query = select([tickers]).where(and_(*filters)).order_by(tickers.c.date.desc())

    result = con.execute(query)
    frame = pd.DataFrame(result.fetchall()).iloc[::-1]
    if not frame.empty:
        frame.columns = result.keys()
    return frame

def get_emas(coin=None, date='2019-12-31', screen=1):
    """Load rows of the ``ema`` table up to ``date`` for one ``screen``.

    When ``coin`` is truthy the query is additionally restricted to that
    coin. Rows are fetched newest first and reversed, so the returned
    DataFrame is in ascending date order. Column names are assigned only
    when the result is non-empty.
    """
    filters = [ema.c.date <= date, ema.c.screen == screen]
    if coin:
        filters.insert(0, ema.c.coin == coin)
    query = select([ema]).where(and_(*filters)).order_by(desc(ema.c.date))

    result = con.execute(query)
    frame = pd.DataFrame(result.fetchall()).iloc[::-1]
    if not frame.empty:
        frame.columns = result.keys()
    return frame

def get_mkt_trend(coin=None, date='2019-12-31', screen=1):
    """Load rows of the ``mkt_trend`` table up to ``date`` for one ``screen``.

    When ``coin`` is truthy the query is additionally restricted to that
    coin. Rows are fetched newest first and reversed, so the returned
    DataFrame is in ascending date order.

    Column names are assigned only when the result is non-empty, like the
    other ``get_*`` loaders in this notebook; previously an empty result
    raised a length-mismatch error when the column names were set.
    """
    filters = [mkt_trend.c.date <= date, mkt_trend.c.screen == screen]
    if coin:
        filters.insert(0, mkt_trend.c.coin == coin)
    s = select([mkt_trend]).where(and_(*filters)).order_by(desc(mkt_trend.c.date))

    rows = con.execute(s)
    mkt_trend_df = pd.DataFrame(rows.fetchall()).iloc[::-1]
    if not mkt_trend_df.empty:
        mkt_trend_df.columns = rows.keys()
    return mkt_trend_df

def get_rsis(coin=None, date='2019-12-31', screen=1):
    """Load rows of the ``rsi`` table up to ``date`` for one ``screen``.

    When ``coin`` is truthy the query is additionally restricted to that
    coin. Rows are fetched newest first and reversed, so the returned
    DataFrame is in ascending date order. Column names are assigned only
    when the result is non-empty.
    """
    filters = [rsi.c.date <= date, rsi.c.screen == screen]
    if coin:
        filters.insert(0, rsi.c.coin == coin)
    query = select([rsi]).where(and_(*filters)).order_by(rsi.c.date.desc())

    result = con.execute(query)
    frame = pd.DataFrame(result.fetchall()).iloc[::-1]
    if not frame.empty:
        frame.columns = result.keys()
    return frame

In [7]:
def manipulate_mkt_data():
    """Join market-trend, ticker, MACD and RSI rows into one DataFrame.

    All joins are inner joins on (date, coin). An ``ema_dif`` column,
    ``log(price / ema12)``, is added to the ticker/MACD join before it is
    merged with the market-trend rows. Non-key columns present on both
    sides of a merge (for example ``screen``) receive pandas' default
    ``_x`` / ``_y`` suffixes.
    """
    keys = ['date', 'coin']

    trend = get_mkt_trend()
    prices = get_tickers()
    macds = get_macds()

    prices = pd.merge(prices, macds, how='inner', left_on=keys, right_on=keys)
    prices['ema_dif'] = np.log(prices['price'] / prices['ema12'])

    merged = pd.merge(trend, prices, how='inner', left_on=keys, right_on=keys)
    rsis = get_rsis()
    return pd.merge(merged, rsis, how='inner', left_on=keys, right_on=keys)
    
# Build the joined market frame once; the following cells derive the
# modelling data set from `df`.
df = manipulate_mkt_data()

In [8]:
# Binary target: 1 where max_growth >= 0.10, 0 where it is below 0.10.
# Rows whose max_growth is missing match neither mask.
growth = df['max_growth']
df.loc[growth >= 0.10, 'growth_sign'] = 1
df.loc[growth < 0.10, 'growth_sign'] = 0
# Divide rsi by 100. NOTE: re-running this cell divides the column again.
df['rsi'] = df['rsi'] / 100

In [9]:
# Columns carried forward into the modelling data set.
full_data_set = df[['coin', 'date', 'dif_base', 'dif_current', 'd_dif',
                    'theta_base', 'theta_current', 'd_theta', 'rsi',
                    'ema_dif', 'max_growth', 'growth_sign']]

# Take coin / dif_current / theta_current from two rows earlier, keep the
# date of the current row, and join back on (date, coin). Only rows whose
# coin two rows earlier equals the current coin survive the inner join.
dates_ = full_data_set[['date']]
shifted_parms = full_data_set[['coin', 'dif_current', 'theta_current']].shift(2)
shifted_parms['date'] = dates_
full_data_set = pd.merge(full_data_set, shifted_parms, how='inner',
                         left_on=['date', 'coin'], right_on=['date', 'coin'])

# After the merge `_x` columns hold the current row's values and `_y` the
# shifted ones; expose them under their final names.
for target, source in (('dif_current', 'dif_current_x'),
                       ('theta_current', 'theta_current_x'),
                       ('dif_base_p', 'dif_current_y'),
                       ('theta_base_p', 'theta_current_y')):
    full_data_set[target] = full_data_set[source]

In [10]:
# Modelling frame: the full data set ordered chronologically by `date`.
df_model = full_data_set.sort_values(['date'])

In [11]:
def get_strength_index():
    """Return the ticker rows of every coin with a ``strength`` column added.

    Within each coin, in the row order returned by ``get_tickers()``, a
    row's ``strength`` is ``(price - previous row's price) * volume``. The
    first row of a coin has no predecessor and gets NaN. Rows containing
    any NaN are dropped from the result.

    The previous implementation returned only the frame of the last coin
    iterated; the per-coin frames are now concatenated. With a single coin
    the output is unchanged. If ``get_tickers()`` returns an empty frame,
    that empty frame is returned as-is.
    """
    tickers_all = get_tickers()
    if tickers_all.empty:
        return tickers_all

    per_coin = []
    for c in tickers_all.coin.unique():
        print(c)
        # Work on a copy so the column is not written onto a slice.
        coin_df = tickers_all[tickers_all['coin'] == c].copy()
        # Vectorised form of (price[i] - price[i-1]) * volume[i].
        coin_df['strength'] = coin_df['price'].diff() * coin_df['volume']
        per_coin.append(coin_df.dropna())
    return pd.concat(per_coin)

# Attach the strength index to the modelling frame (inner join on
# date/coin), then add `ema_s`, an exponentially weighted mean of
# `strength` with span 2.
strength_df = get_strength_index()
df_model = pd.merge(df_model, strength_df, how='inner',
                    left_on=['date', 'coin'], right_on=['date', 'coin'])
df_model['ema_s'] = df_model['strength'].ewm(
    span=2, min_periods=2, adjust=True, ignore_na=False).mean()
    

USDT_BTC


In [12]:
from sklearn.preprocessing import scale

# Replace `strength` with its sklearn-`scale`d version (the column is
# divided by 100 first). NOTE: overwrites the column in place.
df_model['strength'] = scale(df_model['strength'].div(100))

In [13]:
# Keep only the columns used for modelling and drop rows with any NaN.
model_columns = [
    'date', 'dif_base', 'dif_current', 'd_dif', 'theta_base',
    'theta_current', 'rsi', 'strength', 'd_theta', 'ema_dif',
    'dif_base_p', 'theta_base_p', 'max_growth', 'growth_sign',
]
df_model = df_model[model_columns].dropna()
df_model.head()

Unnamed: 0,date,dif_base,dif_current,d_dif,theta_base,theta_current,rsi,strength,d_theta,ema_dif,dif_base_p,theta_base_p,max_growth,growth_sign
0,2017-05-27 19:15:00,-0.064036,-0.067736,0.057782,1.331456,1.168655,0.390028,-0.074462,-0.122273,-0.038297,-0.06593,-1.145981,0.085196,0.0
1,2017-05-27 23:15:00,-0.067736,-0.063983,-0.055416,1.168655,-1.096829,0.434543,0.113554,-1.93854,0.013925,-0.064036,1.331456,0.065394,0.0
2,2017-05-28 03:15:00,-0.063983,-0.060871,-0.048632,-1.096829,1.130954,0.390022,0.03607,-2.031112,0.01864,-0.067736,1.168655,0.061871,0.0
3,2017-05-28 07:15:00,-0.060871,-0.052909,-0.130798,1.130954,0.149442,0.385072,0.137623,-0.867862,0.063735,-0.063983,-1.096829,0.003772,0.0
4,2017-05-28 11:15:00,-0.052909,-0.046516,-0.120826,0.149442,1.166785,0.416881,-0.013278,6.807588,0.038641,-0.060871,1.130954,0.0,0.0


In [14]:
# Feature set noted as good for KNN (the one in use).
x_features = ['dif_current', 'theta_current', 'rsi', 'ema_dif']

# Alternative feature set noted as good for SVC:
# x_features = ['dif_current', 'dif_base',
#               'theta_current', 'theta_base',
#               'rsi', 'ema_dif']

# Target column.
y_features = ['growth_sign']

In [15]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Accumulates one result dict per evaluated model (filled by get_result).
# Re-running this cell resets the collected results.
models_result = []

#Random Forest With Unbalanced Dataset: Not Good

Random Forest Classifier SMOTE, With GridSearch

In [16]:
# Random forest tuned with a grid search over tree depth and leaf size,
# trained on a SMOTE-oversampled training split.
# NOTE(review): SMOTE is applied before the 5-fold grid search, so
# synthetic samples are shared across CV folds and `best_score_` is likely
# optimistic -- compare with the test-set AUC computed below.
model = 'RF SMOTE GS'
param_grid = {
    'max_depth': np.arange(1, 20),
    'min_samples_leaf': np.arange(1, 8),
}

X = df_model[x_features]
y = df_model[y_features].growth_sign.values

# Hold out 20% for testing; only the training part is oversampled.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_sample(X_train, Y_train)

rf_os = RandomForestClassifier(random_state=42)
rf_os_cv = GridSearchCV(rf_os, param_grid, cv=5, scoring='roc_auc')
rf_os_cv.fit(X_train_os, Y_train_os)

print(rf_os_cv.best_params_)
print(rf_os_cv.best_score_)

{'max_depth': 13, 'min_samples_leaf': 1}
0.992749346868


In [17]:
# Class predictions of the fitted random-forest grid search on the test set.
Y_predict = rf_os_cv.predict(X_test)

In [18]:
# ROC curve and AUC on the test set. `Y_predict` comes from `.predict`, so
# the curve is built from predicted labels rather than scores.
# NOTE(review): consider predict_proba scores for a full ROC curve.
# `roc_auc` is also read as a global by get_result().
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.558460271717


In [19]:
# Record this model's metrics; get_result() appends to the list and reads
# the global `roc_auc`, so re-running this cell adds a duplicate row.
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

KNN Classifier With Balanced Dataset (SMOTE)

In [20]:
# 3-nearest-neighbours classifier trained on a SMOTE-oversampled training
# split and used to predict the untouched test split.
model = 'KNN SMOTE'

X = df_model[x_features]
y = df_model[y_features].growth_sign.values

X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_sample(X_train, Y_train)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train_os, Y_train_os)
Y_predict = knn.predict(X_test)

In [21]:
# ROC curve and AUC on the test set, computed from the predicted labels in
# `Y_predict`. `roc_auc` is also read as a global by get_result().
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.688349114862


In [22]:
# Record this model's metrics; get_result() appends to the list and reads
# the global `roc_auc`, so re-running this cell adds a duplicate row.
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

KNN Classifier With Balanced Dataset and GridSearchCV

In [36]:
# KNN with the number of neighbours chosen by a 5-fold grid search
# (scored by ROC AUC) on a SMOTE-oversampled training split.
# NOTE(review): oversampling happens before cross-validation, so the CV
# score is computed on folds that share synthetic samples.
model = 'KNN SMOTE GS'
param_grid = {
    'n_neighbors': np.arange(1, 20),
}

X = df_model[x_features]
y = df_model[y_features].growth_sign.values

X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_sample(X_train, Y_train)

knn = KNeighborsClassifier()
knn_cv = GridSearchCV(knn, param_grid, cv=5, scoring='roc_auc')
knn_cv.fit(X_train_os, Y_train_os)

print(knn_cv.best_params_)
print(knn_cv.best_score_)

{'n_neighbors': 2}
0.810606989428


In [42]:
# Save the fitted grid-search object to disk. The file is opened in a
# `with` block so the handle is flushed and closed even if pickling fails.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(knn_cv, model_file)

In [43]:
# Reload the saved model and score it on the test split.
# SECURITY: pickle.load can execute arbitrary code -- only load files that
# this notebook itself wrote.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.score(X_test, Y_test)
print(result)

1.0


Unnamed: 0,dif_current,theta_current,rsi,ema_dif
974,0.012276,-1.457141,0.711267,-0.011908
275,-0.020112,0.138120,0.358147,0.030742
411,0.003980,-0.820708,0.614763,0.025769
962,0.029566,-1.455359,0.673805,-0.005792
518,-0.015982,-1.309017,0.358793,-0.044602
1452,-0.007595,1.349404,0.643492,0.018355
1085,-0.001471,-1.464053,0.526326,0.006560
344,0.010142,-0.671063,0.503930,-0.016077
1050,0.017705,-1.456174,0.485120,0.011779
1185,0.011433,-1.519866,0.556622,0.077610


In [37]:
# Class predictions of the fitted KNN grid search on the test set.
Y_predict = knn_cv.predict(X_test)

In [38]:
# ROC curve and AUC on the test set, computed from the predicted labels in
# `Y_predict`. `roc_auc` is also read as a global by get_result().
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

1.0


In [40]:
# Record this model's metrics; get_result() appends to the list and reads
# the global `roc_auc`, so re-running this cell adds a duplicate row.
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [27]:
from sklearn import svm

# SVC tuned with a 5-fold grid search over C and gamma (scored by ROC AUC)
# on a SMOTE-oversampled training split.
model = 'SVC SMOTE GS'

X = df_model[x_features]
y = df_model[y_features].growth_sign.values

# Re-create the split in this cell. It was commented out, which made the
# cell depend on X_train / X_test / Y_train / Y_test left over from
# whichever cell ran last. With the same data and random_state the split is
# identical to the one used by the other model cells.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Cs = [0.001, 0.01, 0.1, 1, 10]
gammas = [0.001, 0.01, 0.1, 1]
param_grid = {'C': Cs, 'gamma' : gammas}

oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_sample(X_train,Y_train)

svm_greed = GridSearchCV(svm.SVC(), param_grid, cv=5, scoring='roc_auc')
svm_greed.fit(X_train_os, Y_train_os)
svm_greed.best_params_

{'C': 10, 'gamma': 1}

In [28]:
# Class predictions of the fitted SVC grid search on the test set.
Y_predict = svm_greed.predict(X_test)

In [29]:
# ROC curve and AUC on the test set, computed from the predicted labels in
# `Y_predict`. `roc_auc` is also read as a global by get_result().
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.570811033347


In [30]:
# Record this model's metrics; get_result() appends to the list and reads
# the global `roc_auc`, so re-running this cell adds a duplicate row.
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [31]:
# AdaBoost with the number of estimators chosen by a 5-fold grid search
# (scored by ROC AUC) on a SMOTE-oversampled training split.
model = 'AdaBoost SMOTE GS'

X = df_model[x_features]
y = df_model[y_features].growth_sign.values

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

param_grid = {'n_estimators': np.arange(1, 32)}

oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_sample(X_train,Y_train)

# Seed the classifier, as the random-forest cell does, so that re-running
# the notebook reproduces the same fitted model.
ada_greed = GridSearchCV(AdaBoostClassifier(random_state=42), param_grid, cv=5, scoring='roc_auc')
ada_greed.fit(X_train_os, Y_train_os)
ada_greed.best_params_

{'n_estimators': 31}

In [32]:
# Class predictions of the fitted AdaBoost grid search on the test set.
Y_predict = ada_greed.predict(X_test)

In [33]:
# ROC curve and AUC on the test set, computed from the predicted labels in
# `Y_predict`. `roc_auc` is also read as a global by get_result().
FP, TP, thresholds = roc_curve(Y_test,Y_predict)
roc_auc = auc(FP, TP)
print (roc_auc)

0.59386578839


In [34]:
# Record this model's metrics; get_result() appends to the list and reads
# the global `roc_auc`, so re-running this cell adds a duplicate row.
models_result = get_result(models_result, model, confusion_matrix(Y_test,Y_predict))

In [41]:
# Summary table: one row per get_result() call, in the order the cells were
# executed (a model evaluated twice appears twice).
pd.DataFrame(models_result)

Unnamed: 0,model,precision,roc_auc,succ_trades
0,RF SMOTE GS,0.1,0.55846,0.625
1,KNN SMOTE,0.142857,0.688349,0.84
2,KNN SMOTE GS,0.090909,0.671058,0.891892
3,SVC SMOTE GS,0.038462,0.570811,0.912281
4,AdaBoost SMOTE GS,0.055556,0.593866,0.878049
5,KNN SMOTE GS,1.0,1.0,1.0
