In [1]:
import calendar
import math
import os
import pickle
import re
import sys
from datetime import datetime, timedelta
from urllib.parse import quote

import matplotlib.dates as mdates
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import seaborn as sns
import sqlalchemy
from sklearn import preprocessing
from sqlalchemy import Table, Column, Integer, DateTime, String,Float, ForeignKey
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import select, and_, or_, not_, desc, asc

plt.style.use('ggplot')

In [2]:
# fig_size = plt.rcParams["figure.figsize"]
# fig_size[0] = 10
# fig_size[1] = 10
# plt.rcParams["figure.figsize"] = fig_size

In [3]:
def connect(user, password, db, host='localhost', port=5432):
    '''Open a PostgreSQL engine and a MetaData object bound to it.

    Args:
        user: Database role name.
        password: Password for ``user``; may be empty.
        db: Name of the database to connect to.
        host: Server host name (default ``'localhost'``).
        port: Server TCP port (default ``5432``).

    Returns:
        Tuple ``(con, meta)``: the object returned by
        ``sqlalchemy.create_engine`` and a ``MetaData`` bound to it.
    '''
    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # user name or password would otherwise be read as URL delimiters.
    url = 'postgresql+psycopg2://{}:{}@{}:{}/{}'.format(
        quote(str(user), safe=''),
        quote(str(password), safe=''),
        host,
        port,
        db,
    )

    # The return value of create_engine() is our connection object
    con = sqlalchemy.create_engine(url, client_encoding='utf8')

    # We then bind the connection to MetaData()
    meta = sqlalchemy.MetaData(bind=con)

    return con, meta

CREATE TABLES 

In [15]:
# Connection settings are read from the environment so credentials do not
# have to be stored in the notebook; the fallbacks are the local development
# values this cell used before.
con, meta = connect(
    os.environ.get('ROBOTDB_USER', 'postgres'),
    os.environ.get('ROBOTDB_PASSWORD', ''),
    os.environ.get('ROBOTDB_NAME', 'robotdb'),
)

# Table metadata for "Market_trend": one row per (coin, date, screen) with the
# indicator columns listed below. This only declares the table on ``meta``;
# nothing in this cell issues DDL against the database.
mkt_trend = Table('Market_trend', meta,
                      Column('coin', String, primary_key=True),
                      Column('date', DateTime, primary_key=True),
                      Column('screen', Integer, primary_key=True),
                      Column('dif_current', Float),
                      Column('dif_base', Float),
                      Column('d_dif', Float),
                      Column('theta_current', Float),
                      Column('theta_base', Float),
                      Column('d_theta', Float),
                      Column('long_dif', Float),
                      Column('max_growth', Float),
                      Column('max_loss', Float),
                      Column('max_price', Float),
                      Column('min_price', Float),
                      Column('max_rel', Float),
                      Column('min_rel', Float),
                      Column('log_ret', Float),
                      Column('log_ret_p', Float),
                      Column('log_ret_t_1', Float),
                      Column('histogram', Float),
                      Column('ema_dif', Float),
                      Column('rsi', Float),
                      Column('dif_sma', Float),
                      Column('max_growth_p', Float),
                      Column('obv', Float),
                      Column('strength', Float),
                      Column('vote', Integer)
                      )

# Table metadata for "Ticker": one row per (date, coin, screen).
# NOTE(review): ``screen`` is declared String here but Integer in
# "Market_trend" — confirm which type the database actually uses.
tickers = Table('Ticker', meta,
    Column('date', DateTime, primary_key = True),
    Column('coin', String, primary_key = True),
    Column('price', Float),
    Column('volume', Float),
    Column('screen', String, primary_key = True)
)

# Date-axis helpers for matplotlib time-series plots.
years = mdates.YearLocator()   # every year
months = mdates.MonthLocator()  # every month
yearsFmt = mdates.DateFormatter('%Y')

In [16]:
def get_mkt_trend(coin=None, date='2019-12-31', screen=1):
    '''Load rows of the ``Market_trend`` table into a DataFrame.

    Args:
        coin: Restrict the result to this coin; a falsy value (the default
            ``None``) returns every coin.
        date: Inclusive upper bound on the ``date`` column.
        screen: Value the ``screen`` column must equal.

    Returns:
        DataFrame with one column per table column, ordered by ascending
        ``date``. When nothing matches, an empty frame that still carries the
        column names is returned.
    '''
    conditions = [mkt_trend.c.date <= date, mkt_trend.c.screen == screen]
    if coin:
        conditions.insert(0, mkt_trend.c.coin == coin)

    s = select([mkt_trend]).\
        where(and_(*conditions)).\
        order_by(desc(mkt_trend.c.date))
    rows = con.execute(s)

    # Passing the column names to the constructor (instead of assigning them
    # afterwards) keeps an empty result from raising a length-mismatch error.
    mkt_trend_df = pd.DataFrame(rows.fetchall(), columns=list(rows.keys()))

    # The query returns newest-first; reverse to oldest-first. The index
    # labels are reversed along with the rows.
    return mkt_trend_df.iloc[::-1]

In [17]:
def scale_df(train, test, scale_columns):
    '''Standardise ``scale_columns`` of both frames using statistics from ``train``.

    A ``StandardScaler`` is fitted on the training columns only and then
    applied to the training and the test frame. Both frames are modified in
    place and are also returned as ``(train, test)``.
    '''
    fitted = preprocessing.StandardScaler().fit(train[scale_columns])
    for frame in (train, test):
        frame[scale_columns] = fitted.transform(frame[scale_columns])
    return train, test

FETCH DATA FROM DATABASE AND JOIN TABLES 
MKTTREND - PRICE - MACDS - RSI - SMA

In [20]:
def manipulate_mkt_data(growth_threshold=0.08):
    '''Fetch the market-trend rows and prepare them for modelling.

    Args:
        growth_threshold: ``max_growth`` at or above this value is labelled
            as a positive example.

    Returns:
        DataFrame from ``get_mkt_trend()`` without the ``screen`` and ``coin``
        columns and with an added ``growth_sign`` column: ``1.0`` where
        ``max_growth >= growth_threshold``, ``0.0`` where it is below, and NaN
        where ``max_growth`` is missing or not numeric.
    '''
    df_model = get_mkt_trend()

    # Later cells use ``growth_sign`` as the classification target, so it has
    # to be derived here. Coerce first: ``max_growth`` can arrive as an object
    # column holding None, which cannot be compared with a float.
    max_growth = pd.to_numeric(df_model['max_growth'], errors='coerce')
    growth_sign = (max_growth >= growth_threshold).astype(float).where(max_growth.notna())

    # ``assign`` returns a new frame, so the fetched frame is not mutated.
    labelled = df_model.assign(growth_sign=growth_sign)

    return labelled.drop(['screen', 'coin'], axis=1)
    
# Load the prepared frame and add an exponentially weighted mean of
# ``strength`` (span 6; NaN until six observations are available).
df_model = manipulate_mkt_data()
df_model['strength_ema'] = (
    df_model['strength']
    .ewm(span=6, min_periods=6, adjust=True, ignore_na=False)
    .mean()
)
# df_model = df_model.dropna()
# Order the rows by date and renumber them from zero.
df_model = df_model.sort_values(['date']).reset_index(drop=True)

In [23]:
# plt.hist(df_model['max_growth'], bins=30)
# plt.show()
# NOTE(review): the recorded output shows None for every displayed row and
# dtype object, so max_growth looks unpopulated in this extract — confirm the
# upstream data before relying on it.
df_model['max_growth']

0       None
1       None
2       None
3       None
4       None
5       None
6       None
7       None
8       None
9       None
10      None
11      None
12      None
13      None
14      None
15      None
16      None
17      None
18      None
19      None
20      None
21      None
22      None
23      None
24      None
25      None
26      None
27      None
28      None
29      None
        ... 
1770    None
1771    None
1772    None
1773    None
1774    None
1775    None
1776    None
1777    None
1778    None
1779    None
1780    None
1781    None
1782    None
1783    None
1784    None
1785    None
1786    None
1787    None
1788    None
1789    None
1790    None
1791    None
1792    None
1793    None
1794    None
1795    None
1796    None
1797    None
1798    None
1799    None
Name: max_growth, Length: 1800, dtype: object

In [None]:
# Remove the timestamp, the raw price extremes, the vote and the raw
# growth/loss columns, then preview the remaining frame.
non_feature_columns = ['date', 'max_price', 'min_price', 'vote', 'max_growth', 'max_loss']
df = df_model.drop(non_feature_columns, axis=1)
df.head()

In [None]:
# Share of positive labels: sum of growth_sign divided by the row count.
positive_share = np.sum(df['growth_sign']) / len(df_model)

plt.hist(df['growth_sign'], bins=3)
print('Unbalanced Dataset %s' % str(positive_share))
plt.show()

In [None]:
# Feature columns that a later cell removes from ``df`` (after the
# per-feature distribution plots have been drawn).
drop_columns = ['d_dif', 'theta_current', 'theta_base', 'long_dif',
               'strength', 'log_ret', 'log_ret_p', 'log_ret_t_1']

In [None]:
# One panel per feature: distribution for growth_sign == 1 (red) drawn over
# the distribution for growth_sign == 0 (blue). The 7x4 grid has 28 slots.
fig = plt.figure(figsize=(12, 8 * 4))
grid = gridspec.GridSpec(7, 4)
feature_names = df.drop(['growth_sign'], axis=1).columns
is_positive = df.growth_sign == 1
is_negative = df.growth_sign == 0
for slot, feature in enumerate(feature_names):
    ax = fig.add_subplot(grid[slot])
    sns.distplot(df[feature][is_positive], bins=30, color='red', ax=ax)
    sns.distplot(df[feature][is_negative], bins=30, color='blue', ax=ax)
    ax.set_xlabel('')
    ax.set_title('feature: ' + str(feature))
plt.show()

In [None]:
# Remove the columns listed in ``drop_columns``. Not idempotent: running this
# cell a second time raises KeyError because the columns are already gone.
df = df.drop(drop_columns, axis=1)

In [None]:
# Make 10x10 inches the default figure size from this cell onwards.
fig_size = [10, 10]
plt.rcParams["figure.figsize"] = fig_size

correlations = df.corr()
names = df.columns

# Draw the correlation matrix on a fixed [-1, 1] colour scale, with one
# labelled tick per column on both axes.
fig, ax = plt.subplots()
cax = ax.matshow(correlations, vmin=-1, vmax=1)
fig.colorbar(cax)
ticks = np.arange(len(names))
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names)
ax.set_yticklabels(names)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Split by row position without shuffling: the first 75% of rows become the
# training set and the remaining rows the test set.
split_at = math.floor(len(df) * 0.75)
train = df.iloc[:split_at].copy()
test = df.iloc[split_at:].copy()

In [None]:
# Show which columns the training frame contains.
train.columns

In [None]:
# Standardise every column except the ``growth_sign`` target. The scaler is
# fitted on the training rows only and then applied to both splits, so no
# statistics from the test rows enter the fit.
scale_columns = df.drop(['growth_sign'], axis=1).columns
scaler = preprocessing.StandardScaler().fit(train[scale_columns])
train[scale_columns] = scaler.transform(train[scale_columns])
test[scale_columns] = scaler.transform(test[scale_columns])

In [None]:
# # #EXAMPLE FOR SCALE NEW ENTRY
# t_transform = scaler.transform(df.loc[0,scale_columns].values.reshape(1,-1))
# print(t_transform)

In [None]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
print('---------------------------------------------------')
# Feature/target split; the test partition is never resampled.
X_train, y_train = train.drop(['growth_sign'], axis=1), train.growth_sign.values
X_test, y_test =  test.drop(['growth_sign'], axis=1), test.growth_sign.values

# Balance the training classes once and reuse the result for every model
# (SMOTE is seeded, so resampling again would produce the same data).
oversampler = SMOTE(random_state=42)
# Newer imbalanced-learn releases expose ``fit_resample`` instead of
# ``fit_sample``; use whichever this installation provides.
resample = getattr(oversampler, 'fit_resample', None) or oversampler.fit_sample
X_train_os, Y_train_os = resample(X_train, y_train)
# NOTE(review): the data is oversampled before GridSearchCV splits it, so
# synthetic points derived from validation rows can land in training folds
# and the cross-validation scores are likely optimistic.

# (label, estimator, hyper-parameter grid) for each model to tune.
searches = [
    ('Random Forest',
     RandomForestClassifier(random_state=42),
     {'max_depth': np.arange(1, 20), 'min_samples_leaf': np.arange(1, 8)}),
    ('Extra Tree',
     ExtraTreesClassifier(random_state=42),
     {'criterion': ['gini', 'entropy'], 'max_depth': np.arange(1, 15),
      'min_samples_leaf': np.arange(1, 8),
      'n_estimators': np.arange(1, 5)}),
]

for label, estimator, param_grid in searches:
    print(label)
    model = GridSearchCV(estimator, param_grid, cv=5, scoring='precision')
    model.fit(X_train_os, Y_train_os)
    Y_predict = model.predict(X_test)

    print(model.best_params_)
    # labels=[0, 1] keeps the matrix 2x2 even when one class is absent from
    # both the test labels and the predictions.
    cm = confusion_matrix(y_test, Y_predict, labels=[0, 1])
    # Precision = TP / (TP + FP); undefined when nothing is predicted positive.
    predicted_positive = cm[1][1] + cm[0][1]
    precision = float(cm[1][1] / predicted_positive) if predicted_positive else float('nan')
    print('Precision: %s' % precision)
    print(cm)
    print('---------------------------------------------------')


In [None]:
from sklearn.decomposition import PCA

print('---------------------------------------------------')

X_train, y_train = train.drop(['growth_sign'], axis=1), train.growth_sign.values
X_test, y_test =  test.drop(['growth_sign'], axis=1), test.growth_sign.values

# Reduce the features to three principal components; the projection is
# fitted on the training rows and only applied to the test rows.
pca = PCA(n_components = 3)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

oversampler = SMOTE(random_state=42)
# Newer imbalanced-learn releases expose ``fit_resample`` instead of
# ``fit_sample``; use whichever this installation provides.
resample = getattr(oversampler, 'fit_resample', None) or oversampler.fit_sample
X_train_os, Y_train_os = resample(X_train_pca, y_train)

kernels = ['rbf']
Cs = [10, 100, 1000]
gammas = [0.001, 0.01, 0.1, 1]
knn = KNeighborsClassifier()

# (label, estimator, hyper-parameter grid, GridSearchCV scoring) per model.
# The SVM is tuned for precision and the KNN for ROC AUC.
searches = [
    ('SVM', svm.SVC(random_state=42),
     {'C': Cs, 'gamma' : gammas, 'kernel': kernels}, 'precision'),
    ('KNN', knn,
     {'n_neighbors': np.arange(1, 20)}, 'roc_auc'),
]

for label, estimator, param_grid, scoring in searches:
    print(label)
    model = GridSearchCV(estimator, param_grid, cv=5, scoring=scoring)
    model.fit(X_train_os, Y_train_os)
    Y_predict = model.predict(X_test_pca)

    print(model.best_params_)
    # labels=[0, 1] keeps the matrix 2x2 even when one class is absent from
    # both the test labels and the predictions.
    cm = confusion_matrix(y_test, Y_predict, labels=[0, 1])
    # Precision = TP / (TP + FP); undefined when nothing is predicted positive.
    predicted_positive = cm[1][1] + cm[0][1]
    precision = float(cm[1][1] / predicted_positive) if predicted_positive else float('nan')
    print('Precision: %s' % precision)
    print(cm)
    print('---------------------------------------------------')