In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
plt.style.use('ggplot') 
import calendar
import quandl
import pickle
import math
from datetime import datetime, timedelta
import re  
import psycopg2
import sys
import matplotlib.gridspec as gridspec
import sqlalchemy
from sqlalchemy.sql import select, and_, or_, not_, desc, asc
from sqlalchemy import Table, Column, Integer, DateTime, String,Float, ForeignKey
from sqlalchemy.orm import sessionmaker
from sklearn import preprocessing
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
def connect(user, password, db, host='localhost', port=5432):
    '''Build a SQLAlchemy engine for a PostgreSQL database and a MetaData bound to it.

    Parameters
    ----------
    user, password : str
        Credentials. Both are percent-encoded before being placed in the URL,
        so reserved characters such as ``@``, ``:``, ``/`` or ``%`` cannot
        corrupt it. Values without reserved characters are passed unchanged.
    db : str
        Database name.
    host : str, optional
        Server host name (default ``'localhost'``).
    port : int, optional
        Server port (default ``5432``).

    Returns
    -------
    tuple
        ``(con, meta)`` where ``con`` is the object returned by
        ``sqlalchemy.create_engine`` and ``meta`` is a ``sqlalchemy.MetaData``
        created with ``bind=con``.
    '''
    from urllib.parse import quote

    # safe='' makes quote() escape every reserved character, including '/'.
    url = 'postgresql+psycopg2://{}:{}@{}:{}/{}'.format(
        quote(str(user), safe=''),
        quote(str(password), safe=''),
        host,
        port,
        db,
    )

    con = sqlalchemy.create_engine(url, client_encoding='utf8')
    meta = sqlalchemy.MetaData(bind=con)

    return con, meta

CREATE TABLES 

In [None]:
# Engine and bound metadata for the local `robotdb` database.
con, meta = connect('postgres', '', 'robotdb')

# Market_trend: composite primary key (coin, date, screen), a run of Float
# indicator columns, and a trailing Integer `vote` column.
mkt_trend = Table(
    'Market_trend', meta,
    Column('coin', String, primary_key=True),
    Column('date', DateTime, primary_key=True),
    Column('screen', Integer, primary_key=True),
    *[Column(float_col, Float) for float_col in (
        'dif_current', 'dif_base', 'd_dif',
        'theta_current', 'theta_base', 'd_theta',
        'long_dif',
        'max_growth', 'max_loss',
        'max_price', 'min_price',
        'max_rel', 'min_rel',
        'log_ret', 'log_ret_p', 'log_ret_t_1',
        'histogram', 'ema_dif', 'rsi', 'dif_sma',
        'max_growth_p', 'obv', 'strength',
    )],
    Column('vote', Integer)
)

# Ticker: composite primary key (date, coin, screen) plus price and volume.
# NOTE(review): `screen` is a String here but an Integer in Market_trend --
# confirm this matches the actual database schema.
tickers = Table(
    'Ticker', meta,
    Column('date', DateTime, primary_key=True),
    Column('coin', String, primary_key=True),
    Column('price', Float),
    Column('volume', Float),
    Column('screen', String, primary_key=True)
)

# Matplotlib date locators/formatter for time-series axes.
years = mdates.YearLocator()    # one tick per year
months = mdates.MonthLocator()  # one tick per month
yearsFmt = mdates.DateFormatter('%Y')

In [None]:
def get_mkt_trend(coin=None, date='2019-12-31', screen=1):
    '''Load rows of the `Market_trend` table into a DataFrame.

    Parameters
    ----------
    coin : str, optional
        When truthy, only rows for this coin are selected; otherwise all
        coins are returned.
    date : str, optional
        Upper bound (inclusive) applied to the `date` column.
    screen : int, optional
        Value the `screen` column must equal.

    Returns
    -------
    pandas.DataFrame
        One column per table column. The query sorts by date descending and
        the frame is then reversed, so rows run from oldest to newest while
        the index labels run from n-1 down to 0. When nothing matches, an
        empty frame that still carries the column names is returned.
    '''
    conditions = [mkt_trend.c.date <= date, mkt_trend.c.screen == screen]
    if coin:
        conditions.insert(0, mkt_trend.c.coin == coin)

    s = select([mkt_trend]).\
        where(and_(*conditions)).\
        order_by(desc(mkt_trend.c.date))

    rows = con.execute(s)
    records = rows.fetchall()
    # Passing the names to the constructor keeps the columns even when
    # `records` is empty; assigning them afterwards raises on a 0-column frame.
    mkt_trend_df = pd.DataFrame(records, columns=list(rows.keys())).iloc[::-1]
    return mkt_trend_df

In [None]:
def scale_df(train, test, scale_columns):
    '''Standardise selected columns using statistics fitted on `train` only.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing `scale_columns`. Neither is modified; scaled copies
        are returned instead.
    scale_columns : list
        Column labels to standardise.

    Returns
    -------
    tuple
        ``(train_scaled, test_scaled)``: copies of the inputs in which
        `scale_columns` hold the transformed values and every other column is
        unchanged.
    '''
    # Fit on the training rows only so no test-set statistics leak into scaling.
    scaler = preprocessing.StandardScaler().fit(train[scale_columns])

    # Work on copies: assigning into the arguments would silently rewrite the
    # caller's frames (and trigger SettingWithCopyWarning on slices).
    train_scaled = train.copy()
    test_scaled = test.copy()
    train_scaled[scale_columns] = scaler.transform(train[scale_columns])
    test_scaled[scale_columns] = scaler.transform(test[scale_columns])
    return train_scaled, test_scaled

FETCH DATA FROM DATABASE AND JOIN TABLES 
MKTTREND - PRICE - MACDS - RSI - SMA

In [None]:
def manipulate_mkt_data():
    '''Build the modelling frame from the `Market_trend` rows.

    Steps:
      1. Load all rows via ``get_mkt_trend()`` and discard the stored
         ``max_growth_p`` column, then drop rows containing NaN.
      2. Add ``growth_sign``: 1.0 where ``max_growth >= 0.1``, else 0.0.
      3. Recompute ``max_growth_p`` as the ``max_growth`` of the previous row
         *of the same coin* (ordered by date).
      4. Drop rows without a previous value, sort by date, and remove the
         ``screen`` and ``max_growth`` columns.

    Returns
    -------
    pandas.DataFrame
        Sorted by date (ties ordered by coin), with ``growth_sign`` and
        ``max_growth_p`` as the last two columns.
    '''
    df_model = get_mkt_trend()
    df_model = df_model.drop(['max_growth_p'], axis=1).dropna()

    # Binary target; after dropna() every row has a max_growth value.
    df_model = df_model.assign(
        growth_sign=(df_model['max_growth'] >= 0.1).astype(float)
    )

    # Lag within each coin. A plain shift(1) over the date-ordered multi-coin
    # frame pairs a row with a neighbour belonging to another coin, and
    # re-joining on (date, coin) can then attach a row's *own* max_growth as
    # its "previous" value -- leaking the target into the features.
    df_model = df_model.sort_values(['coin', 'date'])
    df_model['max_growth_p'] = df_model.groupby('coin')['max_growth'].shift(1)

    # The first observation of each coin has no predecessor.
    df_model = df_model.dropna()

    # Stable sort keeps the per-date ordering deterministic (by coin).
    df_model = df_model.sort_values(['date'], kind='mergesort')
    return df_model.drop(['screen', 'max_growth'], axis=1)
    
# Build the modelling frame, then add a 6-period exponentially weighted mean
# of `strength` (NaN until 6 observations are available).
# NOTE(review): the EWM runs over all rows in frame order, not per coin --
# confirm that is intended.
df_model = manipulate_mkt_data()
df_model = df_model.assign(
    strength_ema=df_model['strength'].ewm(
        span=6, min_periods=6, adjust=True, ignore_na=False
    ).mean()
)

# Drop the EWM warm-up rows, order by date, and keep only model inputs plus
# the `growth_sign` target (identifiers and price-level columns are removed).
df_model = (
    df_model
    .dropna()
    .sort_values(['date'])
    .reset_index()
    .drop(['index', 'date', 'coin', 'vote', 'max_loss', 'max_price', 'min_price'], axis=1)
)

In [None]:
def best_subset_cv(estimator, X_train, y_train, X_test, y_test, max_subset_size=None):
    '''Fit `estimator` on every non-empty feature subset and report test precision.

    The search is exhaustive: with ``n`` features there are ``2**n - 1``
    subsets, and `estimator` is re-fitted for each one. Use
    `max_subset_size` to bound the search.

    Parameters
    ----------
    estimator : object
        Anything with ``fit(X, y)`` and ``predict(X)``. If it exposes
        ``best_params_`` after fitting (e.g. a grid search), that value is
        recorded for each subset.
    X_train, X_test : array-like or DataFrame, shape (n_samples, n_features)
        Converted with ``np.asarray`` so columns can be selected by position.
    y_train, y_test : array-like
        Binary labels. The larger of the two label values found in
        `y_train` is treated as the positive class.
    max_subset_size : int, optional
        Largest subset size to evaluate. ``None`` (default) means all sizes.

    Returns
    -------
    list of dict
        One entry per subset, in order of increasing subset size, with keys
        ``"precision"`` (float, NaN when nothing was predicted positive),
        ``"feat"`` (tuple of column indices) and ``"parms"``
        (``estimator.best_params_`` or ``None``).
    '''
    from itertools import chain, combinations

    # Positional column selection below needs plain arrays, not DataFrames.
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)

    n_features = X_train.shape[1]
    largest = n_features if max_subset_size is None else min(max_subset_size, n_features)

    # Choose `size` indices out of *all* features; combinations(range(k), k + 1)
    # would ask for more items than exist and never yield anything.
    subsets = chain.from_iterable(combinations(range(n_features), size)
                                  for size in range(1, largest + 1))

    # Fixing the label set keeps the confusion matrix 2x2 even when the test
    # labels or the predictions contain a single class.
    labels = np.unique(y_train)

    result = []
    for subset in subsets:
        print(subset)
        columns = list(subset)
        estimator.fit(X_train[:, columns], y_train)
        Y_predict = estimator.predict(X_test[:, columns])
        cm = confusion_matrix(y_test, Y_predict, labels=labels)
        true_pos, false_pos = cm[1][1], cm[0][1]
        predicted_pos = true_pos + false_pos
        precision = float(true_pos) / float(predicted_pos) if predicted_pos else float('nan')
        result.append({
            "precision": precision,
            "feat": subset,
            # Read from the estimator that was passed in, not a notebook global.
            "parms": getattr(estimator, "best_params_", None)
        })

    return result

In [None]:
# Positional 75/25 split: the first three quarters of the rows train the
# model, the remainder is held out for testing.
df = df_model
split_at = math.floor(len(df) * 0.75)
train = df.iloc[:split_at].copy()
test = df.iloc[split_at:].copy()

# Separate the features from the `growth_sign` target.
X_train, y_train = train.drop(['growth_sign'], axis=1), train.growth_sign.values
X_test, y_test = test.drop(['growth_sign'], axis=1), test.growth_sign.values

# Oversample the training rows only; the test set is left untouched.
# `fit_resample` replaces `fit_sample`, which imbalanced-learn deprecated in
# 0.4 and later removed.
oversampler = SMOTE(random_state=42)
X_train_os, Y_train_os = oversampler.fit_resample(X_train, y_train)

# Random-forest grid: tree depth 1..19 and minimum leaf size 1..7, scored by
# precision with 5-fold cross-validation.
param_grid = {'max_depth': np.arange(1, 20), 'min_samples_leaf': np.arange(1, 8)}
model = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='precision')

In [None]:
# Evaluate the grid-searched model on feature subsets: fit on the oversampled
# training data, score precision on the untouched test split.
best_subset_cv(
    estimator=model,
    X_train=X_train_os,
    y_train=Y_train_os,
    X_test=X_test,
    y_test=y_test,
)

In [None]:
from itertools import chain, combinations, islice

n_features = X_train_os.shape[1]

# Every non-empty subset of feature indices, smallest subsets first.
# Each size draws from range(n_features); combinations(range(k), k + 1) would
# request k + 1 items out of k and therefore never yield anything.
subsets = chain.from_iterable(combinations(range(n_features), k + 1) for k in range(n_features))

# There are 2**n_features - 1 subsets, so show the count and a short preview
# instead of printing all of them.
print('total subsets:', 2 ** n_features - 1)
for subset in islice(subsets, 20):
    print(subset)

In [6]:
from itertools import chain, combinations
n_features = 10
# Lazily yields every non-empty subset of range(n_features), sizes 1..n_features.
# Drawing from range(n_features) matters: combinations(range(k), k + 1) asks
# for more items than the pool holds and is always empty.
s = chain.from_iterable(combinations(range(n_features), k + 1) for k in range(n_features))
s

<itertools.chain at 0x7f1c7a5ec048>