In [55]:
# export
from dotenv import load_dotenv
load_dotenv()

import os
import pickle
from datetime import datetime


import pandas as pd
import numpy as np
from datetime import datetime
from binance.client import Client
from binance.websockets import BinanceSocketManager
from twisted.internet import reactor
from datetime import datetime
import btalib


#cor coinmarketcapapi
from coinmarketcapapi import CoinMarketCapAPI, CoinMarketCapAPIError
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json

#custom library
import MLTrain as mlt

pd.set_option('display.max_rows', 500)


import logging

# Gets or creates the module-level logger
logger = logging.getLogger(__name__)

# set log level
logger.setLevel(logging.INFO)

# Re-use the file handler if this cell/module has already been executed in the
# current process. logging.getLogger returns the same logger object every time,
# so unconditionally calling addHandler on each re-run would stack handlers and
# write every message to logfile.log several times.
_log_path = os.path.abspath('logfile.log')
_attached = [
    h for h in logger.handlers
    if isinstance(h, logging.FileHandler) and h.baseFilename == _log_path
]
if _attached:
    file_handler = _attached[0]
else:
    file_handler = logging.FileHandler('logfile.log')
    logger.addHandler(file_handler)

# define (or refresh) the formatter used by the file handler
formatter    = logging.Formatter('%(asctime)s : %(levelname)s : %(name)s : %(message)s')
file_handler.setFormatter(formatter)

In [65]:
# export
def get_coin_market_cap_data():
    """Download the latest CoinMarketCap listings and flatten them into rows.

    The API key is read from the ``COIN_MARKET_CAP_API_KEY`` environment
    variable. Up to 1000 listings are requested, quoted in USD.

    Returns:
        list: one ``[name, symbol, cmc_rank, volume_24h, market_cap,
        last_updated, snapshot]`` list per coin, where ``snapshot`` is the
        local download time formatted as ``YYYYmmddHHMM``. An empty list is
        returned when the request fails, the response is not valid JSON, or
        the payload carries no ``data`` entries; the failure is logged.
    """
    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    parameters = {
      'start':'1',
      'limit':'1000',
      'convert':'USD'
    }
    headers = {
      'Accepts': 'application/json',
      'X-CMC_PRO_API_KEY': os.environ.get('COIN_MARKET_CAP_API_KEY'),
    }

    try:
        logger.info('try to connect {url}'.format(url=url))
        # The context manager releases the pooled connections; the explicit
        # timeout keeps a stalled server from blocking the call forever (and is
        # what makes the Timeout handler below reachable).
        with Session() as session:
            session.headers.update(headers)
            response = session.get(url, params=parameters, timeout=30)
        data = json.loads(response.text)
    except (ConnectionError, Timeout, TooManyRedirects) as e:
        print(e)
        logger.error('Can NOT connect {url}'.format(url=url))
        # Without a payload there is nothing to parse; previously execution
        # fell through and failed with NameError on the undefined `data`.
        return []
    except ValueError as e:
        # json.loads raises a ValueError subclass on a non-JSON body.
        print(e)
        logger.error('Can NOT parse response from {url}'.format(url=url))
        return []

    # Error payloads may carry no "data" entries at all; report whatever
    # "status" block came back instead of crashing on iteration over None.
    coins = data.get("data") if isinstance(data, dict) else None
    if not coins:
        status = data.get("status") if isinstance(data, dict) else None
        logger.error('No listing data returned by {url}: {status}'.format(url=url, status=status))
        return []

    now = datetime.now()
    dt_string = now.strftime("%Y%m%d%H%M")

    listx = []
    for coin_dict in coins:
        # Tolerate a missing quote block: the row is still emitted, with None
        # in the three quote-derived fields.
        usd_quote = (coin_dict.get('quote') or {}).get('USD') or {}
        listx.append([
            coin_dict.get('name'),
            coin_dict.get('symbol'),
            coin_dict.get('cmc_rank'),
            usd_quote.get('volume_24h'),
            usd_quote.get('market_cap'),
            usd_quote.get('last_updated'),
            dt_string
        ])

    return listx


def append_coin_market_cap_data(v_list):
    """Append rows to the pickled list stored in ``cmc_data.pkl``.

    Args:
        v_list (list): rows to add; concatenated after the stored rows.

    The file is read from and written to the current working directory. When
    it does not exist yet, it is created containing only ``v_list``.
    """
    path = "cmc_data.pkl"

    try:
        # `with` guarantees the read handle is closed before the same path is
        # reopened for writing (the original never closed it).
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on a file this project wrote itself.
        with open(path, "rb") as pickle_in:
            list_old = pickle.load(pickle_in)
    except FileNotFoundError:
        # First run: nothing stored yet.
        list_old = []

    list_all = list_old + v_list

    with open(path, "wb") as pickle_out:
        pickle.dump(list_all, pickle_out)


In [12]:
# export
def f_get_coinlist():
    """Return the USDT trading symbols of the first 25 coins in ``crypto_lu.csv``.

    The CSV must provide ``Name``, ``Symbol`` and ``Keywords`` columns. Each
    symbol gets the ``USDT`` suffix (e.g. ``BTC`` -> ``BTCUSDT``).

    Returns:
        list: the suffixed symbols, in file order.
    """
    keyword_sources = ("Name", "Symbol", "Symbolusd", "Keywords")

    def _add_quote_currency(symbol):
        return symbol + "USDT"

    def _merge_keywords(row):
        # Lower-cased name, symbol, trading pair and the original keywords,
        # comma separated.
        return ",".join(row[column].lower() for column in keyword_sources)

    coins = pd.read_csv("crypto_lu.csv")
    coins["Symbolusd"] = coins["Symbol"].apply(_add_quote_currency)
    # The merged keywords are computed on the local frame only; they are not
    # part of the returned value.
    coins["Keywords"] = coins.apply(_merge_keywords, axis = 1)

    top_coins = coins.head(25)
    return list(top_coins["Symbolusd"])


def f_get_binance_client():
    """Build a Binance ``Client`` from environment credentials.

    Reads ``BINANCE_API`` (key) and ``BINANCE_SECRET`` (secret); a missing
    variable is passed through as ``None``.
    """
    credentials = [os.environ.get(name) for name in ('BINANCE_API', 'BINANCE_SECRET')]
    return Client(*credentials)


def f_load_allcoins(coin_list = None):
    """Download one year of 2-hour klines for every requested trading pair.

    Args:
        coin_list (list, optional): Binance symbols such as ``"BTCUSDT"``.
            When omitted, the list from ``f_get_coinlist()`` is used.
            (Previously this argument was silently overwritten and ignored.)

    Returns:
        dict: ``{symbol: {"YYYY-mm-dd HH:MM:SS": [x[0], x[4], x[5], x[8]]}}``,
        keyed by the UTC time derived from ``x[0]``. Symbols whose download
        fails are logged and left out.
    """
    client = f_get_binance_client()
    if coin_list is None:
        coin_list = f_get_coinlist()

    dict_all = {}
    for coin in coin_list:
        logger.info('Initial load {x}'.format(x=coin))
        try:
            klines = client.get_historical_klines(coin, Client.KLINE_INTERVAL_2HOUR, "365 day ago UTC")
        except Exception as e:
            # Best effort: e.g. Binance answers "Invalid symbol" for pairs it
            # does not list; skip the coin and carry on with the rest.
            print(str(e))
            logger.error('Initial load {x} FAILED'.format(x=coin))
            continue

        dict_coin = {}
        for x in klines:
            # x[0] is a millisecond epoch timestamp.
            ts = int(x[0]) / 1000
            dt = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
            dict_coin[dt] = [x[0], x[4], x[5], x[8]] #date, price, volume, numoftrades
        dict_all[coin] = dict_coin

    return dict_all



# refresh every coin with the 2-hour klines of the last two days and persist the result
def f_update_lastNdays(dict_all=None):
    """Merge the last two days of 2-hour klines into the price dictionary.

    Args:
        dict_all (dict, optional): ``{symbol: {timestamp_str: [...]}}``.
            When omitted it is loaded from ``dict_all.pkl`` in the current
            working directory.

    Returns:
        dict: the same dictionary, updated in place. It is also written back
        to ``dict_all.pkl``. Coins whose download fails keep their old data
        and are logged.
    """
    if dict_all is None:
        # `with` closes the handle before the file is rewritten below.
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on a file this project wrote itself.
        with open("dict_all.pkl","rb") as file:
            dict_all = pickle.load(file)

    client = f_get_binance_client()
    for coin in list(dict_all.keys()):
        logger.info('try to update coin {x}'.format(x=coin))

        try:
            klines = client.get_historical_klines(coin, Client.KLINE_INTERVAL_2HOUR, "2 day ago UTC")
        except Exception as e:
            print(str(e))
            logger.error('UPDATE list of coin: {x} FAILED, seems no info'.format(x=coin))
            ##Make warning to say that such coin data is not up-to-date!!
            continue

        dict_coin = dict_all[coin]
        for x in klines:
            # x[0] is a millisecond epoch timestamp; an existing entry for the
            # same timestamp is overwritten with the fresh values.
            ts = int(x[0]) / 1000
            dt = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
            dict_coin[dt] = [x[0], x[4], x[5], x[8]] #date, price, volume, numoftrades

    with open("dict_all.pkl","wb") as pickle_out:
        pickle.dump(dict_all, pickle_out)

    return dict_all



# add the most recent 1-minute kline of every coin (in memory only, nothing is persisted)
def f_update_lastprice(dict_all=None):
    """Merge the latest 1-minute kline of each coin into the price dictionary.

    Args:
        dict_all (dict, optional): ``{symbol: {timestamp_str: [...]}}``.
            When omitted it is loaded from ``dict_all.pkl``, mirroring
            ``f_update_lastNdays`` (previously ``None`` crashed on ``.keys()``).

    Returns:
        dict: the same dictionary, updated in place. Unlike
        ``f_update_lastNdays`` the result is not written back to disk. Coins
        whose download fails keep their old data and are logged.
    """
    if dict_all is None:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on a file this project wrote itself.
        with open("dict_all.pkl","rb") as file:
            dict_all = pickle.load(file)

    client = f_get_binance_client()
    for coin in list(dict_all.keys()):
        logger.info('try to update coin {x}'.format(x=coin))

        try:
            klines = client.get_historical_klines(coin, Client.KLINE_INTERVAL_1MINUTE, "1 minute ago UTC")
        except Exception as e:
            print(str(e))
            logger.error('UPDATE list of coin: {x} FAILED, seems no info'.format(x=coin))
            ##Make warning to say that such coin data is not up-to-date!!
            continue

        dict_coin = dict_all[coin]
        for x in klines:
            # x[0] is a millisecond epoch timestamp.
            ts = int(x[0]) / 1000
            dt = datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
            dict_coin[dt] = [x[0], x[4], x[5], x[8]] #date, price, volume, numoftrades

    return dict_all



def f_get_all_price(dict_all_coins):
    """Flatten the per-coin price dictionary into one time-indexed DataFrame.

    Args:
        dict_all_coins (dict): ``{symbol: {"YYYY-mm-dd HH:MM:SS": values}}``
            where ``values[1]``, ``values[2]`` and ``values[3]`` hold price,
            volume and number of trades.

    Returns:
        pandas.DataFrame: indexed by ``dt`` (sorted ascending) with columns
        ``coin``, ``price``, ``vol``, ``tradefreq`` (floats), ``last_time``
        (1 on each coin's most recent row, else 0) and ``is_day_end``
        (1 when the timestamp is exactly 00:00:00, else 0).

    Raises:
        ValueError: if a timestamp key does not match ``%Y-%m-%d %H:%M:%S``.
    """
    columns = ["coin", "dt", "price", "vol", "tradefreq"]

    # Collect every row first and build the frame once. The original grew the
    # frame with DataFrame.append inside the loop, which copies the whole frame
    # on each call and no longer exists in pandas >= 2.0.
    records = [
        [coin, dt, values[1], values[2], values[3]]
        for coin, dict_coin in dict_all_coins.items()
        for dt, values in dict_coin.items()
    ]
    df_all_coins = pd.DataFrame(data = records, columns = columns)

    df_all_coins["dt"] = pd.to_datetime(df_all_coins["dt"].astype(str), format='%Y-%m-%d %H:%M:%S')

    # Flag the newest observation of every coin.
    newest_per_coin = df_all_coins.groupby("coin")["dt"].transform("max")
    df_all_coins["last_time"] = (df_all_coins["dt"] == newest_per_coin).astype("int64")

    df_all_coins = df_all_coins.set_index("dt", drop=True).sort_index()

    # A timestamp equals its own midnight only when it is exactly 00:00:00.
    stamps = df_all_coins.index
    df_all_coins["is_day_end"] = (stamps == stamps.normalize()).astype("int64")

    # Binance delivers the numeric fields as strings.
    df_all_coins = df_all_coins.astype({"price": float, "tradefreq": float, "vol": float})

    return df_all_coins
    


def f_calc_indicators(df, min_samples=100):
    """Add RSI and MACD indicator columns, computed separately per coin.

    Args:
        df (pandas.DataFrame): needs at least ``coin`` and ``price`` columns.
            The input is not modified; a copy is returned.
        min_samples (int): coins with fewer rows than this are reported and
            skipped. Their rows stay in the frame with NaN indicators; they
            are not removed here.

    Returns:
        pandas.DataFrame: copy of ``df`` with ``rsi14/28/42/48/72``,
        ``macd12/signal12/hist12`` (12/24/9), ``macd6/signal6/hist6``
        (6/12/4), ``macd12_ratio`` and ``signal12_ratio`` added.
    """
    df_all_coins = df.copy()

    rsi_periods = (14, 28, 42, 48, 72)
    macd_specs = (
        (["macd12", "signal12", "hist12"], dict(pfast=12, pslow=24, psignal=9)),
        (["macd6", "signal6", "hist6"], dict(pfast=6, pslow=12, psignal=4)),
    )

    # Create every indicator column up front. Previously they only came into
    # existence when at least one coin passed the sample threshold, so a frame
    # made up solely of short histories failed with KeyError on 'macd12' below.
    indicator_columns = ['rsi{p}'.format(p=p) for p in rsi_periods]
    for names, _ in macd_specs:
        indicator_columns.extend(names)
    for column in indicator_columns:
        if column not in df_all_coins.columns:
            df_all_coins[column] = np.nan

    for coin in df_all_coins.coin.unique():
        index_t = (df_all_coins.coin == coin)
        lenx = int(index_t.sum())
        if lenx < min_samples:
            print("{coin} lacks of enough sample {sample} so lets drop it!!".format(coin=coin, sample=lenx))
            continue

        prices = df_all_coins.loc[index_t, 'price']

        for period in rsi_periods:
            # The result frame holds a single column; hand it over as a flat
            # array so it fits the single target column.
            df_all_coins.loc[index_t, 'rsi{p}'.format(p=period)] = \
                        btalib.rsi(prices, period=period).df.iloc[:, 0].values

        for names, params in macd_specs:
            # Three result columns (macd, signal, histogram) map onto `names`.
            df_all_coins.loc[index_t, names] = \
                        btalib.macd(prices, **params).df.values.tolist()

    df_all_coins['macd12_ratio'] = df_all_coins['macd12'] / df_all_coins['price']
    df_all_coins['signal12_ratio'] = df_all_coins['signal12'] / df_all_coins['price']

    return df_all_coins


def f_get_last_minute_rows(v_df, rnk = 1):
    """Pick, for every coin, the row with the ``rnk``-th most recent timestamp.

    Args:
        v_df (pandas.DataFrame): frame indexed by ``dt`` with a ``coin``
            column. It is left untouched.
        rnk (int): 1 selects each coin's newest row, 2 the one before, etc.
            (dense ranking, so equal timestamps share a rank).

    Returns:
        pandas.DataFrame: the selected rows, indexed by ``dt`` again.
    """
    ranked = v_df.reset_index().assign(
        rnk_dt=lambda frame: frame.groupby("coin")["dt"].rank(method="dense", ascending=False)
    )
    selected = ranked.loc[ranked["rnk_dt"] == rnk, :]
    return selected.set_index("dt").drop(columns=["rnk_dt"])
    

In [11]:
# export
def set_target(row, margin):
    """Classify the future price move of one row.

    The NaN-ignoring mean of ``next1price``, ``next2price`` and ``next3price``
    is compared with the current ``price``.

    Returns:
        int: 1 when the mean exceeds the price by more than ``margin``
        (a fraction, e.g. 0.02), -1 when it is lower by more than ``margin``,
        0 otherwise (including when the mean is NaN).
    """
    future_mean = np.nanmean([row[key] for key in ("next1price", "next2price", "next3price")])

    if future_mean > row["price"] * (1.00 + margin):
        return 1
    if future_mean < row["price"] * (1.00 - margin):
        return -1
    return 0

    
def set_target_numeric(row):
    """Return the NaN-ignoring mean of the row's three look-ahead prices."""
    look_ahead = [row[key] for key in ("next1price", "next2price", "next3price")]
    return np.nanmean(look_ahead)
    
def f_transform_df_target(df_all_coins, shift_period = 2, target_type = 'Binary', margin = 0.02):
    """Add look-ahead prices and the training target to the frame.

    Args:
        df_all_coins (pandas.DataFrame): needs ``coin``, ``price`` and
            ``last_time`` columns. NOTE: the frame is modified in place (and
            also returned), as before.
        shift_period (int): base look-ahead, in rows per coin.
        target_type (str): ``'Numeric'`` stores the mean future price; any
            other value (default ``'Binary'``) stores the -1/0/1 class.
        margin (float): relative move needed for a +1 / -1 class.

    Returns:
        pandas.DataFrame: the same frame with ``next1price``, ``next2price``,
        ``next3price`` and ``Target`` added. Rows with ``last_time != 0``
        always get ``Target`` 0. The frame is also pickled to ``df_all.pkl``
        in the current working directory.
    """
    price_by_coin = df_all_coins.groupby(['coin'])['price']
    df_all_coins["next1price"] = price_by_coin.shift(-1*shift_period)
    df_all_coins["next2price"] = price_by_coin.shift(-2*shift_period)
    # NOTE(review): the third look-ahead uses 4x the period, not 3x - confirm this is intended.
    df_all_coins["next3price"] = price_by_coin.shift(-4*shift_period)

    # Row-wise mean that skips NaN; rows with no future price at all yield NaN
    # without the "Mean of empty slice" RuntimeWarning of a per-row np.nanmean.
    future_mean = df_all_coins[["next1price", "next2price", "next3price"]].mean(axis = 1)
    is_trainable = (df_all_coins["last_time"] == 0).to_numpy()

    if target_type == 'Numeric':
        #Numeric Target
        df_all_coins["Target"] = np.where(is_trainable, future_mean.to_numpy(), 0.0)
    else:
        #Binary Target: comparisons against NaN are False, so such rows fall to 0
        price = df_all_coins["price"]
        direction = np.select(
            [(future_mean > price * (1.00 + margin)).to_numpy(),
             (future_mean < price * (1.00 - margin)).to_numpy()],
            [1, -1],
            default = 0,
        )
        df_all_coins["Target"] = np.where(is_trainable, direction, 0)

    with open("df_all.pkl","wb") as pickle_out:
        pickle.dump(df_all_coins, pickle_out)

    return df_all_coins


def f_prep_df_to_ML(df):
    """Split a prepared frame into feature matrix and target.

    Rows containing any NaN are discarded first; ``df`` itself is not changed.

    Returns:
        tuple: ``(X, y)`` where ``y`` is the one-column ``Target`` frame and
        ``X`` holds every remaining column except the target, the coin name,
        the current price and the look-ahead prices. ``is_day_end`` and
        ``last_time`` are kept as features.
    """
    non_feature_columns = (
        "Target",
        "coin",
        "next1price",
        "next2price",
        "next3price",
        "price",
    )

    complete_rows = df.dropna()
    feature_columns = [name for name in complete_rows.columns if name not in non_feature_columns]

    y = complete_rows.loc[:, ["Target"]]
    X = complete_rows.loc[:, feature_columns]
    return X, y

In [34]:
# Full initial download: one year of 2-hour klines for every symbol returned by
# f_get_coinlist(). Symbols that fail (see the "Invalid symbol" messages in this
# cell's output) are printed, logged and skipped.
dict_init = f_load_allcoins()

APIError(code=-1121): Invalid symbol.
APIError(code=-1121): Invalid symbol.
APIError(code=-1121): Invalid symbol.


In [8]:
# Load the cached price dictionary from dict_all.pkl, refresh the last two days
# of 2-hour klines (this rewrites dict_all.pkl), ...
dict_tmp = f_update_lastNdays()
# ... add the latest 1-minute kline of every coin (in memory only), ...
dict_tmp = f_update_lastprice(dict_tmp)
# ... and flatten everything into one DataFrame indexed by timestamp.
df = f_get_all_price(dict_tmp)

In [9]:
df

Unnamed: 0_level_0,coin,price,vol,tradefreq,last_time,is_day_end
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-10 18:00:00,BTCUSDT,8502.940000,7.530685e+03,77046.0,0,0
2020-05-10 18:00:00,BUSDUSDT,0.999800,2.427941e+06,8947.0,0,0
2020-05-10 18:00:00,ETHUSDT,185.220000,5.935731e+04,17577.0,0,0
2020-05-10 18:00:00,XMRUSDT,57.650000,1.061929e+04,2733.0,0,0
2020-05-10 18:00:00,DOGEUSDT,0.002396,5.976979e+06,129.0,0,0
...,...,...,...,...,...,...
2021-05-12 11:09:00,LUNAUSDT,17.071000,6.750928e+03,179.0,1,0
2021-05-12 11:09:00,DOTUSDT,39.310000,1.393696e+03,88.0,1,0
2021-05-12 11:09:00,SHIBUSDT,0.000029,6.838048e+09,468.0,1,0
2021-05-12 11:09:00,LINKUSDT,47.485000,9.874470e+02,54.0,1,0


In [12]:
# NOTE: this cell mutates `df` in place and is not idempotent - running it a
# second time calls reset_index again on the already-reset frame.
df.reset_index(inplace=True)
# Row labels of every SHIBUSDT observation.
index_t = df.loc[df.coin == "SHIBUSDT", :].index
# Bare expression in the middle of a cell: it is not displayed and has no effect.
index_t

# Remove the SHIBUSDT rows from the frame.
df.drop(index_t, axis = 0, inplace = True)

In [13]:
# Add the RSI / MACD indicator columns (coins with fewer than 100 rows are
# reported and left with NaN indicators) ...
df = f_calc_indicators(df)
# ... then add the look-ahead prices and the Target column; this call also
# writes the resulting frame to df_all.pkl.
df = f_transform_df_target(df)

SHIBUSDT lacks of enough sample 26 so lets drop it!!


  This is separate from the ipykernel package so we can avoid doing imports until


In [14]:
df.loc[df.coin == 'SHIBUSDT', :]

Unnamed: 0_level_0,coin,price,vol,tradefreq,last_time,is_day_end,rsi14,rsi28,rsi42,rsi48,...,hist12,macd6,signal6,hist6,macd12_ratio,signal12_ratio,next1price,next2price,next3price,Target
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-05-10 10:00:00,SHIBUSDT,3e-05,15274310000000.0,730457.0,0,0,,,,,...,,,,,,,2.9e-05,3e-05,3.5e-05,1
2021-05-10 12:00:00,SHIBUSDT,3.1e-05,21372690000000.0,1037643.0,0,0,,,,,...,,,,,,,2.7e-05,3.1e-05,3.4e-05,0
2021-05-10 14:00:00,SHIBUSDT,2.9e-05,9380247000000.0,508738.0,0,0,,,,,...,,,,,,,3e-05,3.5e-05,3.1e-05,1
2021-05-10 16:00:00,SHIBUSDT,2.7e-05,11564130000000.0,528332.0,0,0,,,,,...,,,,,,,3.1e-05,3.4e-05,3.2e-05,1
2021-05-10 18:00:00,SHIBUSDT,3e-05,8962142000000.0,437754.0,0,0,,,,,...,,,,,,,3.5e-05,3.5e-05,3.3e-05,1
2021-05-10 20:00:00,SHIBUSDT,3.1e-05,9314410000000.0,469977.0,0,0,,,,,...,,,,,,,3.4e-05,3.4e-05,3.1e-05,1
2021-05-10 22:00:00,SHIBUSDT,3.5e-05,19434380000000.0,864728.0,0,0,,,,,...,,,,,,,3.5e-05,3.1e-05,3e-05,-1
2021-05-11 00:00:00,SHIBUSDT,3.4e-05,9034212000000.0,414005.0,0,1,,,,,...,,,,,,,3.4e-05,3.2e-05,3.2e-05,-1
2021-05-11 02:00:00,SHIBUSDT,3.5e-05,6691246000000.0,357273.0,0,0,,,,,...,,,,,,,3.1e-05,3.3e-05,3.3e-05,-1
2021-05-11 04:00:00,SHIBUSDT,3.4e-05,4910050000000.0,277811.0,0,0,,,,,...,,,,,,,3.2e-05,3.1e-05,3.3e-05,-1


In [28]:
# For every coin keep the row holding its 15th most recent timestamp.
df1 = f_get_last_minute_rows(df, rnk = 15)

# Bare expression in the middle of a cell: it is not displayed.
df1.loc[df1.coin == 'SHIBUSDT', :]

# Uses the MLTrain variant of f_prep_df_to_ML, not the one defined in this
# notebook; presumably element [0] is the feature matrix - TODO confirm against
# MLTrain.
X = mlt.f_prep_df_to_ML(df1.loc[df1.coin == 'SHIBUSDT', :], is_train = False)[0]

X

Unnamed: 0_level_0,vol,tradefreq,last_time,is_day_end,rsi14,rsi28,rsi42,rsi48,rsi72,macd12,signal12,hist12,macd6,signal6,hist6,macd12_ratio,signal12_ratio
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1


In [56]:
X

Unnamed: 0_level_0,vol,tradefreq,last_time,is_day_end,rsi14,rsi28,rsi42,rsi48,rsi72,macd12,signal12,hist12,macd6,signal6,hist6,macd12_ratio,signal12_ratio
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-05-07 20:00:00,1507296000.0,867978.0,0,0,67.285092,65.872928,65.037728,64.713876,63.743638,0.028953,0.030047,-0.001094,0.015661,0.010977,0.004684,0.042723,0.044337


In [21]:
# Load the pickled models; the context manager closes the file handle, which
# the plain open() call left open for the lifetime of the kernel.
# NOTE: pickle.load executes arbitrary code from the file; only load a
# model_dict.pkl that this project wrote itself.
with open("model_dict.pkl","rb") as file:
    model_dict = pickle.load(file)

# Score the feature rows in X with the first object stored under "DOGEUSDT";
# the call returns one row of probabilities per input row.
model_dict.get("DOGEUSDT")[0].predict_proba(X)

array([[0.29072402, 0.10066178, 0.6086142 ]])

In [66]:
# Presumably writes the cells marked "# export" to Financial_Indicators.py -
# TODO confirm against the local `exporter` module.
from exporter import export
export("Financial_Indicators.ipynb","Financial_Indicators.py")

In [None]:
# export
if __name__ == "__main__":
    # Placeholder: when run as a script this only prints a marker; none of the
    # functions in this file are called from here.
    print("main")