In [4]:
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

import os, sys
from time import time

from pathlib import Path
import numpy as np
import pandas as pd
import pandas_datareader.data as web

import statsmodels.api as sm
from sklearn.feature_selection import mutual_info_regression
from sklearn.preprocessing import scale
import lightgbm as lgb
from scipy.stats import spearmanr
from tqdm import tqdm
import shap

import matplotlib.pyplot as plt
import seaborn as sns


from datetime import datetime, timedelta

import talib

from tumbler.constant import Interval
from tumbler.service.mysql_service import MysqlService
from tumbler.object import BarData, FactorData

use_start_time = datetime(2017, 1, 1)
use_end_time = datetime.now() + timedelta(hours=10)


def create_df(suffix="_usdt"):
    global use_start_time, use_end_time
    mysql_service_manager = MysqlService.get_mysql_service()
    symbols = mysql_service_manager.get_mysql_distinct_symbol(table=MysqlService.get_kline_table(Interval.DAY.value))
    symbols = [x for x in symbols if
                   x.endswith(suffix) and not x.endswith("down_usdt")
                   and not x.endswith("up_usdt") and not x.endswith("bear_usdt") 
                   and not x in ["drep_usdt", "cocos_usdt"]]
    bars = mysql_service_manager.get_bars(symbols=[], period=Interval.DAY.value,
                                          start_datetime=use_start_time,
                                          end_datetime=use_end_time,
                                          sort_way="symbol")

    bars = BarData.suffix_filter(bars, suffix="_usdt")
    bars.sort()

    pd_data = BarData.get_pandas_from_bars(bars)
    pd_data = pd_data.set_index(["symbol", "datetime"]).sort_index().reset_index()
    
    return pd_data


def make_target(df, num_day_rise=1):
    df["target"] = df.groupby(by=['symbol']).apply(
        lambda x: pd.DataFrame(pd.Series(talib.ROC(x.close, num_day_rise)).shift(-1 * num_day_rise), index=x.index)
    )
    return df


In [5]:
df = create_df()
print(df)

df = make_target(df, num_day_rise=1)
print(df)

            symbol            datetime exchange    open    high     low  \
0       1inch_usdt 2020-12-25 08:00:00  BINANCE  0.2000  3.0885  0.2000   
1       1inch_usdt 2020-12-26 08:00:00  BINANCE  2.2958  2.4609  1.5717   
2       1inch_usdt 2020-12-27 08:00:00  BINANCE  1.5970  1.6516  1.0360   
3       1inch_usdt 2020-12-28 08:00:00  BINANCE  1.0600  1.2787  1.0353   
4       1inch_usdt 2020-12-29 08:00:00  BINANCE  1.1140  1.1166  0.7541   
...            ...                 ...      ...     ...     ...     ...   
139341    zrx_usdt 2021-10-17 08:00:00  BINANCE  1.0034  1.0245  0.9541   
139342    zrx_usdt 2021-10-18 08:00:00  BINANCE  0.9903  0.9981  0.9501   
139343    zrx_usdt 2021-10-19 08:00:00  BINANCE  0.9692  0.9873  0.9423   
139344    zrx_usdt 2021-10-20 08:00:00  BINANCE  0.9659  1.0057  0.9553   
139345    zrx_usdt 2021-10-21 08:00:00  BINANCE  1.0045  1.0348  0.9941   

         close        volume  
0       2.2954  1.259776e+08  
1       1.5967  5.703970e+07  
2     

In [6]:
df.to_csv("day_coins_analyse.csv")