In [1]:
import asyncio
import aiohttp
import logging
import json
import datetime as dt
import pandas as pd
import requests
import time

# see documentation: https://docs.amberdata.io/reference/reference-getting-started
class AmberData:
    URL = 'https://web3api.io/api'
    WSS = 'wss://ws.web3api.io'
    QUOTES = ['USD', 'USDC', 'USDT']
    INTERVALS = {
        'minutes': dt.timedelta(days=1),
        'hours': dt.timedelta(days=30),
        'days': dt.timedelta(days=365),
        'weeks': dt.timedelta(days=(365 * 10))
    }
    
    def __init__(self):
        self.key = 'UAKeb75c7488ce4acf005daec2655ab6ebb'

    def header(self):
        headers = {
            'x-api-key': self.key
        }
        return headers

    def _request(self, method, path, params=None):
        if params is None:
            params = {}
        try:
            req = requests.request(method=method, url=self.URL + path, params=params, headers=self.header())
            response = req.json()
            return response.get('payload', dict())
        except Exception as e:
            logging.warning('amberdata exception %s %s', e, req)
            print(path, params, 'exception', e, 'waiting 30 seconds...')
            time.sleep(10)
            return self._request(method, path, params)

    @staticmethod
    def flatten_markets(pair_dict):
        flattened = list()
        for pair, pair_data in pair_dict.items():
            for exchange, exchange_data in pair_data.items():
                flattened_data = dict()
                flattened_data['pair'] = pair
                flattened_data['exchange'] = exchange
                for metric, metric_dates in exchange_data.items():
                    for date in metric_dates:
                        if not metric_dates[date]: continue
                        metric_dates[date] = dt.datetime.fromtimestamp(metric_dates[date] / 1e3)
                    flattened_data[metric] = metric_dates
                flattened.append(flattened_data)
        return flattened

    def get_spot_markets(self, pair=None, exchange=None, time_format='ms', include_dates=True):
        params = {
            'includeDates': str(include_dates).lower(),
            'timeFormat': time_format
        }
        if pair:
            params['pair'] = pair.lower()
        if exchange:
            params['exchange'] = exchange.lower()
        data = self._request('GET', '/v2/market/pairs', params=params)
        return self.flatten_markets(data)

    def get_base_spot_markets(self, base=None, exchange=None, time_format='ms'):
        markets = []
        for quote in self.QUOTES:
            quote_markets = self.get_spot_markets(pair=f"{base}_{quote}", exchange=exchange, time_format=time_format)
            markets += quote_markets
        return markets

    def get_oldest_spot_market(self, base, exchange=None):
        markets = self.get_base_spot_markets(base=base, exchange=exchange)
        if not markets:
            markets = self.get_base_spot_markets(base=base, exchange=None)
        if not markets:
            return dict()
        now = dt.datetime.utcnow()
        oldest = None
        for market in markets:
            if 'ohlc' not in market: continue
            if market['ohlc'].get('startDate') is None or market['ohlc']['endDate'] is None: continue
            if now - market['ohlc']['endDate'] > dt.timedelta(hours=1): continue
            if oldest is None or market['ohlc']['startDate'] < oldest['ohlc']['startDate']: oldest = market
        return oldest

    

    @staticmethod
    def flatten_spot_candles(pair, candles):
        if not candles.get('data'): return dict()
        candle_list = list()
        columns = candles['metadata']['columns']
        for exchange, exchange_candles in candles['data'].items():
            for exchange_candle in exchange_candles:
                candle_data = dict(zip(columns, exchange_candle))
                candle_data['pair'] = pair
                candle_data['exchange'] = exchange
                candle_data['dt'] = dt.datetime.fromtimestamp(candle_data['timestamp'] / 1e3)
                candle_list.append(candle_data)
        return candle_list

    def get_interval_spot_candles(self, pair, exchange, interval, start, end=None, time_format='ms'):
        if end is None:
            end = start + self.INTERVALS[interval]
        params = {
            'exchange': exchange,
            'timeFormat': time_format,
            'startDate': start.isoformat(),
            'endDate': min(start + self.INTERVALS[interval], end).isoformat(),
            'timeInterval': interval
        }
        candles = self._request('GET', f'/v2/market/spot/ohlcv/{pair}/historical', params=params)
        return self.flatten_spot_candles(pair, candles)

    def get_spot_candles(self, pair, exchange, interval, start=None, end=None, time_format='ms'):
        end_time = dt.datetime.utcnow() if end is None else end
        start_time = end_time - self.INTERVALS[interval] if start is None else start
        # print(start_time, end_time)
        candles = []
        while start_time <= end_time:
            interval_candles = self.get_interval_spot_candles(pair, exchange, interval, start_time, end_time, time_format)
            candles += interval_candles
            start_time += self.INTERVALS[interval]
        return candles

    @staticmethod
    def flatten_futures_candles(instrument, candles):
        if not candles.get('data'): return dict()
        candle_list = list()
        # print(candles)
        # columns = candles['metadata']['columns']
        for candle_data in candles['data']:
            # for exchange_candle in exchange_candles:
            # candle_data = dict(zip(columns, exchange_candle))
            candle_data['instrument'] = instrument
            # candle_data['exchange'] = exchange
            candle_data['dt'] = dt.datetime.fromtimestamp(candle_data['timestamp'] / 1e3)
            candle_list.append(candle_data)
        return candle_list

    def get_interval_futures_candles(self, instrument, exchange, interval, start, time_format='ms'):
        params = {
            'exchange': exchange,
            'timeFormat': time_format,
            'startDate': start.isoformat(),
            'endDate': (start + self.INTERVALS[interval]).isoformat(),
            'timeInterval': interval
        }
        candles = self._request('GET', f'/v2/market/futures/ohlcv/{instrument}/historical', params=params)
        return self.flatten_futures_candles(instrument, candles)

    def get_futures_candles(self, instrument, exchange, interval, start=None, end=None, time_format='ms'):
        end_time = dt.datetime.utcnow() if end is None else end
        start_time = end_time - self.INTERVALS[interval] if start is None else start

        candles = []
        while start_time <= end_time:
            interval_candles = self.get_interval_futures_candles(instrument, exchange, interval, start_time, time_format)
            candles += interval_candles
            start_time += self.INTERVALS[interval]
        return candles

    @staticmethod
    def flatten_spot_trades(pair, trades):
        if not trades.get('data'): return dict()
        trade_list = list()
        columns = trades['metadata']['columns']
        for trade in trades['data']:
            trade_data = dict(zip(columns, trade))
            trade_data['pair'] = pair
            trade_data['dt'] = dt.datetime.fromtimestamp(trade_data['timestamp'] / 1e3)
            trade_list.append(trade_data)
        return trade_list

    def get_interval_spot_trades(self, pair, exchange, interval, start, time_format='ms', flatten=True):
        if isinstance(interval, str):
            interval_delta = self.INTERVALS[interval]
        else:
            interval_delta = dt.timedelta(minutes=interval)

    def get_spot_price(self, pair, exchange=None):
        params = {}
        if exchange is not None:
            params['exchange'] = exchange
        spot = self._request('GET', f'/v2/market/spot/prices/pairs/{pair}/latest/', params=params)
        spot['dt'] = dt.datetime.fromtimestamp(spot['timestamp'] / 1e3)
        for field in ['price', 'volume']:
            try:
                spot[field] = float(spot[field])
            except Exception as e:
                print(e, type(e), spot)
        return spot

    def get_interval_funding_rates(self, instrument, exchange, interval, start, time_format='ms'):
        params = {
            'exchange': exchange,
            'timeFormat': time_format,
            'startDate': start.isoformat(),
            'endDate': (start + self.INTERVALS[interval]).isoformat(),
            'timeInterval': interval
        }
        data = self._request('GET', f'/v2/market/futures/funding-rates/{instrument}/historical', params=params)
        funding_rates = data['data']
        for rate in funding_rates:
            rate['dt'] = dt.datetime.fromtimestamp(rate['timestamp'] / 1e3)
            rate['instrument'] = instrument
        return funding_rates

    def get_spot_trades(self, pair, exchange, interval, start=None, end=None, time_format='ms'):
        end_time = dt.datetime.utcnow() if end is None else end
        start_time = end_time - self.INTERVALS[interval] if start is None else start

        trades = []
        while start_time <= end_time:
            interval_trades = self.get_interval_spot_trades(pair, exchange, interval, start_time, time_format)
            trades += interval_trades
            start_time += self.INTERVALS[interval]
        return trades

    def get_funding_rates(self, instrument, exchange, interval='hours', start=None, end=None, time_format='ms'):
        end_time = dt.datetime.utcnow() if end is None else end
        start_time = end_time - self.INTERVALS[interval] if start is None else start

        funding_rates = []
        while start_time <= end_time:
            interval_funding_rates = self.get_interval_funding_rates(instrument, exchange, interval, start_time, time_format)
            funding_rates += interval_funding_rates
            start_time += self.INTERVALS[interval]
        return funding_rates

    def get_latest_funding_rates(self, exchange, instrument=None, time_format='ms'):
        params = {
            'timeFormat': time_format
        }
        if instrument is not None:
            params['instrument'] = instrument
        funding_rates = self._request('GET', f'/v2/market/futures/funding-rates/exchange/{exchange}/latest', params=params)
        for rate in funding_rates:
            rate['dt'] = dt.datetime.fromtimestamp(rate['timestamp'] / 1e3)
            rate['exchange'] = exchange
        return funding_rates





In [2]:
import asyncio
import aiohttp
import logging
import json
import datetime as dt
import pandas as pd
import requests
import time
from datetime import datetime
import numpy as np
def create_changePCT(df, shift, column, time):
    df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
    df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
    return df
    

In [3]:
data_start = datetime.now()
amberdata = AmberData()
now = datetime.now()
start = dt.datetime(2018, 1, 1)
end = dt.datetime.combine(dt.date.today(), dt.time.min)

# spot_candles = amberdata.get_spot_candles(pair='btc_usdt', exchange='binance', interval='hours', start=start, end=end)
# spot_candle_df = pd.DataFrame(spot_candles)
# print(spot_candle_df)
ticker = "BTCUSDT"
exchange_in = 'binance'
interval_in = 'hours'
shift_val = 24
futures_candles = amberdata.get_futures_candles(instrument=ticker, exchange=exchange_in, interval=interval_in, start=start, end=end)
futures_candle_df = pd.DataFrame(futures_candles)
print(futures_candle_df)
end = datetime.now()
print('raw data', end-now)
#futures_candle_df['close_forward24'] = futures_candle_df['close'].shift(-1)
futures_candle_df['volume_24HR'] = futures_candle_df['volume'].rolling(24).sum()
# futures_candle_df['volume_24HR$'] = futures_candle_df['volume_24HR'].rolling(24).sum()*futures_candle_df['close']
# futures_candle_df['volume_$'] = futures_candle_df['volume'].rolling(24).sum()*futures_candle_df['close']



futures_candle_df['volume_$'] = futures_candle_df['volume']*futures_candle_df['close']
futures_candle_df['volume_24HR$'] = futures_candle_df['volume_$'].rolling(24).sum()

      exchange      timestamp      open      high       low     close  \
0      binance  1567962000000  10000.00  10000.00  10000.00  10000.00   
1      binance  1567965600000  10000.00  10000.00  10000.00  10000.00   
2      binance  1567969200000  10344.77  10357.53  10337.43  10340.12   
3      binance  1567972800000  10340.12  10368.64  10334.54  10351.42   
4      binance  1567976400000  10351.42  10391.90  10324.77  10391.90   
...        ...            ...       ...       ...       ...       ...   
30281  binance  1676973600000  24797.40  24804.30  24670.20  24699.60   
30282  binance  1676977200000  24699.60  24756.40  24555.00  24572.60   
30283  binance  1676980800000  24572.60  24663.00  24474.80  24634.10   
30284  binance  1676984400000  24634.10  24699.00  24505.60  24568.60   
30285  binance  1676988000000  24568.60  24786.60  24550.20  24686.40   

          volume instrument                  dt  
0          0.002    BTCUSDT 2019-09-08 13:00:00  
1          0.000    BTC

In [4]:
shift_df = futures_candle_df.copy(deep=True).shift(shift_val)
#24hr raw feature changes
futures_candle_df = create_changePCT(futures_candle_df, shift_df, "close", "24HR")
futures_candle_df= create_changePCT(futures_candle_df, shift_df, "open", "24HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, "high", "24HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, "low", "24HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, 'volume_24HR', "24HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, 'volume_24HR$', "24HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, 'volume', "24HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, 'volume_$', "24HR")
#1hr raw feature changes

shift_df1hr = futures_candle_df.copy(deep=True).shift(1)
futures_candle_df = create_changePCT(futures_candle_df, shift_df1hr, "close", "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df1hr, "open", "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df1hr, "high", "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df1hr, "low", "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df1hr, 'volume_24HR', "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df1hr, 'volume_24HR$', "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, 'volume', "1HR")
futures_candle_df = create_changePCT(futures_candle_df, shift_df, 'volume_$', "1HR")

In [5]:
#for prediction 24hr ahead
futures_candle_df['closeChg%_forward24HR'] = futures_candle_df['closeChg%24HR'].shift(-24)
futures_candle_df['closeChg%_forward1HR'] = futures_candle_df['closeChg%1HR'].shift(-1)
#used parkinson and garman-klass
futures_candle_df['hl_log_sqr'] = np.log(futures_candle_df["high"]/futures_candle_df["low"])**2
#used in garman-klass
futures_candle_df['co_log_sqr'] = np.log(futures_candle_df["close"]/futures_candle_df["open"])**2
#used in rogers-satchell
futures_candle_df['hc_log'] = np.log(futures_candle_df["high"]/futures_candle_df["close"])
futures_candle_df['ho_log'] = np.log(futures_candle_df["high"]/futures_candle_df["open"])
futures_candle_df['lc_log'] = np.log(futures_candle_df["low"]/futures_candle_df["close"])
futures_candle_df['lo_log'] = np.log(futures_candle_df["low"]/futures_candle_df["open"])

In [6]:
day = '3D'
futures_candle_df['close_ewm'+day] = futures_candle_df['close'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['open_ewm'+day] = futures_candle_df['open'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['high_ewm'+day] = futures_candle_df['high'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['low_ewm'+day] = futures_candle_df['low'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['hl_log_sqr_ewm'+day] = futures_candle_df['hl_log_sqr'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['co_log_sqr_ewm'+day] = futures_candle_df['co_log_sqr'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['hc_log_ewm'+day] = futures_candle_df['hc_log'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['ho_log_ewm'+day] = futures_candle_df['ho_log'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['lc_log_ewm'+day] = futures_candle_df['lc_log'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['lo_log_ewm'+day] = futures_candle_df['lo_log'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_ewm'+day] = futures_candle_df['volume'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_24HR_ewm'+day] = futures_candle_df['volume_24HR'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_$_ewm'+day] = futures_candle_df['volume_$'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_24HR$_ewm'+day] = futures_candle_df['volume_24HR$'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()

day = '7D'
futures_candle_df['close_ewm'+day] = futures_candle_df['close'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['open_ewm'+day] = futures_candle_df['open'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['high_ewm'+day] = futures_candle_df['high'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['low_ewm'+day] = futures_candle_df['low'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['hl_log_sqr_ewm'+day] = futures_candle_df['hl_log_sqr'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['co_log_sqr_ewm'+day] = futures_candle_df['co_log_sqr'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['hc_log_ewm'+day] = futures_candle_df['hc_log'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['ho_log_ewm'+day] = futures_candle_df['ho_log'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['lc_log_ewm'+day] = futures_candle_df['lc_log'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['lo_log_ewm'+day] = futures_candle_df['lo_log'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_ewm'+day] = futures_candle_df['volume'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_24HR_ewm'+day] = futures_candle_df['volume_24HR'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_$_ewm'+day] = futures_candle_df['volume_$'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_24HR$_ewm'+day] = futures_candle_df['volume_24HR$'].ewm(halflife='3 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()



day = '21D'
futures_candle_df['close_ewm'+day] = futures_candle_df['close'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['open_ewm'+day] = futures_candle_df['open'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['high_ewm'+day] = futures_candle_df['high'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['low_ewm'+day] = futures_candle_df['low'].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['hl_log_sqr_ewm'+day] = futures_candle_df['hl_log_sqr'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['co_log_sqr_ewm'+day] = futures_candle_df['co_log_sqr'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['hc_log_ewm'+day] = futures_candle_df['hc_log'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['ho_log_ewm'+day] = futures_candle_df['ho_log'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['lc_log_ewm'+day] = futures_candle_df['lc_log'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['lo_log_ewm'+day] = futures_candle_df['lo_log'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_ewm'+day] = futures_candle_df['volume'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_24HR_ewm'+day] = futures_candle_df['volume_24HR'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_$_ewm'+day] = futures_candle_df['volume_$'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
futures_candle_df['volume_24HR$_ewm'+day] = futures_candle_df['volume_24HR$'].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()

In [7]:
#%% make ewm % changes
shifts = [1, 24]
for col in futures_candle_df.columns:
    if "_ewm" in col:
        for s in shifts:
            shift_df_temp = futures_candle_df.copy(deep=True).shift(s)
            futures_candle_df = create_changePCT(futures_candle_df, shift_df_temp, col, str(s)+"HR")
            
            
futures_candle_df.dropna(inplace=True)
futures_candle_df.reset_index(drop = True, inplace=True)  

  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+ti

In [8]:
vol_windows = [1*24,3*24,5*24,7*24, 15*24, 30*24, 60*24]
for x in vol_windows:
    #make close2close
    futures_candle_df['vol_c2c_'+str(x/24)+"D"] = (futures_candle_df['closeChg%24HR'].rolling(x).std()*((365)**.5))*100

    
    
    #make parkinsonConsole 9
    futures_candle_df['vol_park_'+str(x/24)+"D"] = (futures_candle_df['hl_log_sqr'].rolling(x).sum()*(1.0 / (4.0*(x)* np.log(2.0))))**.5
    futures_candle_df['vol_park_'+str(x/24)+"D"] = (futures_candle_df['vol_park_'+str(x/24)+"D"]*((365*24)**.5))*100
    
    
    
    #make garman-klass
    mult_one= 1/(2*x)
    mult_two = (2*np.log(2)-1)/x
    futures_candle_df['vol_garman_'+str(x/24)+"D"] = futures_candle_df['hl_log_sqr']-mult_two*futures_candle_df['co_log_sqr']
    futures_candle_df['vol_garman_'+str(x/24)+"D"] = (mult_one*futures_candle_df['vol_garman_'+str(x/24)+"D"].rolling(x).sum())**.5
    futures_candle_df['vol_garman_'+str(x/24)+"D"] = (futures_candle_df['vol_garman_'+str(x/24)+"D"]*((365*24)**.5))*100
    #rogers-stachell
    mult_one = 1/x
    futures_candle_df['vol_rogers_'+str(x/24)+"D"] = (futures_candle_df['hc_log']*futures_candle_df['ho_log'])+ (futures_candle_df['lc_log']*futures_candle_df['lo_log'])
    futures_candle_df['vol_rogers_'+str(x/24)+"D"] = (mult_one*futures_candle_df['vol_rogers_'+str(x/24)+"D"].rolling(x).sum())**.5
    futures_candle_df['vol_rogers_'+str(x/24)+"D"] = (futures_candle_df['vol_rogers_'+str(x/24)+"D"]*((365*24)**.5))*100


    #make EWM base don halflives
    futures_candle_df['vol_c2c_'+str(x/24)+"D"+"_ewm7D"] = futures_candle_df['vol_c2c_'+str(x/24)+"D"].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_c2c_'+str(x/24)+"D"+"_ewm21D"] = futures_candle_df['vol_c2c_'+str(x/24)+"D"].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_park_'+str(x/24)+"D"+"_ewm7D"] = futures_candle_df['vol_park_'+str(x/24)+"D"].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_park_'+str(x/24)+"D"+"_ewm21D"] = futures_candle_df['vol_park_'+str(x/24)+"D"].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_garman_'+str(x/24)+"D"+"_ewm7D"] = futures_candle_df['vol_garman_'+str(x/24)+"D"].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_garman_'+str(x/24)+"D"+"_ewm21D"] = futures_candle_df['vol_garman_'+str(x/24)+"D"].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_rogers_'+str(x/24)+"D"+"_ewm7D"] = futures_candle_df['vol_rogers_'+str(x/24)+"D"].ewm(halflife='7 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
    futures_candle_df['vol_rogers_'+str(x/24)+"D"+"_ewm21D"] = futures_candle_df['vol_rogers_'+str(x/24)+"D"].ewm(halflife='21 days', times=pd.DatetimeIndex(futures_candle_df['dt'])).mean()
#24hr and 1hr changes  
shifts = [1, 24]
for col in futures_candle_df.columns:
    if "vol_" in col:
        for s in shifts:
            shift_df_temp = futures_candle_df.copy(deep=True).shift(s)
            futures_candle_df = create_changePCT(futures_candle_df, shift_df_temp, col, str(s)+"HR")   
 


  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Chg%"+time] = df[column+"Chg"+time]/shift[column]
  df[column+"Chg"+time] = (futures_candle_df[column]-shift[column])
  df[column+"Ch

In [9]:
futures_candle_df['closeChg%_forward24HR'] = futures_candle_df['closeChg%24HR'].shift(-24)
futures_candle_df['closeChg%_forward1HR'] = futures_candle_df['closeChg%1HR'].shift(-1)
#get up down or chop based on sdev
std_24 = futures_candle_df['closeChg%_forward24HR'].std()
std_1 = futures_candle_df['closeChg%_forward1HR'].std()
for index, row in futures_candle_df.iterrows():
    if abs(row['closeChg%_forward24HR'])>=  std_24:
        if row['closeChg%_forward24HR'] > 0:
            futures_candle_df.at[index,'UpDownPred24HR' ] =1
        else:
            futures_candle_df.at[index,'UpDownPred24HR' ] =-1
    else:
        futures_candle_df.at[index,'UpDownPred24HR' ] =0
    if abs(row['closeChg%_forward1HR'])>=  std_1:
        if row['closeChg%_forward1HR'] > 0:
            futures_candle_df.at[index,'UpDownPred1HR' ] =1
        else:
            futures_candle_df.at[index,'UpDownPred1HR' ] =-1
    else:
        futures_candle_df.at[index,'UpDownPred1HR' ] =0



futures_candle_df.dropna(inplace=True)
futures_candle_df.reset_index(drop = True, inplace=True)  

In [11]:
from pycaret.regression import *
end2 = datetime.now()

bad_cols = ['dt', 'exchange', 'timestamp', 'instrument', 'closeChg%_forward1HR']
#select 100 features
number_columns = 100/(len(futures_candle_df.columns))
a=setup(futures_candle_df,target='closeChg%_forward24HR',
        ignore_features=['dt', 'exchange', 'timestamp', 'instrument', 'closeChg%_forward1HR',  'UpDownPred1HR', 'UpDownPred24HR' ],session_id=11,
        profile=False,  use_gpu=True,  normalize = True,  remove_multicollinearity=True, n_features_to_select=number_columns);



end_setup = datetime.now()
#get best models
models = compare_models(exclude = ['tr', 'lightgbm'],turbo=True, n_select =4)
#pull error df
model_df = pull()
#get top 4 models to train
best_models = model_df.iloc[0:4]
#train best models


end_final = datetime.now()

print('compare_modles', (end_class-end_setup_class))
print('model compare', (end_final-end_setup))
print('model_setup', (end_setup-end2))
print('model compare', (end_final-end_setup))

print('total_time', (end_final-end))
model_lst = []

print(best_models)

[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_GPU=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_GPU=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_GPU=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_GPU=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recomp

Unnamed: 0,Description,Value
0,Session id,11
1,Target,closeChg%_forward24HR
2,Target type,Regression
3,Original data shape,"(28728, 684)"
4,Transformed data shape,"(28728, 113)"
5,Transformed train set shape,"(20109, 113)"
6,Transformed test set shape,"(8619, 113)"
7,Ignore features,7
8,Numeric features,676
9,Preprocess,True


[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_GPU=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1
[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_GPU=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDA=1


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,0.0066,0.0001,0.0102,0.9247,0.0091,1.0931,2.402
rf,Random Forest Regressor,0.0081,0.0001,0.0122,0.8913,0.0111,1.1422,10.363
dt,Decision Tree Regressor,0.0114,0.0004,0.0192,0.7292,0.0147,2.5175,2.951
knn,K Neighbors Regressor,0.0136,0.0005,0.022,0.6457,0.018,2.1404,0.613
ada,AdaBoost Regressor,0.0254,0.0012,0.0352,0.0974,0.0301,2.5072,9.954
ridge,Ridge Regression,0.0251,0.0013,0.0363,0.0422,0.0312,2.3926,0.37
lr,Linear Regression,0.0251,0.0013,0.0363,0.0417,0.0312,2.4011,10.988
br,Bayesian Ridge,0.0251,0.0013,0.0363,0.0402,0.0316,2.1969,0.45
huber,Huber Regressor,0.0249,0.0013,0.0365,0.0299,0.0317,2.1159,1.612
omp,Orthogonal Matching Pursuit,0.0253,0.0014,0.0368,0.0118,0.0328,1.6958,0.353


Processing:   0%|          | 0/80 [00:00<?, ?it/s]

KeyboardInterrupt: 