In [1]:
from pathlib import Path
import polars as pl
import ind_pl as ind
from pprint import pprint
from continuous import components

v_bands length: 64


In [2]:
def choose_by_length(minimum: int|str=27500, maximum:int|str=420000):
    """checks the length of ohlc history of each trading pair, then makes a list of all pairs whos history length falls
    within the stated range"""

    lengths = {'1 month': 8750, '2 months': 17500, '3 months': 26250, '6 months': 52500,
               '1 year': 105000, '2 years': 210000, '3 years': 315000, '4 years': 420000}

    if isinstance(minimum, str):
        minimum = lengths[minimum]
    if isinstance(maximum, str):
        maximum = lengths[maximum]

    info = {}
    data_path = Path("/home/ross/coding/modular_trader/bin_ohlc_5m")
    for pair_path in list(data_path.glob('*')):
        df = pl.read_parquet(pair_path)
        info[pair_path.stem] = len(df)

    return [p for p, v in info.items() if minimum < v <= maximum]

def resample(df: "pl.DataFrame", timeframe: str) -> "pl.DataFrame":
    """Aggregate OHLC candles into larger candles of length ``timeframe``.

    The frame is sorted by ``timestamp`` and grouped into consecutive windows. Per window:
    first ``open``, highest ``high``, lowest ``low``, last ``close``, and the sum of each
    volume / trade-count column.

    Args:
        df: candle data with the columns ``timestamp``, ``open``, ``high``, ``low``,
            ``close``, ``base_vol``, ``quote_vol``, ``num_trades``,
            ``taker_buy_base_vol`` and ``taker_buy_quote_vol``.
        timeframe: a polars duration string passed to ``group_by_dynamic(every=...)``,
            e.g. ``'1h'``, ``'1d'``, ``'1w'``. Note that ``'1m'`` means one *minute*;
            a calendar month is ``'1mo'``.

    Returns:
        The resampled frame, sorted by ``timestamp``.
    """
    # group_by_dynamic needs the index column to be sorted
    ordered = df.sort('timestamp').set_sorted('timestamp')

    resampled = ordered.group_by_dynamic('timestamp', every=timeframe).agg(
        pl.first('open'),
        pl.max('high'),
        pl.min('low'),
        pl.last('close'),
        pl.sum('base_vol'),
        pl.sum('quote_vol'),
        pl.sum('num_trades'),
        pl.sum('taker_buy_base_vol'),
        pl.sum('taker_buy_quote_vol'),
    )

    # The original built this frame and then fell off the end of the function, so
    # every caller received None. Return the result.
    return resampled.sort('timestamp')

def top_heavy(df: "pl.DataFrame") -> tuple[float, float, float]:
    """Calculate the 'top heavy' metric for one pair.

    The current price is the ``close`` of the last row. Historic ``base_vol`` is split
    into volume traded on candles that closed above the current price and volume traded
    on candles that closed below it. Candles that closed exactly at the current price
    are counted in neither bucket.

    Args:
        df: candle data with at least the columns ``close`` and ``base_vol``.

    Returns:
        A tuple ``(above, below, ratio)`` where ``ratio = above / below``; values
        greater than 1 mean more historic volume sits above the current price than
        below it. If ``below`` is zero the ratio is ``inf`` (or ``nan`` when ``above``
        is zero too) instead of raising ``ZeroDivisionError``.
    """

    current_price = df.item(-1, 'close')

    # `or 0.0` guards against a null/None sum when a filter matches no rows
    above = df.filter(pl.col('close') > current_price)['base_vol'].sum() or 0.0
    below = df.filter(pl.col('close') < current_price)['base_vol'].sum() or 0.0

    if below == 0:
        # price is at (or below) every historic close: the ratio is unbounded
        ratio = float('inf') if above > 0 else float('nan')
    else:
        ratio = above / below

    return above, below, ratio

    

In [3]:
# top_heavy refers to an analysis i just thought up: is there more historic volume above or below current price? 
# this should be calculated as total volume above current price / total volume below

# to analyse the effect of daily/weekly rsi on a pair's suitability for selection, i could go through ohlc history calculating rolling sharpe alongside rsi, then calculate the forward-looking difference between each rsi value and the rsi value 1 week/month ahead (so the diff in each period is a projection of how the rsi will change over the coming week/month), then compare each diff to its corresponding sharpe value to see if there is a 'good' range of rsi values to select for (is it better to select overbought pairs, oversold pairs, or something else?). it might even turn out that a combination of daily and weekly is most useful, e.g. if one is high and the other is low then good, but if both are high or both are low then bad

# it seems that the shortest history that works with the ichi trend strategy is 27500 5min periods

idea: an allocator function that chooses what proportion of capital to allocate to trend-following vs mean-reversion. calculate a price channel based on a rolling window, then analyse where in the channel price spends most of the time. my guess is during trending regimes price will mostly be at the edges of the channel, and during ranging conditions price will spend most of the time in the middle of the channel. 
so an indicator could be created by calculating the distance from the middle of the channel to the (smoothed) current price, so if price is close to the middle, more weight is given to the mean-reverting forecasts, and if price is out at the extremes, more weight is given to the trend-following forecasts. 

i could also investigate whether vwma can tell me anything useful here, like if price is at the edge of the channel and 25h vwma is further out than 25h ema then continuation is more likely, but if ema is further out than vwma then mean-reversion is more likely etc. i could investigate this same idea with rsi too, i.e. when rsi is at the extremes, can the relationship between vwma and a non-volume ma have any predictive power about continuation or reversion? 

this will probably hurt overall profitability but might improve sharpe ratio.

i could even use machine learning here maybe, by looking at the channel position at time t, t-1, t-2, t-3 etc, can i get a solid probability on t+1's price action being either continuation or reversal? could vwma/ema ratio be a useful feature here? or volume delta? or volume z-score? or num_trades z-score?

In [30]:
# Build one row of summary metrics per trading pair and collect them in `info_df`.

# The source data is 5-minute candles, so one day is 288 rows and one week is 2016 rows.
BARS_PER_DAY = 288
BARS_PER_WEEK = 7 * BARS_PER_DAY

info = {
    'pair': [],
    'length': [],
    'daily_volume': [],
    'weekly_volume': [],
    'daily_volume_change': [],
    'daily_atr': [],
    # 'weekly_rsi': [],
    # 'monthly_rsi': [],
    'volume_above_pw': [],  # historic volume above the current price, in multiples of current weekly volume
    'volume_below_pw': [],  # historic volume below the current price, in multiples of current weekly volume
    'top_heavy': [],
    # 'sharpe': [],  # this will have to be backtested
    # 'mcap': [],  # this will have to come from coingecko
}

pairs = choose_by_length()

for pair in pairs:
    pair_path = Path(f"/home/ross/coding/modular_trader/bin_ohlc_5m/{pair}.parquet")
    try:
        df = pl.read_parquet(pair_path)
    except FileNotFoundError:
        # the file disappeared between listing and reading; skip this pair
        continue

    # The weekly/monthly resamples only feed the (still disabled) rsi columns, so they
    # are disabled with them rather than computed and thrown away for every pair.
    # When re-enabling: a calendar month is '1mo' in polars - '1m' means one minute.
    # weekly_df = resample(df, '1w')
    # monthly_df = resample(df, '1mo')

    one_day_volume = df['quote_vol'].tail(BARS_PER_DAY).sum()
    seven_day_volume = df['quote_vol'].tail(BARS_PER_WEEK).sum()
    # latest value of: 1-day-span EWM of quote volume, as a fractional change vs one day earlier
    daily_volume_change = (
        df['quote_vol'].ewm_mean(span=BARS_PER_DAY).pct_change(BARS_PER_DAY)[-1]
    )
    # ind.atr is a project helper; presumably it adds an 'atr_288' column - confirm in ind_pl
    daily_atr = ind.atr(df, BARS_PER_DAY).item(-1, 'atr_288')

    # compute the volume split once instead of once per output column
    volume_above, volume_below, top_heavy_ratio = top_heavy(df)

    info['pair'].append(pair)
    info['length'].append(len(df))
    info['daily_volume'].append(one_day_volume)
    info['weekly_volume'].append(seven_day_volume)
    info['daily_volume_change'].append(daily_volume_change)
    info['daily_atr'].append(daily_atr)
    # info['weekly_rsi'].append(weekly_df['close'])
    # info['monthly_rsi'].append(monthly_df['close'])
    info['volume_above_pw'].append(volume_above / seven_day_volume)
    info['volume_below_pw'].append(volume_below / seven_day_volume)
    info['top_heavy'].append(top_heavy_ratio)

info_df = pl.from_dict(info)

In [31]:
# Summary statistics (count, nulls, mean, std, min, quartiles, max) for every column of info_df.
info_df.describe()

statistic,pair,length,daily_volume,weekly_volume,daily_volume_change,daily_atr,volume_above_pw,volume_below_pw,top_heavy
str,str,f64,f64,f64,f64,f64,f64,f64,f64
"""count""","""359""",359.0,359.0,359.0,359.0,359.0,359.0,359.0,359.0
"""null_count""","""0""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",,294807.401114,22136000.0,152190000.0,0.137095,0.003141,353419.006681,476867.14165,5.552233
"""std""",,118734.529107,116950000.0,766010000.0,2.158015,0.001651,5711200.0,7888200.0,35.192217
"""min""","""1000SATSUSDT""",28202.0,450886.726338,6087900.0,-0.716607,0.000123,0.000128,0.000474,0.010899
"""25%""",,239914.0,1410100.0,12137000.0,-0.316766,0.002201,27.056751,18.648782,0.592877
"""50%""",,318412.0,2606100.0,21156000.0,-0.132067,0.002768,203.883403,131.653572,1.182611
"""75%""",,400011.0,7859700.0,61788000.0,0.062021,0.003487,1308.777943,971.417585,3.316763
"""max""","""ZRXUSDT""",420000.0,1857600000.0,12050000000.0,32.937801,0.015561,107860000.0,149080000.0,641.416838


In [37]:
# Keep pairs that are volatile, liquid and have enough history. All three conditions
# must hold (multiple predicates passed to filter are combined with AND).
lively_pairs = info_df.filter(
    # descending rank below 30 -> only the pairs with the highest daily_atr values
    pl.col('daily_atr').rank(descending=True).lt(30),
    pl.col('daily_volume').gt(2_500_000),
    pl.col('length').gt(27_500),
)
                
# calculate correlation matrix for all pairs in lively pairs, and record avg correlation as a new stat for each pair
# NOTE(review): the step described above is not implemented in any visible cell, and
# 'avg_correlation' is not one of the columns built for info_df. Unless a cell that is
# not shown adds it, the sort below fails on a fresh kernel - TODO add the column first.

# Sorted by avg_correlation descending, the last 12 rows are the 12 lowest values.
(
    lively_pairs
    .sort('avg_correlation', descending=True)
    .tail(12)['pair']
    .to_list()
)

['PHBUSDT',
 'MOVRUSDT',
 'ARKMUSDT',
 'JTOUSDT',
 'HIGHUSDT',
 'RNDRUSDT',
 'UMAUSDT',
 'PEOPLEUSDT',
 'TRBUSDT',
 'FTTUSDT',
 'FRONTUSDT',
 'KMDUSDT']

In [24]:
# Pairs with the most historic volume sitting above the current price: descending rank
# of volume_above_pw below 20 (the rank is taken over all of info_df).
most_overhead_volume = info_df.filter(
    pl.col('volume_above_pw').rank(descending=True) < 20
)

# Of those, more volume above the price than below it ...
dead_pairs = most_overhead_volume.filter(pl.col('top_heavy') > 1)

# ... versus more volume below the price than above it.
likely_shorts = most_overhead_volume.filter(pl.col('top_heavy') < 1)

In [31]:
# Display the high-overhead-volume pairs whose top_heavy ratio is above 1.
dead_pairs

pair,length,daily_volume,weekly_volume,volume_above_pw,volume_below_pw,top_heavy
str,i64,f64,f64,f64,f64,f64
"""MBLUSDT""",420000,1959300.0,23960000.0,20245.136424,17680.237775,1.145072
"""FUNUSDT""",420000,1779000.0,16734000.0,15742.963768,3736.336984,4.213475
"""TROYUSDT""",420000,2717500.0,27461000.0,24187.215055,4544.342496,5.322489
"""1000SATSUSDT""",28514,53604000.0,517280000.0,28727.449875,3051.346916,9.414678
"""WINUSDT""",420000,3257200.0,30805000.0,1623400.0,571936.668037,2.838421
"""HOTUSDT""",420000,8692200.0,113620000.0,27177.378656,20668.197702,1.314937
"""REEFUSDT""",338837,6114800.0,62939000.0,19231.562513,8750.034054,2.197884
"""DENTUSDT""",420000,4861700.0,45726000.0,89219.293618,49981.063421,1.785062
"""LUNCUSDT""",160738,27282000.0,285720000.0,193601.602484,163678.30882,1.182818
"""EPXUSDT""",192178,1276800.0,18946000.0,93991.759347,92892.77861,1.011831


In [32]:
# Display the high-overhead-volume pairs whose top_heavy ratio is below 1.
likely_shorts

pair,length,daily_volume,weekly_volume,volume_above_pw,volume_below_pw,top_heavy
str,i64,f64,f64,f64,f64,f64
"""PEPEUSDT""",92090,622350000.0,5173500000.0,244054.769302,1921200.0,0.12703
"""SHIBUSDT""",300880,403740000.0,3320100000.0,1093700.0,2339100.0,0.467576
"""VTHOUSDT""",386247,2835100.0,21915000.0,27299.943938,32200.924105,0.8478
"""SPELLUSDT""",235330,4417400.0,52628000.0,21975.821573,52943.465335,0.415081
"""XECUSDT""",267538,10516000.0,81596000.0,604733.618449,727384.301537,0.831381
"""BONKUSDT""",27698,83368000.0,1162900000.0,152789.566044,266073.716147,0.574238
"""SUNUSDT""",366027,3058700.0,17495000.0,13501.500557,14016.954354,0.963226
"""BTTCUSDT""",226162,9259800.0,70179000.0,16009000.0,29953000.0,0.534464


In [33]:
# Backtest the selected markets. `components` is already imported in the notebook's
# first cell, so it is not re-imported here.
z = 20

# NOTE(review): the original cell read `markets = ` with no value, which is a syntax
# error and stopped the cell from running at all. Using the lively_pairs selection is
# the reviewer's assumption about what was intended - replace with the list you
# actually want to backtest.
markets = lively_pairs['pair'].to_list()

trader = components.Trader(markets, '3 years', 'perf', False, 3, live=False)
trader.run_backtests(plot_pnls=True, window='2 years')


Trader initialised at 19/03/2024 15:37:34
