In [1]:
import nb_setup 
import importlib
importlib.reload(nb_setup)
nb_setup.init()

Added project root to sys.path: /Users/falcon/Developments/act/backtesting-playground/trading
Changed current working directory to: /Users/falcon/Developments/act/backtesting-playground/trading
Initialized project with base directory: /Users/falcon/Developments/act/backtesting-playground/trading


In [34]:
import numpy as np
import pandas as pd

def rank_column(df, column, ascending=False):
    _df = df.copy()
    col_name = f"rank_{column}"
    if col_name not in _df.columns:
        _df[col_name] = np.nan
    _df.loc[_df[col_name].isna(), col_name] = _df.groupby("start_time")[column].rank(
        ascending=ascending
    )
    return _df    

def prepare_data(data: pd.DataFrame):
    """
    Prepare the data for the strategy
    """
    data = data.copy()
    window_size = 7 * 24 * 4  # Adjust this based on your exact data frequency
    prep_col = [
        "pct_change",
        "rolling_accumulated_pct_change",
        "rolling_variance_pct_change",
        "rank",
    ]
    for col in prep_col:
        if col not in data.columns:
            data[col] = np.nan
    ### pct chage
    data.loc[data["pct_change"].isna(), "pct_change"] = data.groupby(level=0)[
        "close"
    ].pct_change()
    ### rolling_acc_pct_change
    data.loc[
        data["rolling_accumulated_pct_change"].isna(),
        "rolling_accumulated_pct_change",
    ] = data.groupby(level=0)["pct_change"].transform(
        lambda x: (
            x.rolling(window=window_size, min_periods=1).apply(
                lambda y: np.prod(1 + y / 100)
            )
            - 1
        )
        * 100
    )
    data.loc[
        data["rolling_variance_pct_change"].isna(), "rolling_variance_pct_change"
    ] = data.groupby(level=0)["pct_change"].transform(
        lambda x: x.rolling(window=window_size, min_periods=1).var()
    )
    data = rank_column(data, "rolling_accumulated_pct_change", ascending=False)
    data = rank_column(data, "rolling_variance_pct_change", ascending=True)
    data.loc[data["rank"].isna(), "rank"] = (
        data["rank_rolling_accumulated_pct_change"]
        + data["rank_rolling_variance_pct_change"]
    ) / 2
    return data

In [35]:
from settings import DATA_DIR

df = pd.read_csv(DATA_DIR / "all.csv", parse_dates=[1, 2])
df.set_index(["symbol", "start_time"], inplace=True)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,end_time,interval,number_of_trades,close,high,low,open,volume,tic,toc
symbol,start_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
BTC,2024-03-12 00:00:00,2024-03-12 00:14:59,15m,740,72253.0,72389.0,72129.0,72155.0,73.59512,1710201600000,1710202499999
BTC,2024-03-12 00:15:00,2024-03-12 00:29:59,15m,544,72189.0,72360.0,72177.0,72254.0,33.93085,1710202500000,1710203399999
BTC,2024-03-12 00:30:00,2024-03-12 00:44:59,15m,441,72351.0,72411.0,72182.0,72188.0,38.35377,1710203400000,1710204299999
BTC,2024-03-12 00:45:00,2024-03-12 00:59:59,15m,666,72077.0,72351.0,72063.0,72350.0,39.48791,1710204300000,1710205199999
BTC,2024-03-12 01:00:00,2024-03-12 01:14:59,15m,735,72329.0,72360.0,71970.0,72076.0,74.58844,1710205200000,1710206099999


In [36]:
ranked_df = prepare_data(df)


In [37]:
ranked_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,end_time,interval,number_of_trades,close,high,low,open,volume,tic,toc,pct_change,rolling_accumulated_pct_change,rolling_variance_pct_change,rank,rank_rolling_accumulated_pct_change,rank_rolling_variance_pct_change
symbol,start_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
BTC,2024-03-12 00:00:00,2024-03-12 00:14:59,15m,740,72253.0,72389.0,72129.0,72155.0,73.59512,1710201600000,1710202499999,,,,,,
BTC,2024-03-12 00:15:00,2024-03-12 00:29:59,15m,544,72189.0,72360.0,72177.0,72254.0,33.93085,1710202500000,1710203399999,-0.000886,-0.000886,,,44.0,
BTC,2024-03-12 00:30:00,2024-03-12 00:44:59,15m,441,72351.0,72411.0,72182.0,72188.0,38.35377,1710203400000,1710204299999,0.002244,0.001358,5e-06,26.0,33.0,19.0
BTC,2024-03-12 00:45:00,2024-03-12 00:59:59,15m,666,72077.0,72351.0,72063.0,72350.0,39.48791,1710204300000,1710205199999,-0.003787,-0.002429,9e-06,17.5,25.0,10.0
BTC,2024-03-12 01:00:00,2024-03-12 01:14:59,15m,735,72329.0,72360.0,71970.0,72076.0,74.58844,1710205200000,1710206099999,0.003496,0.001067,1.1e-05,16.0,21.0,11.0


In [38]:
def get_non_volatile_coins(df):
    # Group by the 'symbol' level of the MultiIndex and get the index of the max 'start_time' for each 'symbol'
    idx = df.groupby(level='symbol')['end_time'].idxmax()

    # Select the rows with the latest 'end_time' for each 'symbol'
    latest_df = df.loc[idx]

    # Assuming 'rank' is a column in your DataFrame, sort these rows by 'rank' in ascending order
    sorted_latest_df = latest_df.sort_values(by='rank_rolling_variance_pct_change', ascending=True)

    print("dropped:", sorted_latest_df.index.get_level_values('symbol').tolist()[-10:])
    return sorted_latest_df.index.get_level_values('symbol').tolist()[:-10]

good_coins = get_non_volatile_coins(ranked_df)
len(good_coins)

dropped: ['ONDO', 'TAO', 'ALT', 'FTM', 'NEO', 'ORDI', 'JTO', 'WIF', 'PENDLE', 'W']


48

In [39]:
ranked_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 167098 entries, ('BTC', Timestamp('2024-03-12 00:00:00')) to ('W', Timestamp('2024-04-11 00:00:00'))
Data columns (total 16 columns):
 #   Column                               Non-Null Count   Dtype         
---  ------                               --------------   -----         
 0   end_time                             167098 non-null  datetime64[ns]
 1   interval                             167098 non-null  object        
 2   number_of_trades                     167098 non-null  int64         
 3   close                                167098 non-null  float64       
 4   high                                 167098 non-null  float64       
 5   low                                  167098 non-null  float64       
 6   open                                 167098 non-null  float64       
 7   volume                               167098 non-null  float64       
 8   tic                                  167098 non-null  int64         
 9

In [43]:
cols = [
    "pct_change",
    "rolling_accumulated_pct_change",
    "rank_rolling_accumulated_pct_change",
    "rolling_variance_pct_change",
    "rank_rolling_variance_pct_change",
    "rank",
]
non_volatile_reranked_df = ranked_df.loc[good_coins].drop(cols, axis=1)
non_volatile_reranked_df = prepare_data(non_volatile_reranked_df)

In [44]:
non_volatile_reranked_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,end_time,interval,number_of_trades,close,high,low,open,volume,tic,toc,pct_change,rolling_accumulated_pct_change,rolling_variance_pct_change,rank,rank_rolling_accumulated_pct_change,rank_rolling_variance_pct_change
symbol,start_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
TRX,2024-03-12 00:00:00,2024-03-12 00:14:59,15m,26,0.1339,0.13394,0.13384,0.13385,208789.0,1710201600000,1710202499999,,,,,,
TRX,2024-03-12 00:15:00,2024-03-12 00:29:59,15m,20,0.13395,0.13397,0.1339,0.1339,3276.0,1710202500000,1710203399999,0.000373,0.000373,,,31.0,
TRX,2024-03-12 00:30:00,2024-03-12 00:44:59,15m,21,0.13401,0.13404,0.13394,0.13394,3400.0,1710203400000,1710204299999,0.000448,0.000821,2.776268e-09,16.0,30.0,2.0
TRX,2024-03-12 00:45:00,2024-03-12 00:59:59,15m,32,0.13388,0.13402,0.13388,0.13402,134846.0,1710204300000,1710205199999,-0.00097,-0.000149,6.36876e-07,8.5,16.0,1.0
TRX,2024-03-12 01:00:00,2024-03-12 01:14:59,15m,59,0.13391,0.13393,0.13379,0.13388,479736.0,1710205200000,1710206099999,0.000224,7.5e-05,4.433064e-07,10.0,19.0,1.0


In [45]:
non_volatile_reranked_df.index.get_level_values('symbol').nunique()

48

In [46]:
non_volatile_reranked_df.to_csv(DATA_DIR / "ranked.csv")

In [21]:
non_volatile_reranked_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,end_time,interval,number_of_trades,close,high,low,open,volume,tic,toc,pct_change,rolling_accumulated_pct_change,rolling_variance_pct_change,rank,rank_rolling_accumulated_pct_change,rank_rolling_variance_pct_change
symbol,start_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
TRX,2024-03-12 00:00:00,2024-03-12 00:14:59,15m,26,0.1339,0.13394,0.13384,0.13385,208789.0,1710201600000,1710202499999,,,,,,
TRX,2024-03-12 00:15:00,2024-03-12 00:29:59,15m,20,0.13395,0.13397,0.1339,0.1339,3276.0,1710202500000,1710203399999,0.000373,0.000373,,,31.0,
TRX,2024-03-12 00:30:00,2024-03-12 00:44:59,15m,21,0.13401,0.13404,0.13394,0.13394,3400.0,1710203400000,1710204299999,0.000448,0.000821,2.776268e-09,16.0,30.0,2.0
TRX,2024-03-12 00:45:00,2024-03-12 00:59:59,15m,32,0.13388,0.13402,0.13388,0.13402,134846.0,1710204300000,1710205199999,-0.00097,-0.000149,6.36876e-07,8.5,16.0,1.0
TRX,2024-03-12 01:00:00,2024-03-12 01:14:59,15m,59,0.13391,0.13393,0.13379,0.13388,479736.0,1710205200000,1710206099999,0.000224,7.5e-05,4.433064e-07,10.0,19.0,1.0


In [47]:
non_volatile_reranked_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 138288 entries, ('TRX', Timestamp('2024-03-12 00:00:00')) to ('JUP', Timestamp('2024-04-11 00:00:00'))
Data columns (total 16 columns):
 #   Column                               Non-Null Count   Dtype         
---  ------                               --------------   -----         
 0   end_time                             138288 non-null  datetime64[ns]
 1   interval                             138288 non-null  object        
 2   number_of_trades                     138288 non-null  int64         
 3   close                                138288 non-null  float64       
 4   high                                 138288 non-null  float64       
 5   low                                  138288 non-null  float64       
 6   open                                 138288 non-null  float64       
 7   volume                               138288 non-null  float64       
 8   tic                                  138288 non-null  int64         


In [48]:
# Group by the 'symbol' level of the MultiIndex and get the index of the max 'start_time' for each 'symbol'
idx = non_volatile_reranked_df.groupby(level='symbol')['end_time'].idxmax()

# Select the rows with the latest 'end_time' for each 'symbol'
latest_df = non_volatile_reranked_df.loc[idx]

# Assuming 'rank' is a column in your DataFrame, sort these rows by 'rank' in ascending order
sorted_latest_df = latest_df.sort_values(by='rank', ascending=True)


In [49]:
sorted_latest_df.loc[:, [ "end_time", "rank"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,end_time,rank
symbol,start_time,Unnamed: 2_level_1,Unnamed: 3_level_1
BNB,2024-04-11,2024-04-11 00:14:59,4.5
BTC,2024-04-11,2024-04-11 00:14:59,5.5
ETH,2024-04-11,2024-04-11 00:14:59,6.5
XRP,2024-04-11,2024-04-11 00:14:59,9.5
TRX,2024-04-11,2024-04-11 00:14:59,10.5
ADA,2024-04-11,2024-04-11 00:14:59,13.0
ATOM,2024-04-11,2024-04-11 00:14:59,14.0
kSHIB,2024-04-11,2024-04-11 00:14:59,15.0
DYDX,2024-04-11,2024-04-11 00:14:59,16.5
DOT,2024-04-11,2024-04-11 00:14:59,17.0


In [50]:
sorted_latest_df = sorted_latest_df.loc[:, [ "end_time", "rank"]]
sorted_latest_df

Unnamed: 0_level_0,Unnamed: 1_level_0,end_time,rank
symbol,start_time,Unnamed: 2_level_1,Unnamed: 3_level_1
BNB,2024-04-11,2024-04-11 00:14:59,4.5
BTC,2024-04-11,2024-04-11 00:14:59,5.5
ETH,2024-04-11,2024-04-11 00:14:59,6.5
XRP,2024-04-11,2024-04-11 00:14:59,9.5
TRX,2024-04-11,2024-04-11 00:14:59,10.5
ADA,2024-04-11,2024-04-11 00:14:59,13.0
ATOM,2024-04-11,2024-04-11 00:14:59,14.0
kSHIB,2024-04-11,2024-04-11 00:14:59,15.0
DYDX,2024-04-11,2024-04-11 00:14:59,16.5
DOT,2024-04-11,2024-04-11 00:14:59,17.0


In [51]:
sorted_latest_df.to_csv(DATA_DIR / "sorted_latest.csv")