In [None]:
import nb_setup 
import importlib
# Re-execute nb_setup so that edits made to the module after the kernel
# started are picked up without a restart.
importlib.reload(nb_setup)
# Project-specific notebook initialisation; what it configures is defined in
# nb_setup and is not visible from this notebook (presumably paths/sys.path
# so that `settings` is importable — TODO confirm).
nb_setup.init()

In [None]:
import pandas as pd
from settings import DATA_DIR

# Load the ranked data set; the first CSV column becomes the index.
# `start_time` is parsed to datetime here so that its dtype does not depend on
# a later cell converting the column as a side effect (which made the result
# of `df.info()` depend on cell execution order).
df = pd.read_csv(
    DATA_DIR / "all_ranked.csv",
    index_col=[0],
    parse_dates=["start_time"],
)
df.head()

In [12]:
# Print the column dtypes, non-null counts and memory footprint of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 164217 entries, BTC to LDO
Data columns (total 6 columns):
 #   Column                               Non-Null Count   Dtype         
---  ------                               --------------   -----         
 0   start_time                           164217 non-null  datetime64[ns]
 1   average_rank                         164160 non-null  float64       
 2   rank_rolling_accumulated_pct_change  164160 non-null  float64       
 3   rank_rolling_variance_pct_change     164103 non-null  float64       
 4   open                                 164217 non-null  float64       
 5   close                                164217 non-null  float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 8.8+ MB


In [16]:
import numpy as np
import pandas as pd


def _initialize(df_final):
    # Initial principal amount
    principal = 100

    # Assuming df_final is your prepared DataFrame
    df_final["start_time"] = pd.to_datetime(df_final["start_time"])
    df_final = df_final.sort_values(by=["start_time", "average_rank"])

    # Find unique timestamps
    timestamps = df_final["start_time"].unique()

    return df_final, principal, timestamps


def _get_intervals(
    df_final,
    current_time,
    i,
    trading_interval,
    timestamps,
    verbose,
):
    """
    Inputs:
    df_final: DataFrame
    current_time: Timestamp
    i: int  # Index of the current loop
    trading_interval: DataFrame
    timestamps: list of start_times
    verbose: bool
    """
    if i + trading_interval < len(timestamps):
        close_time = timestamps[i + trading_interval]
    else:
        close_time = timestamps[-1]

    if verbose:
        print("Current time:", current_time)
        print("Close time:", close_time)

    # Filter the DataFrame for the current timestamp
    last_interval = df_final[
        (df_final["start_time"] >= current_time - pd.Timedelta(minutes=15))
        & (df_final["start_time"] < close_time - pd.Timedelta(minutes=15))
    ]

    current_interval = df_final[
        (df_final["start_time"] >= current_time) & (df_final["start_time"] < close_time)
    ]

    if verbose:
        print(
            current_interval.loc[:, ["start_time", "open", "close"]].head(),
        )

    return last_interval, current_interval, close_time


def calculate_returns(
    df_final,
    trading_interval=1,
    verbose=False,
    loop=None,
    slippage_and_cost=0.003,
    transaction_cost=0.00025,
):
    """
    Simulate a long/short strategy driven by "average_rank".

    At every step the 10 best-ranked rows of the bar preceding the entry time
    are bought and the 10 worst-ranked rows are shorted, each with 1/20 of the
    current principal. Positions are opened at the "open" price of the entry
    bar and closed at the "close" price of the bar at close_time.

    Inputs:
    df_final: DataFrame whose index identifies the asset and which has the
        columns "start_time", "average_rank", "open" and "close"
    trading_interval: int  # Number of timestamps between two rebalances
    verbose: bool  # Print the intermediate price and cash-flow arrays
    loop: int or None  # Stop after this many recorded intervals (None = all)
    slippage_and_cost: float  # Fraction added to buy / removed from sell prices
    transaction_cost: float  # Fee fraction, charged on the long legs only

    Returns:
    DataFrame with one row per recorded interval and the columns
    "timestamp", "return" and "principal" (empty, with those columns, when
    no interval could be simulated).

    Note: an asset selected for trading must have a row in both the entry bar
    and the exit bar; otherwise the label lookup raises KeyError.
    """
    # Initialize
    df_final, principal, timestamps = _initialize(df_final)
    results = []

    # Adjust loop to step through timestamps according to trading_interval
    current = 0
    for i in range(0, len(timestamps) - trading_interval, trading_interval):
        current_time = timestamps[i]
        print("---------------------------------------------")
        print("Current time:", current_time)
        print("---------------------------------------------")

        # Ensure we do not go out of bounds for the last interval
        last_interval, current_interval, close_time = _get_intervals(
            df_final,
            current_time,
            i,
            trading_interval,
            timestamps,
            verbose,
        )
        if current_interval.empty or last_interval.empty:
            continue

        # Rank on ONE bar only: the most recent bar strictly before the entry
        # time. With trading_interval > 1 the look-back window also contains
        # bars at/after current_time, so taking tail(10) of the whole window
        # ranked the short leg on data that is not yet known at entry.
        prior_bars = last_interval[last_interval["start_time"] < current_time]
        if prior_bars.empty:
            continue
        ranking_bar = prior_bars[
            prior_bars["start_time"] == prior_bars["start_time"].max()
        ]

        # Entry prices come from the bar at current_time only. The holding
        # window holds one row per asset per bar, so a label lookup on the
        # whole window returned several opens per asset and no longer lined
        # up with the close prices.
        entry_bar = current_interval[current_interval["start_time"] == current_time]

        # Calculate investment per coin
        investment_per_coin = principal / 20  # Divided among top 10 and bottom 10

        # Rows are ordered by average_rank within a bar, so head/tail give the
        # best and worst ranked assets.
        top_10 = ranking_bar.head(10).copy()
        bottom_10 = ranking_bar.tail(10).copy()

        # NOTE(review): the `i == 1` case is skipped unconditionally; the
        # reason is not visible in this code (possibly a warm-up bar) — confirm.
        if (
            np.isnan(top_10["average_rank"]).all()
            or np.isnan(bottom_10["average_rank"]).all()
            or i == 1
        ):
            interval_return = 0
            principal += interval_return
        else:
            # Open prices of the selected assets at the entry bar
            top_10_open_prices = entry_bar.loc[top_10.index, "open"].values
            bottom_10_open_prices = entry_bar.loc[bottom_10.index, "open"].values

            # Get closing prices at close_time
            future_interval = df_final[df_final["start_time"] == close_time]
            if future_interval.empty:
                continue
            top_10_close_prices = future_interval.loc[top_10.index, "close"].values
            bottom_10_close_prices = future_interval.loc[
                bottom_10.index, "close"
            ].values

            # Long the top 10: cash paid to buy (negative), including the fee,
            # expressed per unit of the slippage-adjusted entry price.
            long_asset = (
                (
                    -top_10_open_prices * (1 + slippage_and_cost)
                    - top_10_open_prices * (1 + slippage_and_cost) * (transaction_cost)
                )
                / (top_10_open_prices * (1 + slippage_and_cost))
            ) * (investment_per_coin)

            # Cash received when the long position is sold at the exit bar.
            sell_longed_asset = (
                (
                    +top_10_close_prices * (1 - slippage_and_cost)
                    - top_10_close_prices * (1 - slippage_and_cost) * (transaction_cost)
                )
                / (top_10_open_prices * (1 + slippage_and_cost))
            ) * (investment_per_coin)
            long_returns = long_asset + sell_longed_asset

            # Short the bottom 10: cash received when selling short.
            # NOTE: transaction_cost is not applied to either short leg; only
            # slippage_and_cost is.
            short_asset = (
                (bottom_10_open_prices * (1 - slippage_and_cost))
                / (bottom_10_open_prices * (1 - slippage_and_cost))
            ) * (investment_per_coin)

            # Cash paid to buy the shorted assets back at the exit bar.
            buy_shorted_asset = (
                (-bottom_10_close_prices * (1 + slippage_and_cost))
                / (bottom_10_open_prices * (1 - slippage_and_cost))
            ) * (investment_per_coin)
            short_returns = short_asset + buy_shorted_asset

            # Calculate returns
            interval_return = long_returns.sum() + short_returns.sum()
            principal += interval_return

            if verbose:
                print("=========Verbose Section===========")
                print(
                    "top_10_open_prices", top_10_open_prices * (1 + slippage_and_cost)
                )
                print(
                    "top_10_close_prices", top_10_close_prices * (1 - slippage_and_cost)
                )
                print(
                    "bottom_10_open_prices",
                    bottom_10_open_prices * (1 - slippage_and_cost),
                )
                print(
                    "bottom_10_close_prices",
                    bottom_10_close_prices * (1 + slippage_and_cost),
                )
                print("long_asset", long_asset)
                print("sell_longed_asset", sell_longed_asset)
                print("long_returns", long_returns)
                print("short_asset", short_asset)
                print("buy_shorted_asset", buy_shorted_asset)
                print("short_returns", short_returns)

        results.append(
            {
                "timestamp": current_time,
                "return": interval_return,
                "principal": principal,
            }
        )

        print("---------------------------------------------")
        print(
            f"For interval {i//trading_interval} starting at {current_time}: Long {top_10.index.values}, Short {bottom_10.index.values}"
        )
        print(
            f"For interval {i//trading_interval} starting at {current_time}: Long {top_10['average_rank'].values}, Short {bottom_10['average_rank'].values}"
        )
        print(
            f"In this interval, the return is {interval_return}, new principal is {principal}"
        )

        # `loop` counts recorded intervals only; skipped steps do not count.
        current += 1
        if loop is not None and current == loop:
            break

    # Summarize results. The columns are given explicitly so that an empty
    # run still yields a frame with a "return" column instead of a KeyError.
    results_df = pd.DataFrame(results, columns=["timestamp", "return", "principal"])
    total_performance = results_df["return"].sum()
    print("Total simulated return over the period:", total_performance)
    return results_df


# Example usage on the frame loaded above: a short, frictionless smoke run.
# It rebalances on every timestamp (trading_interval=1), stops after 3
# recorded intervals (loop=3), sets slippage and fees to zero, and prints the
# intermediate arrays (verbose=True).
results_df = calculate_returns(
    df,
    trading_interval=1,
    loop=3,
    verbose=True,
    slippage_and_cost=0,
    transaction_cost=0,
)  # Adjust trading_interval as needed

---------------------------------------------
Current time: 2024-02-10 00:00:00
---------------------------------------------
Current time: 2024-02-10 00:00:00
Close time: 2024-02-10 00:15:00
       start_time        open        close
symbol                                    
BTC    2024-02-10  47166.0000  47244.00000
ETH    2024-02-10   2488.5000   2493.70000
SOL    2024-02-10    107.0300    107.63000
WLD    2024-02-10      2.4923      2.49230
WIF    2024-02-10      0.2523      0.26613
---------------------------------------------
Current time: 2024-02-10 00:15:00
---------------------------------------------
Current time: 2024-02-10 00:15:00
Close time: 2024-02-10 00:30:00
                start_time     open     close
symbol                                       
WIF    2024-02-10 00:15:00   0.2671   0.27167
MINA   2024-02-10 00:15:00   1.2795   1.29860
MAVIA  2024-02-10 00:15:00   5.7678   5.85860
AVAX   2024-02-10 00:15:00  38.1990  38.53800
ENS    2024-02-10 00:15:00  21.3120  21

In [None]:
import matplotlib.pyplot as plt

# Equity curve: principal recorded after each simulated interval.
fig, ax = plt.subplots()
ax.plot(results_df["timestamp"], results_df["principal"])
ax.set(
    title="Simulated principal over time",
    xlabel="Interval start",
    ylabel="Principal",
)
plt.show()

In [None]:
# Profit or loss of each simulated interval, in the same units as the principal.
fig, ax = plt.subplots()
ax.plot(results_df["timestamp"], results_df["return"])
ax.set(
    title="Return per interval",
    xlabel="Interval start",
    ylabel="Return",
)
plt.show()


In [None]:
# Summary statistics of the per-interval returns recorded by the simulation.
results_df["return"].describe()