In [1]:
print('hi')

hi


In [3]:
import os
import pandas as pd
from collections import defaultdict
from datetime import datetime
from dateutil.relativedelta import relativedelta


# --------- CONFIG ---------
BASE_PATH = "/Users/harshit/Downloads/Research-Commons-Quant/automated-memetoken-index-pipeline/dataframes"
INITIAL_PORTFOLIO_VALUE = 1000


# --------- LOAD ALL MONTHLY TOKENS AND DATA ---------
def load_monthly_tokens_and_data(base_path):
    monthly_tokens = {}
    ohlcv = {}

    month_folders = sorted(os.listdir(base_path))
    for folder in month_folders:
        folder_path = os.path.join(base_path, folder)
        if not os.path.isdir(folder_path):
            continue

        tokens = []
        for file in os.listdir(folder_path):
            if file.endswith(".csv"):
                token = file.replace(".csv", "")
                filepath = os.path.join(folder_path, file)

                df = pd.read_csv(filepath)

                # --- FIX FOR YOUR ERROR ---
                # Try to standardize date column
                if "date" not in df.columns:
                    possible_date_cols = [col for col in df.columns if col.lower() in ["date", "timestamp", "datetime"]]
                    if possible_date_cols:
                        df["date"] = df[possible_date_cols[0]]
                    else:
                        print(f"❌ Skipping {filepath}: no date column found")
                        continue

                df["date"] = pd.to_datetime(df["date"], errors="coerce")
                df = df.dropna(subset=["date"])  # drop rows where date couldn't be parsed
                df.sort_values("date", inplace=True)

                ohlcv[token] = df
                tokens.append(token)

        # Convert folder name like "april23" to "2023-04"
        try:
            month_str = datetime.strptime(folder, "%B%y").strftime("%Y-%m")
        except ValueError:
            print(f"⚠️ Skipping folder '{folder}' — invalid name format for datetime parsing")
            continue

        monthly_tokens[month_str] = tokens

    return monthly_tokens, ohlcv




# --------- BACKTEST FUNCTION ---------
def monthly_backtest(monthly_tokens, ohlcv, initial_portfolio_value=100):
    results = []
    portfolio_value = initial_portfolio_value
    sorted_months = sorted(monthly_tokens.keys())  # ensure chronological order

    for i in range(len(sorted_months) - 1):  # exclude last, since we need next month to test
        month = sorted_months[i]
        next_month = sorted_months[i + 1]
        tokens = monthly_tokens[month]

        print(f"\n📅 Rebalancing for {month} → Simulate holding in {next_month}")

        monthly_returns = []
        for token in tokens:
            if token not in ohlcv:
                print(f"⚠️ Missing OHLCV for {token}")
                continue

            df = ohlcv[token]
            start_date = pd.to_datetime(f"{next_month}-01")
            end_date = (start_date + relativedelta(months=1)) - pd.Timedelta(days=1)

            df_month = df[(df["date"] >= start_date) & (df["date"] <= end_date)]
            if df_month.empty:
                print(f"⚠️ No data for {token} in {next_month}")
                continue

            start_price = df_month.iloc[0]["close"]
            end_price = df_month.iloc[-1]["close"]
            token_return = (end_price / start_price) - 1
            monthly_returns.append(token_return)

        if monthly_returns:
            avg_return = sum(monthly_returns) / len(monthly_returns)
            portfolio_value *= (1 + avg_return)
            results.append({
                "rebalance_month": month,
                "test_month": next_month,
                "monthly_return": avg_return,
                "portfolio_value": portfolio_value
            })
        else:
            print(f"⚠️ Skipping {next_month}, no valid tokens.")
    
    return pd.DataFrame(results)


# --------- RUN BACKTEST ---------
if __name__ == "__main__":
    monthly_tokens, ohlcv = load_monthly_tokens_and_data(BASE_PATH)
    df_results = monthly_backtest(monthly_tokens, ohlcv, INITIAL_PORTFOLIO_VALUE)

    print("\n📊 Final Portfolio Value:", df_results["portfolio_value"].iloc[-1])
    print(df_results)

    # Optionally save results
    # df_results.to_csv("backtest_results.csv", index=False)


⚠️ Skipping folder 'apr23' — invalid name format for datetime parsing
⚠️ Skipping folder 'apr24' — invalid name format for datetime parsing
⚠️ Skipping folder 'aug23' — invalid name format for datetime parsing
⚠️ Skipping folder 'dec23' — invalid name format for datetime parsing
⚠️ Skipping folder 'feb24' — invalid name format for datetime parsing
⚠️ Skipping folder 'jan24' — invalid name format for datetime parsing
⚠️ Skipping folder 'jul23' — invalid name format for datetime parsing
⚠️ Skipping folder 'jun23' — invalid name format for datetime parsing
⚠️ Skipping folder 'mar24' — invalid name format for datetime parsing
⚠️ Skipping folder 'nov23' — invalid name format for datetime parsing
⚠️ Skipping folder 'oct23' — invalid name format for datetime parsing
⚠️ Skipping folder 'sep23' — invalid name format for datetime parsing

📅 Rebalancing for 2023-05 → Simulate holding in 2024-05


TypeError: Invalid comparison between dtype=datetime64[ns, UTC] and Timestamp