In [21]:
import pandas as pd
import os
import glob

# Root folder holding one sub-directory of token CSVs per month.
# NOTE(review): this is an absolute, machine-specific path.
BASE_PATH = "/Users/harshit/Downloads/Research-Commons-Quant/automated-memetoken-index-pipeline/dataframes"

# Month sub-directories to load, in processing order.
MONTHS = [
    "may23", "jun23", "jul23", "aug23", "sep23",
    "oct23", "nov23", "dec23", "jan24", "feb24",
]

# Filled by the loading loop: month -> {"prices": DataFrame, "volumes": DataFrame}.
monthly_token_data = dict()

def parse_timestamp(ts):
    """Convert one raw timestamp value into a UTC-aware ``pd.Timestamp``.

    Parameters
    ----------
    ts : str, int, float or datetime-like
        A date/time string (or datetime-like object), or a number holding
        seconds since the Unix epoch.

    Returns
    -------
    pd.Timestamp or pd.NaT
        The parsed UTC timestamp, or ``pd.NaT`` when the value cannot be
        interpreted (including epoch values outside the representable range).

    Notes
    -----
    ``pd.to_datetime`` does not raise for plain numbers: without ``unit`` it
    reads them as nanoseconds since the epoch. Numbers are therefore sent
    straight to the ``unit='s'`` path, otherwise the epoch-seconds fallback
    would never run and such values would silently become 1970 dates.
    """
    import numbers  # local import so the cell does not depend on a new top-level import

    # bool is a subclass of int but is never a meaningful epoch value, so it
    # is left to the generic path below.
    if isinstance(ts, numbers.Real) and not isinstance(ts, bool):
        try:
            return pd.to_datetime(ts, unit='s', utc=True)
        except Exception:
            return pd.NaT

    try:
        return pd.to_datetime(ts, utc=True)
    except Exception:
        # Last resort: the value may be epoch seconds stored as text.
        try:
            return pd.to_datetime(ts, unit='s', utc=True)
        except Exception:
            return pd.NaT

# Load every token CSV of every month, re-index each token by whole days
# elapsed since its first timestamp, and keep only the days that all tokens of
# the month have in common (inner join).
for month in MONTHS:
    # glob returns matches in arbitrary order; sorting keeps the column order
    # of the aligned frames reproducible across runs and machines.
    files = sorted(glob.glob(f"{BASE_PATH}/{month}/*.csv"))
    month_prices = {}
    month_volumes = {}

    for file in files:
        # splitext removes only the trailing extension, whereas
        # str.replace(".csv", "") would also strip ".csv" inside the name.
        token = os.path.splitext(os.path.basename(file))[0]
        try:
            df = pd.read_csv(file)
            df.columns = df.columns.str.lower()

            if not {'timestamp', 'close', 'volume'}.issubset(df.columns):
                print(f"⚠️ Skipping {token} — required columns missing")
                continue

            df['timestamp'] = df['timestamp'].apply(parse_timestamp)
            df = df.dropna(subset=['timestamp'])  # drop rows where timestamp couldn't be parsed

            # Without any usable row there is no launch date to measure from;
            # skip explicitly instead of failing on iloc[0] below.
            if df.empty:
                print(f"⚠️ Skipping {token} — no rows with a valid timestamp")
                continue

            df = df.sort_values(by='timestamp').reset_index(drop=True)

            # Whole days elapsed since the token's earliest row.
            df['days_since_launch'] = (df['timestamp'] - df['timestamp'].iloc[0]).dt.days
            # Several rows can fall on the same day offset; keep the earliest.
            df = df.drop_duplicates(subset='days_since_launch')

            month_prices[token] = df[['days_since_launch', 'close']].set_index('days_since_launch').rename(columns={'close': token})
            month_volumes[token] = df[['days_since_launch', 'volume']].set_index('days_since_launch').rename(columns={'volume': token})

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole month.
            print(f"⚠️ Error loading {token}: {e}")

    if month_prices and month_volumes:
        try:
            # join='inner' keeps only day offsets present for every token, so
            # the token with the fewest days bounds the aligned frame.
            aligned_prices = pd.concat(list(month_prices.values()), axis=1, join='inner')
            aligned_volumes = pd.concat(list(month_volumes.values()), axis=1, join='inner')

            monthly_token_data[month] = {
                "prices": aligned_prices,
                "volumes": aligned_volumes
            }

            print(f"✅ Loaded {month}: {aligned_prices.shape[1]} tokens, {aligned_prices.shape[0]} days")

        except Exception as e:
            print(f"❌ Failed to align {month}: {e}")
    else:
        print(f"❌ Skipped {month}: No valid token data found")


✅ Loaded may23: 5 tokens, 217 days
✅ Loaded jun23: 5 tokens, 17 days
✅ Loaded jul23: 5 tokens, 373 days
✅ Loaded aug23: 5 tokens, 22 days
✅ Loaded sep23: 5 tokens, 2 days
✅ Loaded oct23: 3 tokens, 362 days
✅ Loaded nov23: 4 tokens, 381 days
✅ Loaded dec23: 5 tokens, 256 days
✅ Loaded jan24: 5 tokens, 67 days
✅ Loaded feb24: 5 tokens, 36 days


In [20]:
import pandas as pd

# Quick sanity check of one token file: size, schema, covered date range and
# number of distinct calendar days.
df = pd.read_csv(
    "/Users/harshit/Downloads/Research-Commons-Quant/automated-memetoken-index-pipeline/dataframes/jan24/ONDO.csv"
).rename(columns=str.lower)

print(f"Rows: {df.shape[0]}")
print(f"Columns: {list(df.columns)}")

# errors='coerce' turns unparseable values into NaT; utc=True yields UTC-aware timestamps.
df = df.assign(timestamp=pd.to_datetime(df['timestamp'], errors='coerce', utc=True))

print("First date:", df['timestamp'].min())
print("Last date:", df['timestamp'].max())

# Calendar date of each row, used to count distinct days.
df['date'] = df['timestamp'].dt.date
print("Unique days of data:", df['date'].nunique())


Rows: 399
Columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'trades', 'return', 'log_return', 'cumulative_return', 'sharpe_ratio', 'cum_max', 'drawdown', 'turnover']
First date: 2024-05-01 00:00:00+00:00
Last date: 2025-06-03 00:00:00+00:00
Unique days of data: 399


In [12]:
import pandas as pd

# Quick sanity check of one token file: size, schema, covered date range and
# number of distinct calendar days.
df = pd.read_csv(
    "/Users/harshit/Downloads/Research-Commons-Quant/automated-memetoken-index-pipeline/dataframes/aug23/WAI.csv"
).rename(columns=str.lower)

print(f"Rows: {df.shape[0]}")
print(f"Columns: {list(df.columns)}")

# errors='coerce' turns unparseable values into NaT; utc=True yields UTC-aware timestamps.
df = df.assign(timestamp=pd.to_datetime(df['timestamp'], errors='coerce', utc=True))

print("First date:", df['timestamp'].min())
print("Last date:", df['timestamp'].max())

# Calendar date of each row, used to count distinct days.
df['date'] = df['timestamp'].dt.date
print("Unique days of data:", df['date'].nunique())


Rows: 30
Columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'trades', 'return', 'log_return', 'cumulative_return', 'sharpe_ratio', 'cum_max', 'drawdown', 'turnover']
First date: 2024-08-05 00:00:00+00:00
Last date: 2024-09-03 00:00:00+00:00
Unique days of data: 30


In [13]:
# Reload the WAI file as-is; column names are left exactly as written in the CSV.
df = pd.read_csv(
    "/Users/harshit/Downloads/Research-Commons-Quant/automated-memetoken-index-pipeline/dataframes/aug23/WAI.csv"
)

In [14]:
# Show at most the first 40 rows, in file order.
df.iloc[:40]

Unnamed: 0,timestamp,open,high,low,close,volume,trades,return,log_return,cumulative_return,sharpe_ratio,cum_max,drawdown,turnover
0,2024-09-03 00:00:00+00:00,0.000167,0.000195,0.000167,0.000186,55772.87,189,0.041282,0.040452,0.041282,3.882413,0.000186,0.0,300491200.0
1,2024-09-02 00:00:00+00:00,0.000171,0.000178,0.000165,0.000168,28165.3,100,-0.096724,-0.101728,-0.059436,3.882413,0.000186,-0.096724,167997500.0
2,2024-09-01 00:00:00+00:00,0.000179,0.000183,0.00017,0.00017,15302.49,108,0.016415,0.016281,-0.043997,3.882413,0.000186,-0.081898,89800690.0
3,2024-08-31 00:00:00+00:00,0.000183,0.000185,0.00017,0.000179,26405.35,127,0.050351,0.049125,0.004139,3.882413,0.000186,-0.03567,147528200.0
4,2024-08-30 00:00:00+00:00,0.000176,0.000201,0.000176,0.000183,89478.05,214,0.024014,0.02373,0.028253,3.882413,0.000186,-0.012513,488195400.0
5,2024-08-29 00:00:00+00:00,0.00019,0.000194,0.000153,0.000176,97195.08,283,-0.038959,-0.039739,-0.011807,3.882413,0.000186,-0.050984,551797500.0
6,2024-08-28 00:00:00+00:00,0.000215,0.000232,0.00019,0.00019,99128.21,254,0.078241,0.075331,0.06551,3.882413,0.00019,0.0,521935400.0
7,2024-08-27 00:00:00+00:00,0.000197,0.000278,0.000193,0.000216,278975.3,579,0.139198,0.130325,0.213827,3.882413,0.000216,0.0,1289395000.0
8,2024-08-26 00:00:00+00:00,0.000172,0.000227,0.000161,0.000197,148813.2,410,-0.090065,-0.094382,0.104503,3.882413,0.000216,-0.090065,755877400.0
9,2024-08-25 00:00:00+00:00,0.000143,0.000184,0.000124,0.000172,164657.3,447,-0.127792,-0.136727,-0.036643,3.882413,0.000216,-0.206347,958894500.0
