In [25]:
import pandas as pd

# Load the semicolon-delimited USTC price history export into a DataFrame.
df = pd.read_csv("raw_data/ustc_historic_data.csv", sep=";")

# A notebook cell renders only its final expression, so finish on the column
# listing: it confirms the schema the later feature cells rely on.
df.columns

Index(['timeOpen', 'timeClose', 'timeHigh', 'timeLow', 'name', 'open', 'high',
       'low', 'close', 'volume', 'marketCap', 'circulatingSupply',
       'timestamp'],
      dtype='object')

In [26]:
# Parse every timestamp-like column into pandas datetimes, one column at a time.
time_cols = ["timeOpen", "timeClose", "timeHigh", "timeLow", "timestamp"]
for col in time_cols:
    df[col] = pd.to_datetime(df[col])

# Keep a single midnight-normalised `date` column as the date reference and
# discard the raw timestamp columns it was derived from.
df = (
    df
    .assign(date=df["timestamp"].dt.normalize())
    .drop(columns=time_cols)
)


In [27]:
#setting up predictors
import numpy as np

# Peg target in USD. All deviation features below are measured against this
# single constant instead of repeating the literal 1.
PEG_PRICE = 1.0

# sort and index date
df = df.sort_values('date').set_index('date')

# asfreq() reindexes onto a daily calendar and cannot do so when two rows
# share the same date label; fail early with a readable message instead.
assert df.index.is_unique, "duplicate dates after normalising timestamps"

# Fill calendar gaps by carrying the previous available row forward.
df = df.asfreq('D', method='ffill')

# 1. PRICE DEVIATION FROM THE PEG
df['price_dev'] = df['close'] - PEG_PRICE           # signed deviation
df['abs_price_dev'] = df['price_dev'].abs()         # magnitude only
df['rel_price_dev'] = df['price_dev'] / PEG_PRICE   # deviation as a fraction of the peg

In [28]:
# 2. PRICE VOLATILITY / DAILY SWINGS
# Daily swing: the high-low spread expressed as a fraction of the close.
df['intraday_range_pct'] = df['high'].sub(df['low']).div(df['close'])

# Rolling 7-day volatility: standard deviation of day-over-day close returns.
daily_return = df['close'].pct_change()
df['volatility_7d'] = daily_return.rolling(window=7).std()

In [29]:
# 3. VOLUME / LIQUIDITY STRESS
# Centre and scale over the SAME look-back so the result is a true z-score
# (mixing a 7-day mean with a 20-day std is not one). Change the window here
# if a different baseline length is wanted.
VOLUME_Z_WINDOW = 20
volume_baseline = df['volume'].rolling(VOLUME_Z_WINDOW)
df['volume_zscore'] = (df['volume'] - volume_baseline.mean()) / volume_baseline.std()

# Average day-over-day change of the z-score across the last 3 days.
df['volume_acceleration'] = df['volume_zscore'].diff().rolling(3).mean()

# Traded volume relative to circulating supply.
df['turnover'] = df['volume'] / df['circulatingSupply']

# Today's intraday range relative to its own 7-day rolling mean.
df['range_expansion'] = df['intraday_range_pct'] / df['intraday_range_pct'].rolling(7).mean()
# Optional: df['volume_chg'] = df['volume'].pct_change()
# Optional: df['volume_ma7'] = df['volume'].rolling(7).mean()
# Optional: df['price_range_proxy'] = df['intraday_range_pct']

In [30]:
# 4. SUPPLY & MARKET CAP CHANGES
circ_supply = df['circulatingSupply']
market_cap = df['marketCap']

# Day-over-day fractional change in circulating supply, reused for the flag below.
supply_change = circ_supply.pct_change()

df['supply_chg'] = supply_change
df['mcap_chg'] = market_cap.pct_change()
df['mcap_velocity'] = market_cap.pct_change(periods=3)   # change over 3 days
df['supply_panic'] = supply_change.abs().gt(0.05)        # True when supply moves more than 5% in a day
df['volume_mcap_ratio'] = df['volume'].div(market_cap)

In [31]:
#5. EARLY WARNING / ACCELERATION SIGNALS
close = df['close']

# Mean of the last 3 day-over-day changes in absolute peg deviation.
df['dev_acceleration'] = df['abs_price_dev'].diff().rolling(3).mean()

# Running count of consecutive days closing above 0.999. Each close at or
# below 0.999 opens a new group, so the count restarts from zero there.
is_stable = close.gt(0.999).astype(int)
break_group = close.le(0.999).astype(int).cumsum()
df['peg_stable_days'] = is_stable.groupby(break_group).cumsum()

# Day-over-day change of the streak length (0 for the first row).
df['peg_breakdown'] = df['peg_stable_days'].diff().fillna(0)

# 1 when the close moved more than 2% versus the previous day, else 0.
df['price_shock'] = close.pct_change().abs().gt(0.02).astype(int)

# 1 when volume exceeds the 95th percentile of its 30-day rolling window, else 0.
volume_p95 = df['volume'].rolling(30).quantile(0.95)
df['volume_explosion'] = df['volume'].gt(volume_p95).astype(int)


In [21]:
# Preview the first five rows of the feature frame.
# NOTE(review): this cell's execution count (In [21]) is lower than the feature
# cells before it (In [25]-[31]), and its saved output lists columns such as
# volume_chg, volume_ma7 and price_range_proxy that the current cells only
# mention as commented-out options. The output looks stale; refresh it with
# Restart Kernel -> Run All before relying on it.
df.head()



Unnamed: 0_level_0,name,open,high,low,close,volume,marketCap,circulatingSupply,price_dev,abs_price_dev,...,intraday_range_pct,volatility_7d,volume_chg,volume_ma7,volume_zscore,supply_chg,mcap_chg,volume_mcap_ratio,price_range_proxy,turnover
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-12-28 00:00:00+00:00,2781,1.00056,1.003111,0.999108,1.002239,139519000.0,10056420000.0,10033960000.0,0.002239,0.002239,...,0.003994,,,,,,,0.013874,0.003994,0.013905
2021-12-29 00:00:00+00:00,2781,1.002244,1.003347,0.99927,1.001423,157592600.0,10079440000.0,10065110000.0,0.001423,0.001423,...,0.004072,,0.129542,,,0.003105,0.002289,0.015635,0.004072,0.015657
2021-12-30 00:00:00+00:00,2781,1.001019,1.00295,0.99973,1.001984,110621900.0,10117710000.0,10097680000.0,0.001984,0.001984,...,0.003213,,-0.298051,,,0.003235,0.003797,0.010933,0.003213,0.010955
2021-12-31 00:00:00+00:00,2781,1.001748,1.002327,0.998924,1.00144,135703600.0,10137120000.0,10122540000.0,0.00144,0.00144,...,0.003398,,0.226733,,,0.002463,0.001918,0.013387,0.003398,0.013406
2022-01-01 00:00:00+00:00,2781,1.001657,1.002204,0.998407,1.000757,126635800.0,10133540000.0,10125870000.0,0.000757,0.000757,...,0.003795,,-0.066821,,,0.000329,-0.000353,0.012497,0.003795,0.012506
