In [17]:
import pandas as pd
import matplotlib.pyplot as plt

## BTC

In [18]:
# Load the BTC dataset, parsing the "timestamp" column as datetimes.
btc = pd.read_csv(filepath_or_buffer="BTC/btc_final.csv", parse_dates=["timestamp"])

### Moving Average (MA)

In [19]:
# Simple moving averages of the close price over 6-row and 24-row windows
# (the rows shown are hourly, hence the "6h" / "24h" wording below).
btc["ma_6"] = btc["close_price"].rolling(window=6).mean().round(4)
btc["ma_24"] = btc["close_price"].rolling(window=24).mean().round(4)

# Crossover flag, computed column-wise instead of with a row-by-row apply:
#   1    -> short MA is strictly above long MA (buy signal)
#   0    -> short MA is at or below long MA (no buy signal)
#   <NA> -> at least one average is still undefined (rolling warm-up rows)
btc["ma_crossover_signal"] = (
    (btc["ma_6"] > btc["ma_24"])
    .astype("Int64")
    .where(btc["ma_6"].notna() & btc["ma_24"].notna())
)

# Human-readable form of the flag; rows without a flag stay None.
btc["ma_crossover_description"] = btc["ma_crossover_signal"].apply(
    lambda flag: (
        None if pd.isna(flag)
        else "short MA (6h) is above long MA (24h) → buy signal" if flag == 1
        else "short MA (6h) is below long MA (24h) → no buy signal"
    )
)
btc.head(30)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_btc,avg_fee_btc,total_size_used_mb,unique_active_wallets,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description
0,2024-05-01 00:00:00,60208.817,8511.2535,514130300.0,27131,41488.3624,6.4e-05,11.7035,50034,1192806000000.0,,,,
1,2024-05-01 01:00:00,60095.5002,8853.2447,532178000.0,25263,29720.9307,6.3e-05,11.459,39480,1185496000000.0,,,,
2,2024-05-01 02:00:00,59886.5202,8690.0466,519349100.0,21228,29104.3936,6e-05,7.82,37710,1183924000000.0,,,,
3,2024-05-01 03:00:00,60206.0485,5206.5224,312751300.0,19018,62407.2166,7e-05,8.3795,42124,1179127000000.0,,,,
4,2024-05-01 04:00:00,60241.4675,4977.9718,299829900.0,26684,44802.0762,5.7e-05,11.0749,36411,1185334000000.0,,,,
5,2024-05-01 05:00:00,60005.2504,5928.1266,356062900.0,11898,26359.225,9.2e-05,4.5227,27976,1185417000000.0,60107.2673,,,
6,2024-05-01 06:00:00,59501.2836,7846.8091,468895300.0,17902,27977.1218,8.2e-05,7.7468,42564,1180281000000.0,59989.3451,,,
7,2024-05-01 07:00:00,57466.2288,48781.7905,2831902000.0,18476,80240.6812,8.2e-05,7.7279,46975,1174252000000.0,59551.1332,,,
8,2024-05-01 08:00:00,57103.0541,34919.7273,1990814000.0,36958,77431.8478,6.2e-05,14.1263,59083,1129262000000.0,59087.2222,,,
9,2024-05-01 09:00:00,57096.7474,14509.3452,830822400.0,36684,38395.1546,5.4e-05,13.1422,48869,1119474000000.0,58569.0053,,,


### MACD (Moving Average Convergence Divergence)

`MACD`: a momentum indicator that shows the relationship between two Exponential Moving Averages (EMA)

`EMA`: gives more weight to recent prices, making it more responsive than a simple moving average (MA)

MACD consists of 3 main components:
1. MACD Line = EMA(12) - EMA(26): measures trend momentum.
2. Signal Line = EMA(9) of MACD: smooths the MACD line.
3. MACD Histogram = MACD Line - Signal Line: shows the strength of momentum change.

- A crossover (MACD > Signal) is commonly interpreted as a buy signal.
- A crossover (MACD < Signal) is commonly interpreted as a sell signal.

- 当 MACD线 > Signal线：表示上涨动量增强，通常是买入点
- 当 MACD线 < Signal线：表示下跌动量增强，通常是卖出点
- 两者的差值越大，说明动量越强


In [20]:
# Fast (span 12) and slow (span 26) exponential moving averages of the close
# price, each kept to 4 decimals.
btc["ema_12"] = btc["close_price"].ewm(span=12, adjust=False).mean().round(decimals=4)
btc["ema_26"] = btc["close_price"].ewm(span=26, adjust=False).mean().round(decimals=4)

# MACD line: fast EMA minus slow EMA.
btc["macd"] = btc["ema_12"].sub(btc["ema_26"]).round(decimals=4)

# Signal line: a span-9 EMA of the MACD line, i.e. a smoothed MACD curve
# that the MACD line is compared against to generate buy/sell signals.
btc["macd_signal"] = btc["macd"].ewm(span=9, adjust=False).mean().round(decimals=4)

# Histogram: MACD line minus signal line.
btc["macd_hist"] = btc["macd"].sub(btc["macd_signal"]).round(decimals=4)

# Crossover flag: 1 while MACD is strictly above the signal line, otherwise 0.
btc["macd_crossover_signal"] = btc["macd"].gt(btc["macd_signal"]).astype(int)

# Text form of the crossover flag.
btc["macd_crossover_description"] = btc["macd_crossover_signal"].apply(
    lambda flag: (
        None if flag not in (0, 1)
        else "MACD (12h vs 26h EMA) is above signal line → bullish momentum" if flag == 1
        else "MACD (12h vs 26h EMA) is below signal line → no bullish signal"
    )
)

In [21]:
# Preview the first five rows, now including the MACD columns.
btc.head(n=5)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_btc,avg_fee_btc,total_size_used_mb,unique_active_wallets,market_cap,...,ma_24,ma_crossover_signal,ma_crossover_description,ema_12,ema_26,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description
0,2024-05-01 00:00:00,60208.817,8511.2535,514130300.0,27131,41488.3624,6.4e-05,11.7035,50034,1192806000000.0,...,,,,60208.817,60208.817,0.0,0.0,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...
1,2024-05-01 01:00:00,60095.5002,8853.2447,532178000.0,25263,29720.9307,6.3e-05,11.459,39480,1185496000000.0,...,,,,60191.3836,60200.4232,-9.0396,-1.8079,-7.2317,0,MACD (12h vs 26h EMA) is below signal line → n...
2,2024-05-01 02:00:00,59886.5202,8690.0466,519349100.0,21228,29104.3936,6e-05,7.82,37710,1183924000000.0,...,,,,60144.4816,60177.1711,-32.6895,-7.9842,-24.7053,0,MACD (12h vs 26h EMA) is below signal line → n...
3,2024-05-01 03:00:00,60206.0485,5206.5224,312751300.0,19018,62407.2166,7e-05,8.3795,42124,1179127000000.0,...,,,,60153.9534,60179.3102,-25.3568,-11.4587,-13.8981,0,MACD (12h vs 26h EMA) is below signal line → n...
4,2024-05-01 04:00:00,60241.4675,4977.9718,299829900.0,26684,44802.0762,5.7e-05,11.0749,36411,1185334000000.0,...,,,,60167.4171,60183.9144,-16.4973,-12.4665,-4.0308,0,MACD (12h vs 26h EMA) is below signal line → n...


### Transaction Count Ratio

Compares the current hour's transaction count to the mean over the trailing 24-hour window (the window includes the current hour).

It helps assess whether on-chain activity is surging, normal, or low.

In [22]:
# Temporary column: 24-row rolling mean of the transaction count.
btc["txn_count_mean_24h"] = btc["transaction_count"].rolling(window=24).mean()

# Ratio of each row's transaction count to that rolling mean, to 4 decimals.
btc["txn_ratio"] = btc["transaction_count"].div(btc["txn_count_mean_24h"]).round(decimals=4)
btc.head(n=30)


Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_btc,avg_fee_btc,total_size_used_mb,unique_active_wallets,market_cap,...,ma_crossover_description,ema_12,ema_26,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description,txn_count_mean_24h,txn_ratio
0,2024-05-01 00:00:00,60208.817,8511.2535,514130300.0,27131,41488.3624,6.4e-05,11.7035,50034,1192806000000.0,...,,60208.817,60208.817,0.0,0.0,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...,,
1,2024-05-01 01:00:00,60095.5002,8853.2447,532178000.0,25263,29720.9307,6.3e-05,11.459,39480,1185496000000.0,...,,60191.3836,60200.4232,-9.0396,-1.8079,-7.2317,0,MACD (12h vs 26h EMA) is below signal line → n...,,
2,2024-05-01 02:00:00,59886.5202,8690.0466,519349100.0,21228,29104.3936,6e-05,7.82,37710,1183924000000.0,...,,60144.4816,60177.1711,-32.6895,-7.9842,-24.7053,0,MACD (12h vs 26h EMA) is below signal line → n...,,
3,2024-05-01 03:00:00,60206.0485,5206.5224,312751300.0,19018,62407.2166,7e-05,8.3795,42124,1179127000000.0,...,,60153.9534,60179.3102,-25.3568,-11.4587,-13.8981,0,MACD (12h vs 26h EMA) is below signal line → n...,,
4,2024-05-01 04:00:00,60241.4675,4977.9718,299829900.0,26684,44802.0762,5.7e-05,11.0749,36411,1185334000000.0,...,,60167.4171,60183.9144,-16.4973,-12.4665,-4.0308,0,MACD (12h vs 26h EMA) is below signal line → n...,,
5,2024-05-01 05:00:00,60005.2504,5928.1266,356062900.0,11898,26359.225,9.2e-05,4.5227,27976,1185417000000.0,...,,60142.4684,60170.68,-28.2116,-15.6155,-12.5961,0,MACD (12h vs 26h EMA) is below signal line → n...,,
6,2024-05-01 06:00:00,59501.2836,7846.8091,468895300.0,17902,27977.1218,8.2e-05,7.7468,42564,1180281000000.0,...,,60043.8246,60121.0951,-77.2705,-27.9465,-49.324,0,MACD (12h vs 26h EMA) is below signal line → n...,,
7,2024-05-01 07:00:00,57466.2288,48781.7905,2831902000.0,18476,80240.6812,8.2e-05,7.7279,46975,1174252000000.0,...,,59647.2714,59924.4384,-277.167,-77.7906,-199.3764,0,MACD (12h vs 26h EMA) is below signal line → n...,,
8,2024-05-01 08:00:00,57103.0541,34919.7273,1990814000.0,36958,77431.8478,6.2e-05,14.1263,59083,1129262000000.0,...,,59255.8533,59715.4469,-459.5936,-154.1512,-305.4424,0,MACD (12h vs 26h EMA) is below signal line → n...,,
9,2024-05-01 09:00:00,57096.7474,14509.3452,830822400.0,36684,38395.1546,5.4e-05,13.1422,48869,1119474000000.0,...,,58923.6832,59521.4692,-597.786,-242.8782,-354.9078,0,MACD (12h vs 26h EMA) is below signal line → n...,,


In [23]:
# Label each row by how its transaction count compares with the rolling mean:
#   ratio > 1.2 -> "activity surge", ratio < 0.8 -> "low activity",
#   otherwise "normal activity"; rows without a ratio stay None.
btc["txn_ratio_description"] = btc["txn_ratio"].apply(
    lambda ratio: (
        None if pd.isna(ratio)
        else "activity surge" if ratio > 1.2
        else "low activity" if ratio < 0.8
        else "normal activity"
    )
)

# Remove the helper column. errors="ignore" keeps this cell re-runnable:
# the column is already gone on a second run, which would otherwise raise KeyError.
btc = btc.drop(columns="txn_count_mean_24h", errors="ignore")

In [24]:
# Frequency of each activity label (rows without a label are not counted).
btc["txn_ratio_description"].value_counts(dropna=True)

txn_ratio_description
low activity       3029
normal activity    2592
activity surge     2396
Name: count, dtype: int64

### Bollinger Bands

Bollinger Bands consist of three lines:
- Middle Band: Simple Moving Average (usually 20-period)
- Upper Band: Middle + 2 × standard deviation
- Lower Band: Middle - 2 × standard deviation

These bands help identify whether the price is relatively high or low
compared to recent history, and are useful for detecting breakout or sideways markets.

In [25]:
# 1. Middle band: 20-row simple moving average of the close price
btc["bb_mid"] = btc["close_price"].rolling(window=20).mean()

# 2. 20-row rolling standard deviation of the close price
btc["bb_std"] = btc["close_price"].rolling(window=20).std()

# 3. Upper and lower bands: middle band ± 2 standard deviations
btc["bb_upper"] = (btc["bb_mid"] + 2 * btc["bb_std"]).round(4)
btc["bb_lower"] = (btc["bb_mid"] - 2 * btc["bb_std"]).round(4)

# 4. Position of the close price relative to the bands (this is a position
#    label, not the band width). Rows where the bands are still undefined
#    (rolling warm-up) get None. Iterating the three columns with zip avoids
#    building a Series per row, as a row-wise DataFrame.apply would.
btc["bb_position"] = [
    None if pd.isna(upper) or pd.isna(lower)
    else "above upper band (possible breakout)" if close > upper
    else "below lower band (possible breakdown)" if close < lower
    else "within band (normal range)"
    for close, upper, lower in zip(btc["close_price"], btc["bb_upper"], btc["bb_lower"])
]

This field classifies the current close price relative to the Bollinger Bands.

- If the price is above the upper band → it's called a **breakout**.
    - Meaning: price has moved beyond normal volatility range on the upside.
    - Traders often interpret this as a strong bullish momentum or start of an upward trend.

- If the price is below the lower band → it's called a **breakdown** (labelled "possible breakdown" in `bb_position`), which may be followed by a **rebound**.
    - Meaning: price may have dropped too sharply and is likely to bounce back.
    - Traders interpret this as a potential oversold signal or reversal opportunity.

- If the price is between upper and lower bands → it's in the **normal range** (sideways).
    - Meaning: market is moving within expected volatility bounds.

In [26]:
# Preview the first 30 rows, now including the Bollinger Band columns.
btc.head(n=30)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_btc,avg_fee_btc,total_size_used_mb,unique_active_wallets,market_cap,...,macd_hist,macd_crossover_signal,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position
0,2024-05-01 00:00:00,60208.817,8511.2535,514130300.0,27131,41488.3624,6.4e-05,11.7035,50034,1192806000000.0,...,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
1,2024-05-01 01:00:00,60095.5002,8853.2447,532178000.0,25263,29720.9307,6.3e-05,11.459,39480,1185496000000.0,...,-7.2317,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
2,2024-05-01 02:00:00,59886.5202,8690.0466,519349100.0,21228,29104.3936,6e-05,7.82,37710,1183924000000.0,...,-24.7053,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
3,2024-05-01 03:00:00,60206.0485,5206.5224,312751300.0,19018,62407.2166,7e-05,8.3795,42124,1179127000000.0,...,-13.8981,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
4,2024-05-01 04:00:00,60241.4675,4977.9718,299829900.0,26684,44802.0762,5.7e-05,11.0749,36411,1185334000000.0,...,-4.0308,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
5,2024-05-01 05:00:00,60005.2504,5928.1266,356062900.0,11898,26359.225,9.2e-05,4.5227,27976,1185417000000.0,...,-12.5961,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
6,2024-05-01 06:00:00,59501.2836,7846.8091,468895300.0,17902,27977.1218,8.2e-05,7.7468,42564,1180281000000.0,...,-49.324,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
7,2024-05-01 07:00:00,57466.2288,48781.7905,2831902000.0,18476,80240.6812,8.2e-05,7.7279,46975,1174252000000.0,...,-199.3764,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
8,2024-05-01 08:00:00,57103.0541,34919.7273,1990814000.0,36958,77431.8478,6.2e-05,14.1263,59083,1129262000000.0,...,-305.4424,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
9,2024-05-01 09:00:00,57096.7474,14509.3452,830822400.0,36684,38395.1546,5.4e-05,13.1422,48869,1119474000000.0,...,-354.9078,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,


### Price Change Percentage

This indicator measures the relative change in closing price compared to the previous time step.

It's useful for identifying short-term price momentum (up/down/flat).

Formula:

   $\frac{\text{close}_t - \text{close}_{t-1}}{\text{close}_{t-1}}$

In [27]:
# Fractional change of the close price versus the previous row, to 6 decimals.
btc["price_change_pct"] = btc["close_price"].pct_change(periods=1).round(decimals=6)
btc.head(n=10)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_btc,avg_fee_btc,total_size_used_mb,unique_active_wallets,market_cap,...,macd_crossover_signal,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position,price_change_pct
0,2024-05-01 00:00:00,60208.817,8511.2535,514130300.0,27131,41488.3624,6.4e-05,11.7035,50034,1192806000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,
1,2024-05-01 01:00:00,60095.5002,8853.2447,532178000.0,25263,29720.9307,6.3e-05,11.459,39480,1185496000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.001882
2,2024-05-01 02:00:00,59886.5202,8690.0466,519349100.0,21228,29104.3936,6e-05,7.82,37710,1183924000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.003477
3,2024-05-01 03:00:00,60206.0485,5206.5224,312751300.0,19018,62407.2166,7e-05,8.3795,42124,1179127000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,0.005336
4,2024-05-01 04:00:00,60241.4675,4977.9718,299829900.0,26684,44802.0762,5.7e-05,11.0749,36411,1185334000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,0.000588
5,2024-05-01 05:00:00,60005.2504,5928.1266,356062900.0,11898,26359.225,9.2e-05,4.5227,27976,1185417000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.003921
6,2024-05-01 06:00:00,59501.2836,7846.8091,468895300.0,17902,27977.1218,8.2e-05,7.7468,42564,1180281000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.008399
7,2024-05-01 07:00:00,57466.2288,48781.7905,2831902000.0,18476,80240.6812,8.2e-05,7.7279,46975,1174252000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.034202
8,2024-05-01 08:00:00,57103.0541,34919.7273,1990814000.0,36958,77431.8478,6.2e-05,14.1263,59083,1129262000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.00632
9,2024-05-01 09:00:00,57096.7474,14509.3452,830822400.0,36684,38395.1546,5.4e-05,13.1422,48869,1119474000000.0,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.00011


In [28]:
# Bucket the price change into a text label for the LLM prompt.
# Thresholds are checked from the most negative bucket upwards;
# rows without a price change (the first row) stay None.
btc["price_movement"] = btc["price_change_pct"].apply(
    lambda change: (
        None if pd.isna(change)
        else "significant decrease" if change <= -0.01
        else "slight decrease" if change <= -0.002
        else "no significant change" if change <= 0.002
        else "slight increase" if change <= 0.01
        else "significant increase"
    )
)

In [29]:
# Frequency of each price-movement label (rows without a label are not counted).
btc["price_movement"].value_counts(dropna=True)

price_movement
no significant change    3551
slight increase          2074
slight decrease          1891
significant decrease      273
significant increase      250
Name: count, dtype: int64

In [30]:
# Write the enriched BTC frame to disk without the row index.
btc.to_csv(path_or_buf="BTC/btc_full.csv", index=False)

## ETH

In [31]:
# Load the ETH dataset, parsing the "timestamp" column as datetimes, and display it.
eth = pd.read_csv(filepath_or_buffer="ETH/eth_final.csv", parse_dates=["timestamp"])
eth

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_eth,unique_active_wallets,avg_gas_price_gwei,total_gas_used,market_cap
0,2024-05-01 00:00:00,2997.2957,87041.9491,2.613718e+08,50831,53248.2181,25482,8.2190,4542529445,3.687864e+11
1,2024-05-01 01:00:00,3000.8196,90117.9517,2.701605e+08,49831,75066.1618,26422,8.1389,4578797065,3.659825e+11
2,2024-05-01 02:00:00,2985.5093,92728.8428,2.763142e+08,48229,29038.2851,25233,9.2955,4518099410,3.665995e+11
3,2024-05-01 03:00:00,3001.9196,45859.1077,1.372769e+08,48785,27267.6164,27274,8.1441,4530654967,3.641239e+11
4,2024-05-01 04:00:00,3003.8646,41604.4323,1.250516e+08,47804,35669.8495,26537,7.1900,4541463899,3.655860e+11
...,...,...,...,...,...,...,...,...,...,...
8035,2025-03-31 19:00:00,1827.5233,170162.2498,3.123563e+08,62628,67909.5841,28120,2.1402,5401486521,2.218443e+11
8036,2025-03-31 20:00:00,1819.8772,106165.0559,1.941037e+08,62727,81357.4728,26322,3.3118,5495013163,2.203444e+11
8037,2025-03-31 21:00:00,1824.6029,72938.0947,1.331678e+08,60498,106047.8367,25494,2.9551,5359587938,2.197182e+11
8038,2025-03-31 22:00:00,1824.7112,72546.7354,1.321121e+08,59763,40238.3265,29035,1.8836,5432212218,2.201118e+11


### Moving Average (MA)

In [32]:
# Simple moving averages of the close price over 6-row and 24-row windows
# (the rows shown are hourly, hence the "6h" / "24h" wording below).
eth["ma_6"] = eth["close_price"].rolling(window=6).mean().round(4)
eth["ma_24"] = eth["close_price"].rolling(window=24).mean().round(4)

# Crossover flag, computed column-wise instead of with a row-by-row apply:
#   1    -> short MA is strictly above long MA (buy signal)
#   0    -> short MA is at or below long MA (no buy signal)
#   <NA> -> at least one average is still undefined (rolling warm-up rows)
eth["ma_crossover_signal"] = (
    (eth["ma_6"] > eth["ma_24"])
    .astype("Int64")
    .where(eth["ma_6"].notna() & eth["ma_24"].notna())
)

# Human-readable form of the flag; rows without a flag stay None.
eth["ma_crossover_description"] = eth["ma_crossover_signal"].apply(
    lambda flag: (
        None if pd.isna(flag)
        else "short MA (6h) is above long MA (24h) → buy signal" if flag == 1
        else "short MA (6h) is below long MA (24h) → no buy signal"
    )
)
eth.head(30)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_eth,unique_active_wallets,avg_gas_price_gwei,total_gas_used,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description
0,2024-05-01 00:00:00,2997.2957,87041.9491,261371800.0,50831,53248.2181,25482,8.219,4542529445,368786400000.0,,,,
1,2024-05-01 01:00:00,3000.8196,90117.9517,270160500.0,49831,75066.1618,26422,8.1389,4578797065,365982500000.0,,,,
2,2024-05-01 02:00:00,2985.5093,92728.8428,276314200.0,48229,29038.2851,25233,9.2955,4518099410,366599500000.0,,,,
3,2024-05-01 03:00:00,3001.9196,45859.1077,137276900.0,48785,27267.6164,27274,8.1441,4530654967,364123900000.0,,,,
4,2024-05-01 04:00:00,3003.8646,41604.4323,125051600.0,47804,35669.8495,26537,7.19,4541463899,365586000000.0,,,,
5,2024-05-01 05:00:00,2992.2338,61148.8662,183078500.0,46818,23924.5542,25218,7.4799,4527735659,366560400000.0,2996.9404,,,
6,2024-05-01 06:00:00,2952.8314,93689.7612,278324200.0,49805,23264.6681,26463,7.6935,4553312046,364259700000.0,2989.5297,,,
7,2024-05-01 07:00:00,2867.5894,368547.273,1066140000.0,49596,83393.7018,22182,22.0712,4633450219,361173100000.0,2967.3247,,,
8,2024-05-01 08:00:00,2859.6975,271661.9017,773363600.0,49251,74381.28,22358,20.4277,4511887791,349561000000.0,2946.356,,,
9,2024-05-01 09:00:00,2874.1493,127704.8928,367313400.0,49242,92524.2244,22534,13.9404,4459155419,347497300000.0,2925.061,,,


### MACD (Moving Average Convergence Divergence)

`MACD`: a momentum indicator that shows the relationship between two Exponential Moving Averages (EMA)

`EMA`: gives more weight to recent prices, making it more responsive than a simple moving average (MA)

MACD consists of 3 main components:
1. MACD Line = EMA(12) - EMA(26): measures trend momentum.
2. Signal Line = EMA(9) of MACD: smooths the MACD line.
3. MACD Histogram = MACD Line - Signal Line: shows the strength of momentum change.

- A crossover (MACD > Signal) is commonly interpreted as a buy signal.
- A crossover (MACD < Signal) is commonly interpreted as a sell signal.

- 当 MACD线 > Signal线：表示上涨动量增强，通常是买入点
- 当 MACD线 < Signal线：表示下跌动量增强，通常是卖出点
- 两者的差值越大，说明动量越强


In [33]:
# Fast (span 12) and slow (span 26) exponential moving averages of the close
# price, each kept to 4 decimals.
eth["ema_12"] = eth["close_price"].ewm(span=12, adjust=False).mean().round(decimals=4)
eth["ema_26"] = eth["close_price"].ewm(span=26, adjust=False).mean().round(decimals=4)

# MACD line: fast EMA minus slow EMA.
eth["macd"] = eth["ema_12"].sub(eth["ema_26"]).round(decimals=4)

# Signal line: a span-9 EMA of the MACD line, i.e. a smoothed MACD curve
# that the MACD line is compared against to generate buy/sell signals.
eth["macd_signal"] = eth["macd"].ewm(span=9, adjust=False).mean().round(decimals=4)

# Histogram: MACD line minus signal line.
eth["macd_hist"] = eth["macd"].sub(eth["macd_signal"]).round(decimals=4)

# Crossover flag: 1 while MACD is strictly above the signal line, otherwise 0.
eth["macd_crossover_signal"] = eth["macd"].gt(eth["macd_signal"]).astype(int)

# Text form of the crossover flag.
eth["macd_crossover_description"] = eth["macd_crossover_signal"].apply(
    lambda flag: (
        None if flag not in (0, 1)
        else "MACD (12h vs 26h EMA) is above signal line → bullish momentum" if flag == 1
        else "MACD (12h vs 26h EMA) is below signal line → no bullish signal"
    )
)

In [34]:
# Preview the first five rows, now including the MACD columns.
eth.head(n=5)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_eth,unique_active_wallets,avg_gas_price_gwei,total_gas_used,market_cap,...,ma_24,ma_crossover_signal,ma_crossover_description,ema_12,ema_26,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description
0,2024-05-01 00:00:00,2997.2957,87041.9491,261371800.0,50831,53248.2181,25482,8.219,4542529445,368786400000.0,...,,,,2997.2957,2997.2957,0.0,0.0,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...
1,2024-05-01 01:00:00,3000.8196,90117.9517,270160500.0,49831,75066.1618,26422,8.1389,4578797065,365982500000.0,...,,,,2997.8378,2997.5567,0.2811,0.0562,0.2249,1,MACD (12h vs 26h EMA) is above signal line → b...
2,2024-05-01 02:00:00,2985.5093,92728.8428,276314200.0,48229,29038.2851,25233,9.2955,4518099410,366599500000.0,...,,,,2995.9411,2996.6643,-0.7232,-0.0997,-0.6235,0,MACD (12h vs 26h EMA) is below signal line → n...
3,2024-05-01 03:00:00,3001.9196,45859.1077,137276900.0,48785,27267.6164,27274,8.1441,4530654967,364123900000.0,...,,,,2996.8609,2997.0536,-0.1927,-0.1183,-0.0744,0,MACD (12h vs 26h EMA) is below signal line → n...
4,2024-05-01 04:00:00,3003.8646,41604.4323,125051600.0,47804,35669.8495,26537,7.19,4541463899,365586000000.0,...,,,,2997.9384,2997.5581,0.3803,-0.0186,0.3989,1,MACD (12h vs 26h EMA) is above signal line → b...


### Transaction Count Ratio

Compares the current hour's transaction count to the mean over the trailing 24-hour window (the window includes the current hour).

It helps assess whether on-chain activity is surging, normal, or low.

In [35]:
# Temporary column: 24-row rolling mean of the transaction count.
eth["txn_count_mean_24h"] = eth["transaction_count"].rolling(window=24).mean()

# Ratio of each row's transaction count to that rolling mean, to 4 decimals.
eth["txn_ratio"] = eth["transaction_count"].div(eth["txn_count_mean_24h"]).round(decimals=4)

# Label the ratio: below 0.9 is "low activity", above 1.1 is "activity surge",
# anything in between is "normal activity"; rows without a ratio stay None.
eth["txn_ratio_description"] = eth["txn_ratio"].apply(
    lambda ratio: (
        None if pd.isna(ratio)
        else "low activity" if ratio < 0.9
        else "activity surge" if ratio > 1.1
        else "normal activity"
    )
)

# The rolling mean was only needed to build the ratio; remove it in place.
del eth["txn_count_mean_24h"]

In [36]:
# Frequency of each activity label (rows without a label are not counted).
eth["txn_ratio_description"].value_counts(dropna=True)

txn_ratio_description
normal activity    6382
low activity        884
activity surge      751
Name: count, dtype: int64

### Bollinger Bands

Bollinger Bands consist of three lines:
- Middle Band: Simple Moving Average (usually 20-period)
- Upper Band: Middle + 2 × standard deviation
- Lower Band: Middle - 2 × standard deviation

These bands help identify whether the price is relatively high or low
compared to recent history, and are useful for detecting breakout or sideways markets.

In [37]:
# 1. Middle band: 20-row simple moving average of the close price
eth["bb_mid"] = eth["close_price"].rolling(window=20).mean()

# 2. 20-row rolling standard deviation of the close price
eth["bb_std"] = eth["close_price"].rolling(window=20).std()

# 3. Upper and lower bands: middle band ± 2 standard deviations
eth["bb_upper"] = (eth["bb_mid"] + 2 * eth["bb_std"]).round(4)
eth["bb_lower"] = (eth["bb_mid"] - 2 * eth["bb_std"]).round(4)

# 4. Position of the close price relative to the bands (this is a position
#    label, not the band width). Rows where the bands are still undefined
#    (rolling warm-up) get None. Iterating the three columns with zip avoids
#    building a Series per row, as a row-wise DataFrame.apply would.
eth["bb_position"] = [
    None if pd.isna(upper) or pd.isna(lower)
    else "above upper band (possible breakout)" if close > upper
    else "below lower band (possible breakdown)" if close < lower
    else "within band (normal range)"
    for close, upper, lower in zip(eth["close_price"], eth["bb_upper"], eth["bb_lower"])
]

This field classifies the current close price relative to the Bollinger Bands.

- If the price is above the upper band → it's called a **breakout**.
    - Meaning: price has moved beyond normal volatility range on the upside.
    - Traders often interpret this as a strong bullish momentum or start of an upward trend.

- If the price is below the lower band → it's called a **breakdown** (labelled "possible breakdown" in `bb_position`), which may be followed by a **rebound**.
    - Meaning: price may have dropped too sharply and is likely to bounce back.
    - Traders interpret this as a potential oversold signal or reversal opportunity.

- If the price is between upper and lower bands → it's in the **normal range** (sideways).
    - Meaning: market is moving within expected volatility bounds.

In [38]:
# Preview the first 30 rows, now including the Bollinger Band columns.
eth.head(n=30)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_eth,unique_active_wallets,avg_gas_price_gwei,total_gas_used,market_cap,...,macd_hist,macd_crossover_signal,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position
0,2024-05-01 00:00:00,2997.2957,87041.9491,261371800.0,50831,53248.2181,25482,8.219,4542529445,368786400000.0,...,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
1,2024-05-01 01:00:00,3000.8196,90117.9517,270160500.0,49831,75066.1618,26422,8.1389,4578797065,365982500000.0,...,0.2249,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,
2,2024-05-01 02:00:00,2985.5093,92728.8428,276314200.0,48229,29038.2851,25233,9.2955,4518099410,366599500000.0,...,-0.6235,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
3,2024-05-01 03:00:00,3001.9196,45859.1077,137276900.0,48785,27267.6164,27274,8.1441,4530654967,364123900000.0,...,-0.0744,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
4,2024-05-01 04:00:00,3003.8646,41604.4323,125051600.0,47804,35669.8495,26537,7.19,4541463899,365586000000.0,...,0.3989,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,
5,2024-05-01 05:00:00,2992.2338,61148.8662,183078500.0,46818,23924.5542,25218,7.4799,4527735659,366560400000.0,...,-0.0675,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
6,2024-05-01 06:00:00,2952.8314,93689.7612,278324200.0,49805,23264.6681,26463,7.6935,4553312046,364259700000.0,...,-2.8706,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
7,2024-05-01 07:00:00,2867.5894,368547.273,1066140000.0,49596,83393.7018,22182,22.0712,4633450219,361173100000.0,...,-9.91,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
8,2024-05-01 08:00:00,2859.6975,271661.9017,773363600.0,49251,74381.28,22358,20.4277,4511887791,349561000000.0,...,-14.2769,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
9,2024-05-01 09:00:00,2874.1493,127704.8928,367313400.0,49242,92524.2244,22534,13.9404,4459155419,347497300000.0,...,-15.2814,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,


### Price Change Percentage

This indicator measures the relative change in closing price compared to the previous time step.

It's useful for identifying short-term price momentum (up/down/flat).

Formula:

   $\frac{\text{close}_t - \text{close}_{t-1}}{\text{close}_{t-1}}$

In [39]:
# Fractional change of the close price versus the previous row, to 6 decimals.
eth["price_change_pct"] = eth["close_price"].pct_change(periods=1).round(decimals=6)

# Bucket the price change into a text label for the LLM prompt.
# Thresholds are checked from the most negative bucket upwards;
# rows without a price change (the first row) stay None.
eth["price_movement"] = eth["price_change_pct"].apply(
    lambda change: (
        None if pd.isna(change)
        else "significant decrease" if change <= -0.01
        else "slight decrease" if change <= -0.002
        else "no significant change" if change <= 0.002
        else "slight increase" if change <= 0.01
        else "significant increase"
    )
)
eth["price_movement"].value_counts(dropna=True)

price_movement
no significant change    2816
slight increase          2240
slight decrease          2116
significant decrease      465
significant increase      402
Name: count, dtype: int64

In [40]:
# Write the enriched ETH frame to disk without the row index.
eth.to_csv(path_or_buf="ETH/eth_full.csv", index=False)

## SOL

In [41]:
# Load the SOL dataset, parsing the "timestamp" column as datetimes.
sol = pd.read_csv(filepath_or_buffer="SOL/sol_final.csv", parse_dates=["timestamp"])

### Moving Average (MA)

In [42]:
# Simple moving averages of the close price over 6-row and 24-row windows
# (the rows shown are hourly, hence the "6h" / "24h" wording below).
sol["ma_6"] = sol["close_price"].rolling(window=6).mean().round(4)
sol["ma_24"] = sol["close_price"].rolling(window=24).mean().round(4)

# Crossover flag, computed column-wise instead of with a row-by-row apply:
#   1    -> short MA is strictly above long MA (buy signal)
#   0    -> short MA is at or below long MA (no buy signal)
#   <NA> -> at least one average is still undefined (rolling warm-up rows)
sol["ma_crossover_signal"] = (
    (sol["ma_6"] > sol["ma_24"])
    .astype("Int64")
    .where(sol["ma_6"].notna() & sol["ma_24"].notna())
)

# Human-readable form of the flag; rows without a flag stay None.
sol["ma_crossover_description"] = sol["ma_crossover_signal"].apply(
    lambda flag: (
        None if pd.isna(flag)
        else "short MA (6h) is above long MA (24h) → buy signal" if flag == 1
        else "short MA (6h) is below long MA (24h) → no buy signal"
    )
)
sol.head(30)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,successful_transaction_count,total_fee_sol,avg_fee_sol,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description
0,2024-05-01 00:00:00,125.1005,513934.2,64575940.0,8837436,6777203,228.6608,2.6e-05,57068380000.0,,,,
1,2024-05-01 01:00:00,126.8711,683454.0,86164400.0,9286854,7312422,206.7113,2.2e-05,56022580000.0,,,,
2,2024-05-01 02:00:00,125.0548,705846.8,88318480.0,8450646,6534633,217.1579,2.6e-05,56778550000.0,,,,
3,2024-05-01 03:00:00,126.1589,422406.5,52954240.0,9489185,7529496,221.0711,2.3e-05,56015210000.0,,,,
4,2024-05-01 04:00:00,125.9056,302809.6,38198780.0,9238242,7418829,222.2989,2.4e-05,56515540000.0,,,,
5,2024-05-01 05:00:00,125.082,320654.1,40168310.0,9074138,7142826,171.0434,1.9e-05,56287170000.0,125.6955,,,
6,2024-05-01 06:00:00,122.8263,689953.0,85343810.0,8511218,6724752,162.6879,1.9e-05,55883400000.0,125.3164,,,
7,2024-05-01 07:00:00,120.5031,1757072.0,212238300.0,8462130,6879615,202.1013,2.4e-05,55270380000.0,124.2551,,,
8,2024-05-01 08:00:00,121.0997,1108013.0,132924500.0,8489463,6847295,182.6292,2.2e-05,53864870000.0,123.5959,,,
9,2024-05-01 09:00:00,120.7857,742829.9,90145190.0,8777861,7057795,179.6191,2e-05,53755120000.0,122.7004,,,


### MACD (Moving Average Convergence Divergence)

`MACD`: a momentum indicator that shows the relationship between two Exponential Moving Averages (EMA)

`EMA`: gives more weight to recent prices, making it more responsive than a simple moving average (MA)

MACD consists of 3 main components:
1. MACD Line = EMA(12) - EMA(26): measures trend momentum.
2. Signal Line = EMA(9) of MACD: smooths the MACD line.
3. MACD Histogram = MACD Line - Signal Line: shows the strength of momentum change.

- A crossover (MACD > Signal) is commonly interpreted as a buy signal.
- A crossover (MACD < Signal) is commonly interpreted as a sell signal.

- 当 MACD线 > Signal线：表示上涨动量增强，通常是买入点
- 当 MACD线 < Signal线：表示下跌动量增强，通常是卖出点
- 两者的差值越大，说明动量越强


In [43]:
# Fast (span 12) and slow (span 26) exponential moving averages of the close
# price, each kept to 4 decimals.
sol["ema_12"] = sol["close_price"].ewm(span=12, adjust=False).mean().round(decimals=4)
sol["ema_26"] = sol["close_price"].ewm(span=26, adjust=False).mean().round(decimals=4)

# MACD line: fast EMA minus slow EMA.
sol["macd"] = sol["ema_12"].sub(sol["ema_26"]).round(decimals=4)

# Signal line: a span-9 EMA of the MACD line, i.e. a smoothed MACD curve
# that the MACD line is compared against to generate buy/sell signals.
sol["macd_signal"] = sol["macd"].ewm(span=9, adjust=False).mean().round(decimals=4)

# Histogram: MACD line minus signal line.
sol["macd_hist"] = sol["macd"].sub(sol["macd_signal"]).round(decimals=4)

# Crossover flag: 1 while MACD is strictly above the signal line, otherwise 0.
sol["macd_crossover_signal"] = sol["macd"].gt(sol["macd_signal"]).astype(int)

# Text form of the crossover flag.
sol["macd_crossover_description"] = sol["macd_crossover_signal"].apply(
    lambda flag: (
        None if flag not in (0, 1)
        else "MACD (12h vs 26h EMA) is above signal line → bullish momentum" if flag == 1
        else "MACD (12h vs 26h EMA) is below signal line → no bullish signal"
    )
)

In [44]:
# Preview the first five rows to check the newly added EMA / MACD columns.
sol.head()

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,successful_transaction_count,total_fee_sol,avg_fee_sol,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description,ema_12,ema_26,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description
0,2024-05-01 00:00:00,125.1005,513934.1844,64575943.45,8837436,6777203,228.6608,2.6e-05,57068380000.0,,,,,125.1005,125.1005,0.0,0.0,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...
1,2024-05-01 01:00:00,126.8711,683454.0389,86164402.38,9286854,7312422,206.7113,2.2e-05,56022580000.0,,,,,125.3729,125.2317,0.1412,0.0282,0.113,1,MACD (12h vs 26h EMA) is above signal line → b...
2,2024-05-01 02:00:00,125.0548,705846.8439,88318479.6,8450646,6534633,217.1579,2.6e-05,56778550000.0,,,,,125.324,125.2186,0.1054,0.0437,0.0617,1,MACD (12h vs 26h EMA) is above signal line → b...
3,2024-05-01 03:00:00,126.1589,422406.4831,52954244.59,9489185,7529496,221.0711,2.3e-05,56015210000.0,,,,,125.4524,125.2882,0.1642,0.0678,0.0964,1,MACD (12h vs 26h EMA) is above signal line → b...
4,2024-05-01 04:00:00,125.9056,302809.5565,38198777.59,9238242,7418829,222.2989,2.4e-05,56515540000.0,,,,,125.5221,125.3339,0.1882,0.0919,0.0963,1,MACD (12h vs 26h EMA) is above signal line → b...


### Transaction Count Ratio

Compares the current hour's transaction count to the mean over the trailing 24-hour window (the current hour plus the previous 23 hours).

It helps assess whether on-chain activity is surging, normal, or low.

In [45]:
# Mean transaction count over the trailing 24 rows (current row included).
# Held in a standalone Series, so no helper column has to be added to `sol`
# and dropped again. Rows without a full 24-row window are NaN.
txn_mean_24h = sol["transaction_count"].rolling(window=24).mean()

# Current count relative to that trailing mean, rounded to 4 decimals.
sol["txn_ratio"] = sol["transaction_count"].div(txn_mean_24h).round(4)

# Plain-language label per row: above 1.1 is a surge, below 0.9 is low,
# anything in between is normal. Rows with no ratio yet stay None.
sol["txn_ratio_description"] = [
    None if pd.isna(ratio)
    else "activity surge" if ratio > 1.1
    else "low activity" if ratio < 0.9
    else "normal activity"
    for ratio in sol["txn_ratio"]
]

In [46]:
# Distribution of the activity labels (rows without a label are not counted).
sol["txn_ratio_description"].value_counts()

txn_ratio_description
normal activity    7225
activity surge      420
low activity        372
Name: count, dtype: int64

### Bollinger Bands

Bollinger Bands consist of three lines:
- Middle Band: Simple Moving Average (usually 20-period)
- Upper Band: Middle + 2 × standard deviation
- Lower Band: Middle - 2 × standard deviation

These bands help identify whether the price is relatively high or low
compared to recent history, and are useful for detecting breakout or sideways markets.

In [47]:
# Trailing 20-row window over the close; shared by the mean and the std below.
close_roll20 = sol["close_price"].rolling(window=20)

# Middle band (simple moving average) and rolling standard deviation.
# Rolling.std() uses pandas' default ddof=1 (sample standard deviation).
sol["bb_mid"] = close_roll20.mean()
sol["bb_std"] = close_roll20.std()

# Upper / lower bands: middle band ± 2 standard deviations, 4 decimals.
sol["bb_upper"] = sol["bb_mid"].add(2 * sol["bb_std"]).round(4)
sol["bb_lower"] = sol["bb_mid"].sub(2 * sol["bb_std"]).round(4)

# Position of the close relative to the bands (a label, not a band width).
# Rows where either band is still NaN (window not yet full) stay None.
sol["bb_position"] = [
    None if pd.isna(upper) or pd.isna(lower)
    else "above upper band (possible breakout)" if close > upper
    else "below lower band (possible breakdown)" if close < lower
    else "within band (normal range)"
    for close, upper, lower in zip(
        sol["close_price"], sol["bb_upper"], sol["bb_lower"]
    )
]

This field classifies the current close price relative to the Bollinger Bands.

- If the price is above the upper band → it's called a **breakout**.
    - Meaning: price has moved beyond normal volatility range on the upside.
    - Traders often interpret this as a strong bullish momentum or start of an upward trend.

- If the price is below the lower band → it's called a **breakdown** (a possible setup for a **rebound**).
    - Meaning: price may have dropped too sharply and is likely to bounce back.
    - Traders interpret this as a potential oversold signal or reversal opportunity.

- If the price is between upper and lower bands → it's in the **normal range** (sideways).
    - Meaning: market is moving within expected volatility bounds.

In [48]:
# How often the close sits above, below, or inside the bands (unlabeled rows are not counted).
sol["bb_position"].value_counts()

bb_position
within band (normal range)               7123
below lower band (possible breakdown)     476
above upper band (possible breakout)      422
Name: count, dtype: int64

### Price Change Percentage

This indicator measures the relative change in closing price compared to the previous time step.

It's useful for identifying short-term price momentum (up/down/flat).

Formula:

   $\frac{\text{close}_t - \text{close}_{t-1}}{\text{close}_{t-1}}$

In [49]:
# Close-to-close change versus the previous row, stored as a fraction
# (0.014153 means +1.4153%) and rounded to 6 decimals.
# The first row has no predecessor, so its value is NaN.
sol["price_change_pct"] = sol["close_price"].pct_change().round(6)
# Preview the first ten rows, including the new column.
sol.head(10)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,successful_transaction_count,total_fee_sol,avg_fee_sol,market_cap,ma_6,...,macd_crossover_signal,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position,price_change_pct
0,2024-05-01 00:00:00,125.1005,513934.2,64575940.0,8837436,6777203,228.6608,2.6e-05,57068380000.0,,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,
1,2024-05-01 01:00:00,126.8711,683454.0,86164400.0,9286854,7312422,206.7113,2.2e-05,56022580000.0,,...,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,,0.014153
2,2024-05-01 02:00:00,125.0548,705846.8,88318480.0,8450646,6534633,217.1579,2.6e-05,56778550000.0,,...,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,,-0.014316
3,2024-05-01 03:00:00,126.1589,422406.5,52954240.0,9489185,7529496,221.0711,2.3e-05,56015210000.0,,...,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,,0.008829
4,2024-05-01 04:00:00,125.9056,302809.6,38198780.0,9238242,7418829,222.2989,2.4e-05,56515540000.0,,...,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,,-0.002008
5,2024-05-01 05:00:00,125.082,320654.1,40168310.0,9074138,7142826,171.0434,1.9e-05,56287170000.0,125.6955,...,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,,-0.006541
6,2024-05-01 06:00:00,122.8263,689953.0,85343810.0,8511218,6724752,162.6879,1.9e-05,55883400000.0,125.3164,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.018034
7,2024-05-01 07:00:00,120.5031,1757072.0,212238300.0,8462130,6879615,202.1013,2.4e-05,55270380000.0,124.2551,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.018915
8,2024-05-01 08:00:00,121.0997,1108013.0,132924500.0,8489463,6847295,182.6292,2.2e-05,53864870000.0,123.5959,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,0.004951
9,2024-05-01 09:00:00,120.7857,742829.9,90145190.0,8777861,7057795,179.6191,2e-05,53755120000.0,122.7004,...,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,,-0.002593


In [50]:
# Turn each hourly return into a plain-language bucket for the LLM prompt.
# Cut-offs are ±0.002 (0.2%) and ±0.01 (1%). Every test is a strict ">", so a
# value sitting exactly on a cut-off lands in the lower bucket
# (e.g. exactly -0.002 is a "slight decrease"). Missing returns stay None.
sol["price_movement"] = [
    None if pd.isna(change)
    else "significant increase" if change > 0.01
    else "slight increase" if change > 0.002
    else "no significant change" if change > -0.002
    else "slight decrease" if change > -0.01
    else "significant decrease"
    for change in sol["price_change_pct"]
]
sol["price_movement"].value_counts()

price_movement
slight increase          2364
slight decrease          2269
no significant change    1880
significant decrease      784
significant increase      742
Name: count, dtype: int64

In [51]:
# Persist the enriched SOL frame; index=False leaves the row index out of the file.
sol.to_csv("SOL/sol_full.csv", index=False)

## DOGE

In [52]:
# Load the DOGE dataset; the `timestamp` column is parsed into datetimes.
doge = pd.read_csv("DOGE/doge_final.csv", parse_dates=["timestamp"])
# Display the frame as the cell output.
doge

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_doge,avg_fee_doge,total_size_used_bytes,market_cap
0,2024-05-01 00:00:00,0.131653,2.862902e+08,38105119.66,4731,6.674916e+08,0.230494,1593843,1.919231e+10
1,2024-05-01 01:00:00,0.131305,4.022567e+08,52859312.91,6419,3.206590e+08,0.184687,2186581,1.893531e+10
2,2024-05-01 02:00:00,0.130631,3.622108e+08,47070760.41,5364,3.654297e+08,0.251760,1925537,1.889457e+10
3,2024-05-01 03:00:00,0.132374,1.999534e+08,26345659.23,3761,3.075656e+08,0.394382,1889888,1.876737e+10
4,2024-05-01 04:00:00,0.132580,1.147719e+08,15225365.04,2124,2.631411e+08,0.514555,1459681,1.899158e+10
...,...,...,...,...,...,...,...,...,...
8035,2025-03-31 19:00:00,0.165746,1.546082e+08,25779667.50,1624,4.977779e+08,0.568891,2799532,2.484804e+10
8036,2025-03-31 20:00:00,0.165668,9.870469e+07,16412831.31,1772,8.277295e+08,0.433538,3357578,2.464346e+10
8037,2025-03-31 21:00:00,0.165791,5.603997e+07,9310295.35,1642,1.415250e+09,0.423078,3016915,2.464603e+10
8038,2025-03-31 22:00:00,0.165656,9.790216e+07,16160828.77,1354,6.266061e+08,0.466473,2494067,2.466526e+10


### Moving Average (MA)

In [53]:
# Simple moving averages of the close over 6 and 24 rows (hours), 8 decimals.
# A full window is required, so the leading rows of each column are NaN.
doge["ma_6"] = doge["close_price"].rolling(window=6).mean().round(8)
doge["ma_24"] = doge["close_price"].rolling(window=24).mean().round(8)

# Crossover flag as nullable Int64:
#   1    -> short MA is strictly above long MA (buy signal)
#   0    -> short MA is not above long MA (no buy signal)
#   <NA> -> at least one of the two averages is not available yet
# Computed column-wise; a per-row DataFrame.apply is not needed for a
# plain comparison of two columns.
ma_undefined = doge["ma_6"].isna() | doge["ma_24"].isna()
doge["ma_crossover_signal"] = (
    (doge["ma_6"] > doge["ma_24"]).astype("Int64").mask(ma_undefined, pd.NA)
)

# Text description of the flag. The flag can only be 1, 0 or <NA>, so once the
# missing case is handled a single 1-vs-0 choice covers every remaining row.
doge["ma_crossover_description"] = [
    None if pd.isna(signal)
    else "short MA (6h) is above long MA (24h) → buy signal" if signal == 1
    else "short MA (6h) is below long MA (24h) → no buy signal"
    for signal in doge["ma_crossover_signal"]
]

### MACD (Moving Average Convergence Divergence)

`MACD`: a momentum indicator that shows the relationship between two Exponential Moving Averages (EMA)

`EMA`: gives more weight to recent prices, making it more responsive than a simple moving average (MA)

MACD consists of 3 main components:
1. MACD Line = EMA(12) - EMA(26): measures trend momentum.
2. Signal Line = EMA(9) of MACD: smooths the MACD line.
3. MACD Histogram = MACD Line - Signal Line: shows the strength of momentum change.

- A crossover (MACD > Signal) is commonly interpreted as a buy signal.
- A crossover (MACD < Signal) is commonly interpreted as a sell signal.

- 当 MACD线 > Signal线：表示上涨动量增强，通常是买入点
- 当 MACD线 < Signal线：表示下跌动量增强，通常是卖出点
- 两者的差值越大，说明动量越强


In [54]:
# MACD feature set for DOGE, computed on the hourly close and rounded to 8 decimals.
# ewm() has no warm-up gap here: every row, including the first, gets a value.

# 1. Short-term EMA (span 12 rows = 12h); adjust=False selects the recursive form
doge["ema_12"] = doge["close_price"].ewm(span=12, adjust=False).mean().round(8)

# 2. Long-term EMA (span 26 rows = 26h)
doge["ema_26"] = doge["close_price"].ewm(span=26, adjust=False).mean().round(8)

# 3. MACD line = ema_12 - ema_26 (built from the already-rounded EMAs)
doge["macd"] = (doge["ema_12"] - doge["ema_26"]).round(8)

# 4. Signal line = EMA(9) of the MACD line.
#    A second EMA applied to MACD gives a smoother MACD curve that is used
#    to generate the buy/sell signals.
doge["macd_signal"] = doge["macd"].ewm(span=9, adjust=False).mean().round(8)

# 5. MACD histogram = MACD line - signal line
doge["macd_hist"] = (doge["macd"] - doge["macd_signal"]).round(8)

# 6. Crossover flag: 1 when MACD is strictly above the signal line (buy momentum),
#    0 otherwise. Ties (e.g. the very first row, where both are 0.0) map to 0.
doge["macd_crossover_signal"] = (doge["macd"] > doge["macd_signal"]).astype(int)

# 7. Text description of the flag. The flag is a plain int that is always 0 or 1,
#    so a two-entry lookup covers every row; no fallback branch is needed.
doge["macd_crossover_description"] = doge["macd_crossover_signal"].map({
    1: "MACD (12h vs 26h EMA) is above signal line → bullish momentum",
    0: "MACD (12h vs 26h EMA) is below signal line → no bullish signal",
})
doge.head()

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_doge,avg_fee_doge,total_size_used_bytes,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description,ema_12,ema_26,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description
0,2024-05-01 00:00:00,0.131653,286290200.0,38105119.66,4731,667491600.0,0.230494,1593843,19192310000.0,,,,,0.131653,0.131653,0.0,0.0,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...
1,2024-05-01 01:00:00,0.131305,402256700.0,52859312.91,6419,320659000.0,0.184687,2186581,18935310000.0,,,,,0.131599,0.131627,-2.8e-05,-6e-06,-2.2e-05,0,MACD (12h vs 26h EMA) is below signal line → n...
2,2024-05-01 02:00:00,0.130631,362210800.0,47070760.41,5364,365429700.0,0.25176,1925537,18894570000.0,,,,,0.13145,0.131553,-0.000103,-2.5e-05,-7.8e-05,0,MACD (12h vs 26h EMA) is below signal line → n...
3,2024-05-01 03:00:00,0.132374,199953400.0,26345659.23,3761,307565600.0,0.394382,1889888,18767370000.0,,,,,0.131592,0.131614,-2.2e-05,-2.4e-05,3e-06,1,MACD (12h vs 26h EMA) is above signal line → b...
4,2024-05-01 04:00:00,0.13258,114771900.0,15225365.04,2124,263141100.0,0.514555,1459681,18991580000.0,,,,,0.131744,0.131685,5.9e-05,-8e-06,6.7e-05,1,MACD (12h vs 26h EMA) is above signal line → b...


### Transaction Count Ratio

Compares the current hour's transaction count to the mean over the trailing 24-hour window (the current hour plus the previous 23 hours).

It helps assess whether on-chain activity is surging, normal, or low.

In [55]:
# Mean transaction count over the trailing 24 rows (current row included).
# Held in a standalone Series, so no helper column has to be added to `doge`
# and dropped again. Rows without a full 24-row window are NaN.
txn_mean_24h = doge["transaction_count"].rolling(window=24).mean()

# Current count relative to that trailing mean, rounded to 4 decimals.
doge["txn_ratio"] = doge["transaction_count"].div(txn_mean_24h).round(4)

# Plain-language label per row: above 1.2 is a surge, below 0.8 is low,
# anything in between is normal. Rows with no ratio yet stay None.
doge["txn_ratio_description"] = [
    None if pd.isna(ratio)
    else "activity surge" if ratio > 1.2
    else "low activity" if ratio < 0.8
    else "normal activity"
    for ratio in doge["txn_ratio"]
]

# Distribution of the labels (rows without a label are not counted).
doge["txn_ratio_description"].value_counts()

txn_ratio_description
low activity       3390
normal activity    2771
activity surge     1856
Name: count, dtype: int64

### Bollinger Bands

Bollinger Bands consist of three lines:
- Middle Band: Simple Moving Average (usually 20-period)
- Upper Band: Middle + 2 × standard deviation
- Lower Band: Middle - 2 × standard deviation

These bands help identify whether the price is relatively high or low
compared to recent history, and are useful for detecting breakout or sideways markets.

In [56]:
# Trailing 20-row window over the close; shared by the mean and the std below.
close_roll20 = doge["close_price"].rolling(window=20)

# Middle band (simple moving average) and rolling standard deviation.
# Rolling.std() uses pandas' default ddof=1 (sample standard deviation).
doge["bb_mid"] = close_roll20.mean()
doge["bb_std"] = close_roll20.std()

# Upper / lower bands: middle band ± 2 standard deviations, 8 decimals.
doge["bb_upper"] = doge["bb_mid"].add(2 * doge["bb_std"]).round(8)
doge["bb_lower"] = doge["bb_mid"].sub(2 * doge["bb_std"]).round(8)

# Position of the close relative to the bands (a label, not a band width).
# Rows where either band is still NaN (window not yet full) stay None.
doge["bb_position"] = [
    None if pd.isna(upper) or pd.isna(lower)
    else "above upper band (possible breakout)" if close > upper
    else "below lower band (possible breakdown)" if close < lower
    else "within band (normal range)"
    for close, upper, lower in zip(
        doge["close_price"], doge["bb_upper"], doge["bb_lower"]
    )
]

This field classifies the current close price relative to the Bollinger Bands.

- If the price is above the upper band → it's called a **breakout**.
    - Meaning: price has moved beyond normal volatility range on the upside.
    - Traders often interpret this as a strong bullish momentum or start of an upward trend.

- If the price is below the lower band → it's called a **breakdown** (a possible setup for a **rebound**).
    - Meaning: price may have dropped too sharply and is likely to bounce back.
    - Traders interpret this as a potential oversold signal or reversal opportunity.

- If the price is between upper and lower bands → it's in the **normal range** (sideways).
    - Meaning: market is moving within expected volatility bounds.

In [57]:
# Inspect the first 30 rows: enough to see past the 20-row Bollinger and 24-row MA / ratio windows.
doge.head(30)

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_doge,avg_fee_doge,total_size_used_bytes,market_cap,ma_6,...,macd_hist,macd_crossover_signal,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position
0,2024-05-01 00:00:00,0.131653,286290200.0,38105120.0,4731,667491600.0,0.230494,1593843,19192310000.0,,...,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
1,2024-05-01 01:00:00,0.131305,402256700.0,52859310.0,6419,320659000.0,0.184687,2186581,18935310000.0,,...,-2.2e-05,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
2,2024-05-01 02:00:00,0.130631,362210800.0,47070760.0,5364,365429700.0,0.25176,1925537,18894570000.0,,...,-7.8e-05,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
3,2024-05-01 03:00:00,0.132374,199953400.0,26345660.0,3761,307565600.0,0.394382,1889888,18767370000.0,,...,3e-06,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,
4,2024-05-01 04:00:00,0.13258,114771900.0,15225370.0,2124,263141100.0,0.514555,1459681,18991580000.0,,...,6.7e-05,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,
5,2024-05-01 05:00:00,0.131686,134807500.0,17829790.0,2649,342314700.0,0.373509,971719,19041300000.0,0.131705,...,4.6e-05,1,MACD (12h vs 26h EMA) is above signal line → b...,,,,,,,
6,2024-05-01 06:00:00,0.128946,232647500.0,30322420.0,8880,432848100.0,0.158072,2700905,18946920000.0,0.131254,...,-0.000144,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
7,2024-05-01 07:00:00,0.124685,1297384000.0,163341500.0,9649,501979500.0,0.179835,3236316,18673170000.0,0.13015,...,-0.000527,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
8,2024-05-01 08:00:00,0.124329,961407100.0,118776300.0,9973,839174400.0,0.158568,3084028,17945670000.0,0.1291,...,-0.000761,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,
9,2024-05-01 09:00:00,0.123978,356016700.0,44404530.0,29411,402596000.0,0.1307,8983604,17758190000.0,0.127701,...,-0.000887,0,MACD (12h vs 26h EMA) is below signal line → n...,,,,,,,


In [58]:
# How often the close sits above, below, or inside the bands (unlabeled rows are not counted).
doge["bb_position"].value_counts()

bb_position
within band (normal range)               7119
below lower band (possible breakdown)     464
above upper band (possible breakout)      438
Name: count, dtype: int64

### Price Change Percentage

This indicator measures the relative change in closing price compared to the previous time step.

It's useful for identifying short-term price momentum (up/down/flat).

Formula:

   $\frac{\text{close}_t - \text{close}_{t-1}}{\text{close}_{t-1}}$

In [59]:
# Close-to-close change versus the previous row, stored as a fraction and
# rounded to 6 decimals; the first row has no predecessor and is NaN.
doge["price_change_pct"] = doge["close_price"].pct_change().round(6)

# Turn each hourly return into a plain-language bucket for the LLM prompt.
# Cut-offs are ±0.002 (0.2%) and ±0.01 (1%). Every test is a strict ">", so a
# value sitting exactly on a cut-off lands in the lower bucket
# (e.g. exactly -0.002 is a "slight decrease"). Missing returns stay None.
doge["price_movement"] = [
    None if pd.isna(change)
    else "significant increase" if change > 0.01
    else "slight increase" if change > 0.002
    else "no significant change" if change > -0.002
    else "slight decrease" if change > -0.01
    else "significant decrease"
    for change in doge["price_change_pct"]
]
doge["price_movement"].value_counts()

price_movement
slight increase          2245
slight decrease          2127
no significant change    1843
significant decrease      928
significant increase      896
Name: count, dtype: int64

In [60]:
# Persist the enriched DOGE frame; index=False leaves the row index out of the file.
doge.to_csv("DOGE/doge_full.csv", index=False)

## USDT

In [61]:
# Load the USDT dataset; the `timestamp` column is parsed into datetimes.
usdt = pd.read_csv("USDT/usdt_final.csv", parse_dates=["timestamp"])
# Display the frame as the cell output.
usdt

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,market_cap
0,2024-05-01 00:00:00,0.999195,1.424318e+07,14231359.34,1.105692e+11
1,2024-05-01 01:00:00,0.999124,7.272293e+06,7267671.80,1.105374e+11
2,2024-05-01 02:00:00,0.999151,8.760352e+06,8755053.36,1.104664e+11
3,2024-05-01 03:00:00,0.999100,6.270335e+06,6266386.85,1.104993e+11
4,2024-05-01 04:00:00,0.999108,7.170144e+06,7166996.22,1.105020e+11
...,...,...,...,...,...
8035,2025-03-31 19:00:00,1.000039,2.419581e+07,24196501.77,1.439279e+11
8036,2025-03-31 20:00:00,1.000005,1.556304e+07,15561082.63,1.439114e+11
8037,2025-03-31 21:00:00,0.999940,2.478296e+07,24777574.63,1.439113e+11
8038,2025-03-31 22:00:00,0.999929,9.716986e+06,9716692.67,1.438995e+11


### Moving Average (MA)

In [62]:
# Simple moving averages of the close over 6 and 24 rows (hours), 8 decimals.
# A full window is required, so the leading rows of each column are NaN.
usdt["ma_6"] = usdt["close_price"].rolling(window=6).mean().round(8)
usdt["ma_24"] = usdt["close_price"].rolling(window=24).mean().round(8)

# Crossover flag as nullable Int64:
#   1    -> short MA is strictly above long MA (buy signal)
#   0    -> short MA is not above long MA (no buy signal)
#   <NA> -> at least one of the two averages is not available yet
# Computed column-wise; a per-row DataFrame.apply is not needed for a
# plain comparison of two columns.
ma_undefined = usdt["ma_6"].isna() | usdt["ma_24"].isna()
usdt["ma_crossover_signal"] = (
    (usdt["ma_6"] > usdt["ma_24"]).astype("Int64").mask(ma_undefined, pd.NA)
)

# Text description of the flag. The flag can only be 1, 0 or <NA>, so once the
# missing case is handled a single 1-vs-0 choice covers every remaining row.
usdt["ma_crossover_description"] = [
    None if pd.isna(signal)
    else "short MA (6h) is above long MA (24h) → buy signal" if signal == 1
    else "short MA (6h) is below long MA (24h) → no buy signal"
    for signal in usdt["ma_crossover_signal"]
]

### MACD (Moving Average Convergence Divergence)

`MACD`: a momentum indicator that shows the relationship between two Exponential Moving Averages (EMA)

`EMA`: gives more weight to recent prices, making it more responsive than a simple moving average (MA)

MACD consists of 3 main components:
1. MACD Line = EMA(12) - EMA(26): measures trend momentum.
2. Signal Line = EMA(9) of MACD: smooths the MACD line.
3. MACD Histogram = MACD Line - Signal Line: shows the strength of momentum change.

- A crossover (MACD > Signal) is commonly interpreted as a buy signal.
- A crossover (MACD < Signal) is commonly interpreted as a sell signal.

- 当 MACD线 > Signal线：表示上涨动量增强，通常是买入点
- 当 MACD线 < Signal线：表示下跌动量增强，通常是卖出点
- 两者的差值越大，说明动量越强


In [63]:
# MACD feature set for USDT, computed on the hourly close and rounded to 8 decimals.
# ewm() has no warm-up gap here: every row, including the first, gets a value.

# 1. Short-term EMA (span 12 rows = 12h); adjust=False selects the recursive form
usdt["ema_12"] = usdt["close_price"].ewm(span=12, adjust=False).mean().round(8)

# 2. Long-term EMA (span 26 rows = 26h)
usdt["ema_26"] = usdt["close_price"].ewm(span=26, adjust=False).mean().round(8)

# 3. MACD line = ema_12 - ema_26 (built from the already-rounded EMAs)
usdt["macd"] = (usdt["ema_12"] - usdt["ema_26"]).round(8)

# 4. Signal line = EMA(9) of the MACD line.
#    A second EMA applied to MACD gives a smoother MACD curve that is used
#    to generate the buy/sell signals.
usdt["macd_signal"] = usdt["macd"].ewm(span=9, adjust=False).mean().round(8)

# 5. MACD histogram = MACD line - signal line
usdt["macd_hist"] = (usdt["macd"] - usdt["macd_signal"]).round(8)

# 6. Crossover flag: 1 when MACD is strictly above the signal line (buy momentum),
#    0 otherwise. Ties (e.g. the very first row, where both are 0.0) map to 0.
usdt["macd_crossover_signal"] = (usdt["macd"] > usdt["macd_signal"]).astype(int)

# 7. Text description of the flag. The flag is a plain int that is always 0 or 1,
#    so a two-entry lookup covers every row; no fallback branch is needed.
usdt["macd_crossover_description"] = usdt["macd_crossover_signal"].map({
    1: "MACD (12h vs 26h EMA) is above signal line → bullish momentum",
    0: "MACD (12h vs 26h EMA) is below signal line → no bullish signal",
})
usdt.head()

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description,ema_12,ema_26,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description
0,2024-05-01 00:00:00,0.999195,14243180.0,14231359.34,110569200000.0,,,,,0.999195,0.999195,0.0,0.0,0.0,0,MACD (12h vs 26h EMA) is below signal line → n...
1,2024-05-01 01:00:00,0.999124,7272293.0,7267671.8,110537400000.0,,,,,0.999184,0.999189,-6e-06,-1e-06,-4e-06,0,MACD (12h vs 26h EMA) is below signal line → n...
2,2024-05-01 02:00:00,0.999151,8760352.0,8755053.36,110466400000.0,,,,,0.999179,0.999187,-8e-06,-2e-06,-5e-06,0,MACD (12h vs 26h EMA) is below signal line → n...
3,2024-05-01 03:00:00,0.9991,6270335.0,6266386.85,110499300000.0,,,,,0.999167,0.99918,-1.3e-05,-5e-06,-9e-06,0,MACD (12h vs 26h EMA) is below signal line → n...
4,2024-05-01 04:00:00,0.999108,7170144.0,7166996.22,110502000000.0,,,,,0.999158,0.999175,-1.7e-05,-7e-06,-1e-05,0,MACD (12h vs 26h EMA) is below signal line → n...


### Bollinger Bands

Bollinger Bands consist of three lines:
- Middle Band: Simple Moving Average (usually 20-period)
- Upper Band: Middle + 2 × standard deviation
- Lower Band: Middle - 2 × standard deviation

These bands help identify whether the price is relatively high or low
compared to recent history, and are useful for detecting breakout or sideways markets.

In [64]:
# Trailing 20-row window over the close; shared by the mean and the std below.
close_roll20 = usdt["close_price"].rolling(window=20)

# Middle band (simple moving average) and rolling standard deviation.
# Rolling.std() uses pandas' default ddof=1 (sample standard deviation).
usdt["bb_mid"] = close_roll20.mean()
usdt["bb_std"] = close_roll20.std()

# Upper / lower bands: middle band ± 2 standard deviations, 8 decimals.
usdt["bb_upper"] = usdt["bb_mid"].add(2 * usdt["bb_std"]).round(8)
usdt["bb_lower"] = usdt["bb_mid"].sub(2 * usdt["bb_std"]).round(8)

# Position of the close relative to the bands (a label, not a band width).
# Rows where either band is still NaN (window not yet full) stay None.
usdt["bb_position"] = [
    None if pd.isna(upper) or pd.isna(lower)
    else "above upper band (possible breakout)" if close > upper
    else "below lower band (possible breakdown)" if close < lower
    else "within band (normal range)"
    for close, upper, lower in zip(
        usdt["close_price"], usdt["bb_upper"], usdt["bb_lower"]
    )
]

This field classifies the current close price relative to the Bollinger Bands.

- If the price is above the upper band → it's called a **breakout**.
    - Meaning: price has moved beyond normal volatility range on the upside.
    - Traders often interpret this as a strong bullish momentum or start of an upward trend.

- If the price is below the lower band → it's called a **breakdown** (a possible setup for a **rebound**).
    - Meaning: price may have dropped too sharply and is likely to bounce back.
    - Traders interpret this as a potential oversold signal or reversal opportunity.

- If the price is between upper and lower bands → it's in the **normal range** (sideways).
    - Meaning: market is moving within expected volatility bounds.

In [65]:
# How often the close sits above, below, or inside the bands (unlabeled rows are not counted).
usdt["bb_position"].value_counts()

bb_position
within band (normal range)               7173
above upper band (possible breakout)      432
below lower band (possible breakdown)     416
Name: count, dtype: int64

In [66]:
# Persist the enriched USDT frame; index=False leaves the row index out of the file.
usdt.to_csv("USDT/usdt_full.csv", index=False)