In [33]:
import pandas as pd
import yfinance as yf

# Part 1: Convert to Text for Each Token

## BTC

In [None]:
# Load the BTC feature table and discard its first 24 rows (they contain
# NA cells), renumbering the remaining rows from 0.
btc = (
    pd.read_csv("tokens/BTC/btc_full.csv", parse_dates=["timestamp"])
    .iloc[24:]
    .reset_index(drop=True)
)

# Show which columns are available for the text template below.
btc.columns

Index(['timestamp', 'close_price', 'volume_base', 'volume_quote',
       'transaction_count', 'total_value_transferred_btc', 'avg_fee_btc',
       'total_size_used_mb', 'unique_active_wallets', 'market_cap', 'ma_6',
       'ma_24', 'ma_crossover_signal', 'ma_crossover_description', 'ema_12',
       'ema_26', 'macd', 'macd_signal', 'macd_hist', 'macd_crossover_signal',
       'macd_crossover_description', 'txn_ratio', 'txn_ratio_description',
       'bb_mid', 'bb_std', 'bb_upper', 'bb_lower', 'bb_position',
       'price_change_pct', 'price_movement'],
      dtype='object')

In [4]:
def _btc_row_to_text(row):
    """Render one BTC row as a multi-line plain-text summary.

    Args:
        row: One row of ``btc`` (anything indexable by column name).

    Returns:
        A string with market lines (price, hourly change, volume, market cap,
        MA/MACD/Bollinger descriptions) followed by an "On-chain activity"
        list. ``price_change_pct`` is multiplied by 100 and printed as a
        signed percentage.
    """
    market_part = (
        f"Price: {row['close_price']} USD\n"
        f"Price change compared to last hour: {row['price_change_pct'] * 100:+.4f}% ({row['price_movement']})\n"
        f"Volume: {row['volume_base']:.4f} BTC\n"
        f"Market cap: {row['market_cap']:,.2f} USD\n"
        f"Trend signal: {row['ma_crossover_description']}\n"
        f"Momentum signal: {row['macd_crossover_description']}\n"
        f"Price regime (Bollinger Bands): {row['bb_position']}\n"
    )
    onchain_part = (
        f"On-chain activity:\n"
        f"- {row['transaction_count']} transactions → {row['txn_ratio_description']} (txn ratio = {row['txn_ratio']}, compared to 24h average)\n"
        f"- {row['unique_active_wallets']} active wallets\n"
        f"- {row['total_value_transferred_btc']} BTC transferred\n"
        # The unit is stated explicitly, matching the DOGE and SOL fee lines.
        f"- Average transaction fee: {row['avg_fee_btc']:.8f} BTC\n"
        f"- {row['total_size_used_mb']} MB block size used"
    )
    return market_part + onchain_part


# One text summary per hourly row.
btc["text"] = btc.apply(_btc_row_to_text, axis=1)

In [5]:
# Write the BTC table (now including the "text" column) to CSV without the index.
# NOTE(review): the input was read from "tokens/BTC/" but this writes to "BTC/" — confirm the intended output directory.
btc.to_csv("BTC/btc_with_text.csv", index=False)

## ETH

In [None]:
# Load the ETH feature table, skip its first 24 rows and renumber from 0.
eth = (
    pd.read_csv("tokens/ETH/eth_full.csv", parse_dates=["timestamp"])
    .iloc[24:]
    .reset_index(drop=True)
)

In [7]:
# List the ETH columns available to the text template.
eth.columns

Index(['timestamp', 'close_price', 'volume_base', 'volume_quote',
       'transaction_count', 'total_value_transferred_eth',
       'unique_active_wallets', 'avg_gas_price_gwei', 'total_gas_used',
       'market_cap', 'ma_6', 'ma_24', 'ma_crossover_signal',
       'ma_crossover_description', 'ema_12', 'ema_26', 'macd', 'macd_signal',
       'macd_hist', 'macd_crossover_signal', 'macd_crossover_description',
       'txn_ratio', 'txn_ratio_description', 'bb_mid', 'bb_std', 'bb_upper',
       'bb_lower', 'bb_position', 'price_change_pct', 'price_movement'],
      dtype='object')

In [8]:
def _eth_row_to_text(row):
    """Render one ETH row as a multi-line plain-text summary.

    Market lines come first, then an "On-chain activity" list with
    transactions, active wallets, ETH transferred and gas figures.
    """
    market_part = (
        f"Price: {row['close_price']} USD\n"
        f"Price change compared to last hour: {row['price_change_pct'] * 100:+.4f}% ({row['price_movement']})\n"
        f"Volume: {row['volume_base']:.4f} ETH\n"
        f"Market cap: {row['market_cap']:,.2f} USD\n"
        f"Trend signal: {row['ma_crossover_description']}\n"
        f"Momentum signal: {row['macd_crossover_description']}\n"
        f"Price regime (Bollinger Bands): {row['bb_position']}\n"
    )
    onchain_part = (
        f"On-chain activity:\n"
        f"- {row['transaction_count']} transactions → {row['txn_ratio_description']} (txn ratio = {row['txn_ratio']}, compared to 24h average)\n"
        f"- {row['unique_active_wallets']} active wallets\n"
        f"- {row['total_value_transferred_eth']} ETH transferred\n"
        f"- {row['avg_gas_price_gwei']} Gwei average gas price\n"
        f"- {row['total_gas_used']:,.0f} total gas used"
    )
    return market_part + onchain_part


# One text summary per hourly row.
eth["text"] = eth.apply(_eth_row_to_text, axis=1)

In [9]:
# Write the ETH table (now including the "text" column) to CSV without the index.
# NOTE(review): the input was read from "tokens/ETH/" but this writes to "ETH/" — confirm the intended output directory.
eth.to_csv("ETH/eth_with_text.csv", index=False)

## SOL

In [None]:
# Load the SOL feature table, skip its first 24 rows and renumber from 0.
sol = (
    pd.read_csv("tokens/SOL/sol_full.csv", parse_dates=["timestamp"])
    .iloc[24:]
    .reset_index(drop=True)
)
sol.head()

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,successful_transaction_count,total_fee_sol,avg_fee_sol,market_cap,ma_6,...,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position,price_change_pct,price_movement
0,2024-05-02 00:00:00,131.4696,739088.8957,97971114.19,9021681,7061930,280.9976,3.1e-05,60292550000.0,132.3795,...,MACD (12h vs 26h EMA) is above signal line → b...,1.0692,normal activity,126.4213,4.962783,136.3469,116.4957,within band (normal range),-0.023473,significant decrease
1,2024-05-02 01:00:00,129.6897,612510.1719,79864504.21,8822319,6868772,243.0346,2.8e-05,58680430000.0,132.383,...,MACD (12h vs 26h EMA) is above signal line → b...,1.048,normal activity,126.651685,5.004115,136.6599,116.6435,within band (normal range),-0.013538,significant decrease
2,2024-05-02 02:00:00,129.6112,539810.0675,69934077.55,9235352,7203347,238.7772,2.6e-05,58226090000.0,132.0229,...,MACD (12h vs 26h EMA) is above signal line → b...,1.0928,normal activity,126.99093,4.96093,136.9128,117.0691,within band (normal range),-0.000605,no significant change
3,2024-05-02 03:00:00,130.0905,411643.4165,53646068.35,9284207,7307339,247.4921,2.7e-05,58055890000.0,131.4748,...,MACD (12h vs 26h EMA) is above signal line → b...,1.0997,normal activity,127.4703,4.76017,136.9906,117.95,within band (normal range),0.003698,slight increase
4,2024-05-02 04:00:00,129.5669,442239.6301,57617701.05,9015569,6970857,230.0827,2.6e-05,58211850000.0,130.8429,...,MACD (12h vs 26h EMA) is below signal line → n...,1.0691,normal activity,127.89366,4.534961,136.9636,118.8237,within band (normal range),-0.004025,slight decrease


In [11]:
# List the SOL columns available to the text template.
sol.columns

Index(['timestamp', 'close_price', 'volume_base', 'volume_quote',
       'transaction_count', 'successful_transaction_count', 'total_fee_sol',
       'avg_fee_sol', 'market_cap', 'ma_6', 'ma_24', 'ma_crossover_signal',
       'ma_crossover_description', 'ema_12', 'ema_26', 'macd', 'macd_signal',
       'macd_hist', 'macd_crossover_signal', 'macd_crossover_description',
       'txn_ratio', 'txn_ratio_description', 'bb_mid', 'bb_std', 'bb_upper',
       'bb_lower', 'bb_position', 'price_change_pct', 'price_movement'],
      dtype='object')

In [12]:
def _sol_row_to_text(row):
    """Render one SOL row as a multi-line plain-text summary.

    Market lines come first, then an "On-chain activity" list with the
    transaction count, total fees and average fee per transaction.
    """
    market_part = (
        f"Price: {row['close_price']} USD\n"
        f"Price change compared to last hour: {row['price_change_pct'] * 100:+.4f}% ({row['price_movement']})\n"
        f"Volume: {row['volume_base']:.4f} SOL\n"
        f"Market cap: {row['market_cap']:,.2f} USD\n"
        f"Trend signal: {row['ma_crossover_description']}\n"
        f"Momentum signal: {row['macd_crossover_description']}\n"
        f"Price regime (Bollinger Bands): {row['bb_position']}\n"
    )
    onchain_part = (
        f"On-chain activity:\n"
        f"- {row['transaction_count']} transactions → {row['txn_ratio_description']} (txn ratio = {row['txn_ratio']}, compared to 24h average)\n"
        f"- {row['total_fee_sol']} SOL total fees paid\n"
        f"- Average fee per transaction: {row['avg_fee_sol']:.8f} SOL"
    )
    return market_part + onchain_part


# One text summary per hourly row.
sol["text"] = sol.apply(_sol_row_to_text, axis=1)

In [13]:
# Write the SOL table (now including the "text" column) to CSV without the index.
# NOTE(review): the input was read from "tokens/SOL/" but this writes to "SOL/" — confirm the intended output directory.
sol.to_csv("SOL/sol_with_text.csv", index=False)

## DOGE

In [None]:
# Load the DOGE feature table, skip its first 24 rows and renumber from 0.
doge = (
    pd.read_csv("tokens/DOGE/doge_full.csv", parse_dates=["timestamp"])
    .iloc[24:]
    .reset_index(drop=True)
)
doge.head()

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,transaction_count,total_value_transferred_doge,avg_fee_doge,total_size_used_bytes,market_cap,ma_6,...,macd_crossover_description,txn_ratio,txn_ratio_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position,price_change_pct,price_movement
0,2024-05-02 00:00:00,0.127801,316526000.0,40991031.96,5188,532283100.0,0.18325,1589128,18810290000.0,0.128604,...,MACD (12h vs 26h EMA) is above signal line → b...,0.3307,low activity,0.126672,0.002865,0.132401,0.120942,within band (normal range),-0.020795,significant decrease
1,2024-05-02 01:00:00,0.125845,305965100.0,38783962.93,13093,233487400.0,0.090736,3489638,18425500000.0,0.128369,...,MACD (12h vs 26h EMA) is above signal line → b...,0.8201,normal activity,0.12638,0.002614,0.131607,0.121152,within band (normal range),-0.015309,significant decrease
2,2024-05-02 02:00:00,0.126181,199895100.0,25211351.38,8743,267416400.0,0.152309,2711506,18196040000.0,0.128124,...,MACD (12h vs 26h EMA) is above signal line → b...,0.5429,low activity,0.126241,0.002543,0.131327,0.121156,within band (normal range),0.002673,slight increase
3,2024-05-02 03:00:00,0.126435,107717800.0,13656346.98,5830,305461900.0,0.172382,1881013,18168950000.0,0.127712,...,MACD (12h vs 26h EMA) is above signal line → b...,0.3601,low activity,0.126329,0.002516,0.131362,0.121296,within band (normal range),0.002012,slight increase
4,2024-05-02 04:00:00,0.12516,198154200.0,25017373.02,31981,402419800.0,0.04979,8552289,18223620000.0,0.12699,...,MACD (12h vs 26h EMA) is above signal line → b...,1.8342,activity surge,0.12637,0.002488,0.131347,0.121394,within band (normal range),-0.010083,significant decrease


In [15]:
# List the DOGE columns available to the text template.
doge.columns

Index(['timestamp', 'close_price', 'volume_base', 'volume_quote',
       'transaction_count', 'total_value_transferred_doge', 'avg_fee_doge',
       'total_size_used_bytes', 'market_cap', 'ma_6', 'ma_24',
       'ma_crossover_signal', 'ma_crossover_description', 'ema_12', 'ema_26',
       'macd', 'macd_signal', 'macd_hist', 'macd_crossover_signal',
       'macd_crossover_description', 'txn_ratio', 'txn_ratio_description',
       'bb_mid', 'bb_std', 'bb_upper', 'bb_lower', 'bb_position',
       'price_change_pct', 'price_movement'],
      dtype='object')

In [16]:
def _doge_row_to_text(row):
    """Render one DOGE row as a multi-line plain-text summary.

    Market lines come first, then an "On-chain activity" list. The block
    size is printed as ``total_size_used_bytes / 1024`` with a KB label.
    """
    market_part = (
        f"Price: {row['close_price']:.8f} USD\n"
        f"Price change compared to last hour: {row['price_change_pct'] * 100:+.4f}% ({row['price_movement']})\n"
        f"Volume: {row['volume_base']:,.4f} DOGE\n"
        f"Market cap: {row['market_cap']:,.2f} USD\n"
        f"Trend signal: {row['ma_crossover_description']}\n"
        f"Momentum signal: {row['macd_crossover_description']}\n"
        f"Price regime (Bollinger Bands): {row['bb_position']}\n"
    )
    onchain_part = (
        f"On-chain activity:\n"
        f"- {row['transaction_count']} transactions → {row['txn_ratio_description']} (txn ratio = {row['txn_ratio']}, compared to 24h average)\n"
        f"- {row['total_value_transferred_doge']:.2f} DOGE transferred\n"
        f"- Average transaction fee: {row['avg_fee_doge']} DOGE\n"
        f"- {row['total_size_used_bytes'] / 1024:.4f} KB block size used"
    )
    return market_part + onchain_part


# One text summary per hourly row.
doge["text"] = doge.apply(_doge_row_to_text, axis=1)

In [17]:
# Write the DOGE table (now including the "text" column) to CSV without the index.
# NOTE(review): the input was read from "tokens/DOGE/" but this writes to "DOGE/" — confirm the intended output directory.
doge.to_csv("DOGE/doge_with_text.csv", index=False)

## USDT

In [None]:
# Load the USDT feature table, skip its first 24 rows and renumber from 0.
usdt = (
    pd.read_csv("tokens/USDT/usdt_full.csv", parse_dates=["timestamp"])
    .iloc[24:]
    .reset_index(drop=True)
)
usdt.head()

Unnamed: 0,timestamp,close_price,volume_base,volume_quote,market_cap,ma_6,ma_24,ma_crossover_signal,ma_crossover_description,ema_12,...,macd,macd_signal,macd_hist,macd_crossover_signal,macd_crossover_description,bb_mid,bb_std,bb_upper,bb_lower,bb_position
0,2024-05-02 00:00:00,0.999544,9569110.0,9560786.23,110656600000.0,0.999356,0.999282,1.0,short MA (6h) is above long MA (24h) → buy signal,0.999354,...,4.9e-05,4.2e-05,8e-06,1,MACD (12h vs 26h EMA) is above signal line → b...,0.999314,0.000159,0.999632,0.998997,within band (normal range)
1,2024-05-02 01:00:00,0.999702,6666103.0,6661710.9,110446600000.0,0.999416,0.999306,1.0,short MA (6h) is above long MA (24h) → buy signal,0.999408,...,7.4e-05,4.8e-05,2.6e-05,1,MACD (12h vs 26h EMA) is above signal line → b...,0.999344,0.000173,0.99969,0.998998,above upper band (possible breakout)
2,2024-05-02 02:00:00,0.99976,8511208.0,8506023.03,110607600000.0,0.999479,0.999332,1.0,short MA (6h) is above long MA (24h) → buy signal,0.999462,...,9.6e-05,5.8e-05,3.9e-05,1,MACD (12h vs 26h EMA) is above signal line → b...,0.999371,0.000194,0.999758,0.998984,above upper band (possible breakout)
3,2024-05-02 03:00:00,0.999628,12571090.0,12560514.98,110577300000.0,0.999541,0.999354,1.0,short MA (6h) is above long MA (24h) → buy signal,0.999487,...,0.000102,6.7e-05,3.6e-05,1,MACD (12h vs 26h EMA) is above signal line → b...,0.999387,0.000201,0.99979,0.998985,within band (normal range)
4,2024-05-02 04:00:00,0.999663,4436367.0,4433286.13,110443300000.0,0.999602,0.999377,1.0,short MA (6h) is above long MA (24h) → buy signal,0.999514,...,0.000109,7.5e-05,3.4e-05,1,MACD (12h vs 26h EMA) is above signal line → b...,0.999427,0.000169,0.999765,0.99909,within band (normal range)


In [19]:
# List the USDT columns; unlike the other tokens this table has no on-chain or price-change columns.
usdt.columns

Index(['timestamp', 'close_price', 'volume_base', 'volume_quote', 'market_cap',
       'ma_6', 'ma_24', 'ma_crossover_signal', 'ma_crossover_description',
       'ema_12', 'ema_26', 'macd', 'macd_signal', 'macd_hist',
       'macd_crossover_signal', 'macd_crossover_description', 'bb_mid',
       'bb_std', 'bb_upper', 'bb_lower', 'bb_position'],
      dtype='object')

In [20]:
def _usdt_row_to_text(row):
    """Render one USDT row as a multi-line plain-text market summary.

    Only market lines are produced: price, volume, market cap and the
    MA/MACD/Bollinger descriptions.
    """
    price_part = (
        f"Price: {row['close_price']:.8f} USD (stablecoin)\n"
        f"Volume: {row['volume_base']:.4f} USDT\n"
        f"Market cap: {row['market_cap']:,.2f} USD\n"
    )
    signal_part = (
        f"Trend signal: {row['ma_crossover_description']}\n"
        f"Momentum signal: {row['macd_crossover_description']}\n"
        f"Price regime (Bollinger Bands): {row['bb_position']}"
    )
    return price_part + signal_part


# One text summary per hourly row.
usdt["text"] = usdt.apply(_usdt_row_to_text, axis=1)

In [21]:
# Write the USDT table (now including the "text" column) to CSV without the index.
# NOTE(review): the input was read from "tokens/USDT/" but this writes to "USDT/" — confirm the intended output directory.
usdt.to_csv("USDT/usdt_with_text.csv", index=False)

## Isolating All Price Data Across 5 Tokens For Each Hour (For Later Use)

In [347]:
# Two-column frame (timestamp, btc_price) built from the BTC close prices.
btc_price = btc[["timestamp", "close_price"]].rename(columns={"close_price": "btc_price"})
btc_price

Unnamed: 0,timestamp,btc_price
0,2024-05-02 00:00:00,57759.9547
1,2024-05-02 01:00:00,57109.6836
2,2024-05-02 02:00:00,57462.1227
3,2024-05-02 03:00:00,57441.6225
4,2024-05-02 04:00:00,57386.5479
...,...,...
8011,2025-03-31 19:00:00,82447.4689
8012,2025-03-31 20:00:00,82439.1058
8013,2025-03-31 21:00:00,82567.0649
8014,2025-03-31 22:00:00,82412.5917


In [349]:
# Two-column frame (timestamp, eth_price) built from the ETH close prices.
eth_price = eth[["timestamp", "close_price"]].rename(columns={"close_price": "eth_price"})
eth_price

Unnamed: 0,timestamp,eth_price
0,2024-05-02 00:00:00,2947.5852
1,2024-05-02 01:00:00,2905.4601
2,2024-05-02 02:00:00,2917.0471
3,2024-05-02 03:00:00,2928.5850
4,2024-05-02 04:00:00,2921.4791
...,...,...
8011,2025-03-31 19:00:00,1827.5233
8012,2025-03-31 20:00:00,1819.8772
8013,2025-03-31 21:00:00,1824.6029
8014,2025-03-31 22:00:00,1824.7112


In [350]:
# Two-column frame (timestamp, sol_price) built from the SOL close prices.
sol_price = sol[["timestamp", "close_price"]].rename(columns={"close_price": "sol_price"})
sol_price

Unnamed: 0,timestamp,sol_price
0,2024-05-02 00:00:00,131.4696
1,2024-05-02 01:00:00,129.6897
2,2024-05-02 02:00:00,129.6112
3,2024-05-02 03:00:00,130.0905
4,2024-05-02 04:00:00,129.5669
...,...,...
8011,2025-03-31 19:00:00,125.2499
8012,2025-03-31 20:00:00,125.4111
8013,2025-03-31 21:00:00,125.7530
8014,2025-03-31 22:00:00,125.5478


In [353]:
# Two-column frame (timestamp, doge_price) built from the DOGE close prices.
doge_price = doge[["timestamp", "close_price"]].rename(columns={"close_price": "doge_price"})
doge_price

Unnamed: 0,timestamp,doge_price
0,2024-05-02 00:00:00,0.127801
1,2024-05-02 01:00:00,0.125845
2,2024-05-02 02:00:00,0.126181
3,2024-05-02 03:00:00,0.126435
4,2024-05-02 04:00:00,0.125160
...,...,...
8011,2025-03-31 19:00:00,0.165746
8012,2025-03-31 20:00:00,0.165668
8013,2025-03-31 21:00:00,0.165791
8014,2025-03-31 22:00:00,0.165656


In [354]:
# Two-column frame (timestamp, usdt_price) built from the USDT close prices.
usdt_price = usdt[["timestamp", "close_price"]].rename(columns={'close_price': 'usdt_price'})
usdt_price

Unnamed: 0,timestamp,usdt_price
0,2024-05-02 00:00:00,0.999544
1,2024-05-02 01:00:00,0.999702
2,2024-05-02 02:00:00,0.999760
3,2024-05-02 03:00:00,0.999628
4,2024-05-02 04:00:00,0.999663
...,...,...
8011,2025-03-31 19:00:00,1.000039
8012,2025-03-31 20:00:00,1.000005
8013,2025-03-31 21:00:00,0.999940
8014,2025-03-31 22:00:00,0.999929


In [355]:
# Join the five per-token price frames on timestamp (default inner join),
# giving one row per hour that is present in every frame.
all_prices = (
    btc_price
    .merge(eth_price, on="timestamp")
    .merge(sol_price, on="timestamp")
    .merge(doge_price, on="timestamp")
    .merge(usdt_price, on="timestamp")
)

all_prices

Unnamed: 0,timestamp,btc_price,eth_price,sol_price,doge_price,usdt_price
0,2024-05-02 00:00:00,57759.9547,2947.5852,131.4696,0.127801,0.999544
1,2024-05-02 01:00:00,57109.6836,2905.4601,129.6897,0.125845,0.999702
2,2024-05-02 02:00:00,57462.1227,2917.0471,129.6112,0.126181,0.999760
3,2024-05-02 03:00:00,57441.6225,2928.5850,130.0905,0.126435,0.999628
4,2024-05-02 04:00:00,57386.5479,2921.4791,129.5669,0.125160,0.999663
...,...,...,...,...,...,...
8011,2025-03-31 19:00:00,82447.4689,1827.5233,125.2499,0.165746,1.000039
8012,2025-03-31 20:00:00,82439.1058,1819.8772,125.4111,0.165668,1.000005
8013,2025-03-31 21:00:00,82567.0649,1824.6029,125.7530,0.165791,0.999940
8014,2025-03-31 22:00:00,82412.5917,1824.7112,125.5478,0.165656,0.999929


In [357]:
# Save the combined hourly price table for later use, without the index column.
all_prices.to_csv("data/all_prices_across_assets.csv", index=False)

In [358]:
# Partition all price data into 3 regimes.
# Bullish window: 2024-11-05 00:00 through 2024-12-04 23:00, both ends included.
all_prices_bullish = (
    all_prices[all_prices['timestamp'].between('2024-11-05 00:00:00', '2024-12-04 23:00:00')]
    .copy()
    .reset_index(drop=True)
)
all_prices_bullish

Unnamed: 0,timestamp,btc_price,eth_price,sol_price,doge_price,usdt_price
0,2024-11-05 00:00:00,68006.8008,2400.2317,158.5760,0.164755,0.999386
1,2024-11-05 01:00:00,67969.5688,2409.7639,158.2116,0.162622,0.999366
2,2024-11-05 02:00:00,67857.8633,2403.9608,158.2175,0.161914,0.999453
3,2024-11-05 03:00:00,68216.7021,2419.2786,159.6270,0.162487,0.999443
4,2024-11-05 04:00:00,68308.7174,2427.8065,159.9727,0.162226,0.999453
...,...,...,...,...,...,...
715,2024-12-04 19:00:00,97293.2167,3846.4843,232.9248,0.416510,1.001477
716,2024-12-04 20:00:00,98911.2251,3885.6738,232.9981,0.419737,1.002231
717,2024-12-04 21:00:00,97839.8131,3844.4650,229.9886,0.422598,1.001689
718,2024-12-04 22:00:00,98312.7488,3847.6644,229.4383,0.419115,1.001459


In [395]:
# Bearish window: 2025-01-22 00:00 through 2025-03-09 23:00, both ends included.
all_prices_bearish = (
    all_prices[all_prices['timestamp'].between('2025-01-22 00:00:00', '2025-03-09 23:00:00')]
    .copy()
    .reset_index(drop=True)
)
all_prices_bearish

Unnamed: 0,timestamp,btc_price,eth_price,sol_price,doge_price,usdt_price
0,2025-01-22 00:00:00,105991.6920,3363.1587,253.0133,0.373459,0.999969
1,2025-01-22 01:00:00,105822.8884,3330.5308,252.4561,0.370568,0.999978
2,2025-01-22 02:00:00,105623.7806,3344.7382,252.6056,0.372697,0.999993
3,2025-01-22 03:00:00,105651.3822,3332.5598,254.4701,0.373087,1.000090
4,2025-01-22 04:00:00,105606.9847,3333.1036,255.9487,0.371775,0.999906
...,...,...,...,...,...,...
1123,2025-03-09 19:00:00,82671.2826,2041.4972,129.2483,0.171616,1.000643
1124,2025-03-09 20:00:00,83138.2554,2048.7784,128.4520,0.171554,1.000754
1125,2025-03-09 21:00:00,81939.4262,2026.2881,128.1068,0.169298,1.000761
1126,2025-03-09 22:00:00,80393.8015,2011.2138,127.0260,0.167321,1.000490


In [360]:
# Sideways window: 2024-08-09 00:00 through 2024-09-07 23:00, both ends included.
all_prices_sideways = (
    all_prices[all_prices['timestamp'].between('2024-08-09 00:00:00', '2024-09-07 23:00:00')]
    .copy()
    .reset_index(drop=True)
)
all_prices_sideways

Unnamed: 0,timestamp,btc_price,eth_price,sol_price,doge_price,usdt_price
0,2024-08-09 00:00:00,61257.4370,2667.4673,161.4598,0.107046,1.000368
1,2024-08-09 01:00:00,61443.0406,2672.4434,160.5484,0.106111,1.000330
2,2024-08-09 02:00:00,61289.2862,2671.5673,158.6511,0.105677,1.000233
3,2024-08-09 03:00:00,61404.4036,2696.6841,159.2750,0.106241,1.000234
4,2024-08-09 04:00:00,60764.4714,2661.8933,156.3544,0.104452,1.000219
...,...,...,...,...,...,...
715,2024-09-07 19:00:00,54458.7186,2292.5390,128.5441,0.095121,1.000779
716,2024-09-07 20:00:00,54230.0669,2273.6253,127.5057,0.095193,1.000890
717,2024-09-07 21:00:00,53971.0896,2266.3987,126.9849,0.095151,1.000932
718,2024-09-07 22:00:00,54003.1837,2266.6692,126.9164,0.094951,1.000933


In [361]:
# Write each regime slice to its own CSV under data/regimes/<regime>/, without the index column.
all_prices_bullish.to_csv("data/regimes/bullish/all_prices_bullish.csv", index=False)
all_prices_bearish.to_csv("data/regimes/bearish/all_prices_bearish.csv", index=False)
all_prices_sideways.to_csv("data/regimes/sideways/all_prices_sideways.csv", index=False)

# Part 2: Macro Factors

In [39]:
import pandas_datareader.data as web
import datetime

### 1. DGS10: 10-Year Treasury Constant Maturity Rate
- Represents the interest rate at which the U.S. government can borrow money for 10 years
- When the 10-year yield rises, it's often interpreted as:
    - Expectation of higher inflation
    - Stronger economic growth
    - Or a signal that the Fed might raise short-term rates
- A macro indicator of long-term sentiment in financial markets

In [None]:
# Date window requested from FRED for the macro series.
start = datetime.datetime(2024, 5, 2)
end = datetime.datetime(2025, 4, 1)

# Daily 10-year Treasury yield (FRED series DGS10).
tnx = web.DataReader("DGS10", 'fred', start, end)
tnx.index = pd.to_datetime(tnx.index)

# Hourly grid over the window. Lowercase 'h' is used because the uppercase
# 'H' alias is deprecated in recent pandas releases.
hourly_index = pd.date_range(start=start, end=end, freq='h')

# Spread each daily value over its hours (forward-fill from the matching or
# preceding date), expose the grid as a "timestamp" column, and drop the final
# row (the grid's end point).
tnx_hourly = (
    tnx.reindex(hourly_index, method='ffill')
    .reset_index()
    .rename(columns={'index': 'timestamp'})
    .iloc[:-1]
    .copy()
)

# Fill any values still missing by linear interpolation, then round to 2 decimals.
tnx_hourly['DGS10'] = tnx_hourly['DGS10'].interpolate(method='linear').round(2)
tnx_hourly

Unnamed: 0,timestamp,DGS10
0,2024-05-02 00:00:00,4.58
1,2024-05-02 01:00:00,4.58
2,2024-05-02 02:00:00,4.58
3,2024-05-02 03:00:00,4.58
4,2024-05-02 04:00:00,4.58
...,...,...
8011,2025-03-31 19:00:00,4.23
8012,2025-03-31 20:00:00,4.23
8013,2025-03-31 21:00:00,4.23
8014,2025-03-31 22:00:00,4.23


### 2. Oil

- The daily spot price of West Texas Intermediate (WTI) crude oil, priced in U.S. dollars per barrel.
- WTI is one of the world's most important benchmark crude oils (alongside Brent).
- What does it mean when DCOILWTICO rises?
    - Stronger global demand, supply shocks, or geopolitical risks.
    - Signals rising inflation pressure.
    - May lead to interest rate hikes.
    - Investors might shift toward energy stocks and reduce exposure to tech or consumer sectors.

In [None]:
# Daily WTI spot price (FRED series DCOILWTICO) over the same start/end window.
oil = web.DataReader("DCOILWTICO", 'fred', start, end)
oil.index = pd.to_datetime(oil.index)

# Forward-fill onto the hourly grid defined in an earlier cell, expose the
# grid as "timestamp", rename the value column and drop the final row.
oil_on_grid = oil.reindex(hourly_index, method='ffill').reset_index()
oil_hourly = (
    oil_on_grid
    .rename(columns={'index': 'timestamp', 'DCOILWTICO': 'crude_oil'})
    .iloc[:-1]
    .copy()
)

# Fill remaining gaps linearly and round to 2 decimals.
oil_hourly["crude_oil"] = oil_hourly["crude_oil"].interpolate(method='linear').round(2)
oil_hourly

Unnamed: 0,timestamp,crude_oil
0,2024-05-02 00:00:00,80.59
1,2024-05-02 01:00:00,80.59
2,2024-05-02 02:00:00,80.59
3,2024-05-02 03:00:00,80.59
4,2024-05-02 04:00:00,80.59
...,...,...
8011,2025-03-31 19:00:00,71.87
8012,2025-03-31 20:00:00,71.87
8013,2025-03-31 21:00:00,71.87
8014,2025-03-31 22:00:00,71.87


### 3. DXY: U.S. Dollar Index
- It measures the value of the U.S. dollar relative to a basket of major foreign currencies
- A rising DXY means the U.S. dollar is getting stronger vs other major currencies.
- A stronger dollar makes U.S. exports more expensive and imports cheaper.
- A strong dollar can reduce inflation by lowering import prices.
- Often seen as a safe-haven asset—rising during global uncertainty.

In [None]:
# DXY history read from a CSV that was downloaded manually
# (the download source is not recorded in this notebook).
dxy = pd.read_csv("data/dxy.csv")
dxy

Unnamed: 0,Date,Open,High,Low,Close
0,03/31/2025,104.01,104.39,103.75,104.21
1,03/28/2025,104.28,104.50,103.90,104.04
2,03/27/2025,104.65,104.65,104.07,104.34
3,03/26/2025,104.25,104.68,104.18,104.55
4,03/25/2025,104.33,104.47,103.94,104.18
...,...,...,...,...,...
233,05/08/2024,105.42,105.64,105.39,105.55
234,05/07/2024,105.14,105.45,105.03,105.41
235,05/06/2024,105.05,105.20,104.87,105.05
236,05/03/2024,105.31,105.37,104.52,105.03


In [None]:
# Dates in data/dxy.csv are month/day/year (e.g. "03/31/2025"); parse them
# with an explicit format so day and month cannot be swapped.
dxy['Date'] = pd.to_datetime(dxy['Date'], format='%m/%d/%Y')

# Oldest first, keep only the close, index by calendar day and fill the days
# missing from the file by linear interpolation.
dxy = (
    dxy.sort_values('Date')[['Date', 'Close']]
    .rename(columns={'Date': 'date', 'Close': 'dxy'})
    .set_index('date')
    .asfreq('D')
)
dxy['dxy'] = dxy['dxy'].interpolate(method='linear')

# Hourly grid from the first day's 00:00 through the last day's 23:00.
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
# recent pandas releases.
# NOTE: this rebinds hourly_index, which the later VIX and S&P 500 cells reuse.
hourly_index = pd.date_range(start=dxy.index.min(), end=dxy.index.max() + pd.Timedelta(days=1), freq='h')
hourly_index = hourly_index[:-1]

# Repeat each daily value across its hours and expose the grid as "timestamp".
dxy_hourly = dxy.reindex(hourly_index, method='ffill').reset_index()
dxy_hourly = dxy_hourly.rename(columns={'index': 'timestamp'})

# Keep two decimal places.
dxy_hourly['dxy'] = dxy_hourly['dxy'].round(2)
dxy_hourly


Unnamed: 0,timestamp,dxy
0,2024-05-02 00:00:00,105.30
1,2024-05-02 01:00:00,105.30
2,2024-05-02 02:00:00,105.30
3,2024-05-02 03:00:00,105.30
4,2024-05-02 04:00:00,105.30
...,...,...
8011,2025-03-31 19:00:00,104.21
8012,2025-03-31 20:00:00,104.21
8013,2025-03-31 21:00:00,104.21
8014,2025-03-31 22:00:00,104.21


### 4. VIX
- CBOE Volatility Index, "the fear index"
- It measures the market's expectation of 30-day volatility in the S&P 500 index, based on options pricing
- What does it mean when VIX rises?
    - Investors expect more market turbulence in the next month.
    - Often occurs during times of financial stress, geopolitical tension, or economic uncertainty.
    - Can signal a sell-off or correction in equities, especially the S&P 500.
    - Traders may move to safe assets like bonds, gold, or the dollar.

In [None]:
# Daily VIX close (FRED series VIXCLS), put on a calendar-day index with the
# missing days filled by linear interpolation.
vix = web.DataReader("VIXCLS", 'fred', start, end)
vix.index = pd.to_datetime(vix.index)
vix = vix.asfreq('D')
vix['VIXCLS'] = vix['VIXCLS'].interpolate(method='linear')

# Repeat each daily value across the hourly grid from an earlier cell, then
# round to 2 decimals.
vix_hourly = (
    vix.reindex(hourly_index, method='ffill')
    .reset_index()
    .rename(columns={'index': 'timestamp', 'VIXCLS': 'vix'})
)
vix_hourly['vix'] = vix_hourly['vix'].round(2)
vix_hourly

Unnamed: 0,timestamp,vix
0,2024-05-02 00:00:00,14.68
1,2024-05-02 01:00:00,14.68
2,2024-05-02 02:00:00,14.68
3,2024-05-02 03:00:00,14.68
4,2024-05-02 04:00:00,14.68
...,...,...
8011,2025-03-31 19:00:00,22.28
8012,2025-03-31 20:00:00,22.28
8013,2025-03-31 21:00:00,22.28
8014,2025-03-31 22:00:00,22.28


### 5. Gold Price
- USD per troy ounce
- Gold is considered a safe-haven asset, especially in times of crisis or inflation
- Rising gold prices may indicate risk aversion, while falling prices suggest confidence in equities or fiat currencies
- Often moves inversely to U.S. real interest rates and the dollar index (DXY)

In [None]:
# Hourly gold prices downloaded from Kaggle (semicolon-separated file);
# only the date and close columns are kept, under new names.
gold = (
    pd.read_csv("data/gold_hourly.csv", sep=';')
    .loc[:, ['Date', 'Close']]
    .rename(columns={'Date': 'timestamp', 'Close': 'gold_price'})
)
gold['timestamp'] = pd.to_datetime(gold['timestamp'])

# Keep rows from 2024-05-02 onward, renumber from 0, then drop the final row.
gold = (
    gold[gold['timestamp'] >= '2024-05-02']
    .reset_index(drop=True)
    .iloc[:-1]
    .copy()
)
gold

Unnamed: 0,timestamp,gold_price
0,2024-05-02 01:00:00,2320.83
1,2024-05-02 02:00:00,2323.78
2,2024-05-02 03:00:00,2324.43
3,2024-05-02 04:00:00,2324.13
4,2024-05-02 05:00:00,2320.20
...,...,...
4266,2025-01-31 19:00:00,2808.76
4267,2025-01-31 20:00:00,2798.91
4268,2025-01-31 21:00:00,2796.30
4269,2025-01-31 22:00:00,2800.88


In [None]:
# Index the Kaggle rows by timestamp so they can be aligned to a full hourly grid.
# NOTE(review): this mutates `gold` in place, so the cell cannot be re-run
# without first re-running the cell that builds `gold`.
gold.set_index('timestamp', inplace=True)

# Complete hourly grid for the period covered by the Kaggle file. Lowercase
# 'h' is used because the uppercase 'H' alias is deprecated in recent pandas
# releases.
start_time = pd.Timestamp('2024-05-02 00:00:00')
end_time = pd.Timestamp('2025-01-31 23:00:00')
full_range = pd.date_range(start=start_time, end=end_time, freq='h')

# Align to the grid, expose it as a "timestamp" column, and back-fill hours
# that have no quote from the next available price.
gold_part1 = (
    gold.reindex(full_range)
    .reset_index()
    .rename(columns={'index': 'timestamp'})
)
gold_part1['gold_price'] = gold_part1['gold_price'].bfill()
# This data was downloaded online and only runs through the end of January 2025;
# the later values were filled in by hand from Yahoo Finance:
# https://finance.yahoo.com/quote/GC%3DF/history/?period1=1714521600&period2=1743379200 

In [165]:
# Save the grid-aligned Kaggle portion of the gold series, without the index column.
gold_part1.to_csv("data/gold_hourly_part1.csv", index=False)

In [None]:
# Reload the gold series from data/gold_hourly_temp.csv.
# NOTE(review): presumably this is the hand-completed version of gold_hourly_part1.csv;
# no visible cell in this notebook writes this file — confirm.
gold_temp = pd.read_csv("data/gold_hourly_temp.csv", parse_dates=["timestamp"])
# NOTE(review): parse_dates above already attempts to parse this column, so this
# dayfirst=True pass only has an effect if the values are still strings here —
# confirm which date format the CSV actually uses.
gold_temp["timestamp"] = pd.to_datetime(gold_temp["timestamp"], dayfirst=True)
# Fill missing prices by linear interpolation and round to 2 decimals.
gold_temp["gold_price"] = gold_temp["gold_price"].interpolate(method="linear").round(2)
gold_temp

Unnamed: 0,timestamp,gold_price
0,2024-05-02 00:00:00,2320.83
1,2024-05-02 01:00:00,2320.83
2,2024-05-02 02:00:00,2323.78
3,2024-05-02 03:00:00,2324.43
4,2024-05-02 04:00:00,2324.13
...,...,...
8011,2025-03-31 19:00:00,3086.50
8012,2025-03-31 20:00:00,3086.50
8013,2025-03-31 21:00:00,3086.50
8014,2025-03-31 22:00:00,3086.50


In [None]:
# Independent copy under the name used by the macro merge cells.
gold_hourly = gold_temp.copy()

### 6. SP500

- S&P 500 Index
- It tracks the performance of 500 large-cap U.S. companies, including names like Apple, Microsoft, Amazon, etc.
- Unit: Index points (not USD)
- What does it mean when the S&P 500 rises?
    - Investors are more optimistic about U.S. corporate earnings and the economic outlook.
    - Usually reflects lower perceived risk, accommodative monetary policy, or strong growth.
    - May lead to increased investments in equities and risk assets.

In [None]:
# Window for the S&P 500 request; note this rebinds start/end, with end now 2025-03-31.
start = datetime.datetime(2024, 5, 2)
end = datetime.datetime(2025, 3, 31)

# Daily index level (FRED series SP500), put on a calendar-day index with the
# missing days filled by linear interpolation.
sp500 = web.DataReader("SP500", "fred", start, end)
sp500.index = pd.to_datetime(sp500.index)
sp500 = sp500.asfreq('D')
sp500["SP500"] = sp500["SP500"].interpolate(method='linear')

# Repeat each daily value across the hourly grid from an earlier cell, then
# round to 2 decimals.
sp500_hourly = (
    sp500.reindex(hourly_index, method='ffill')
    .reset_index()
    .rename(columns={'index': 'timestamp', 'SP500': 'sp500'})
)
sp500_hourly['sp500'] = sp500_hourly['sp500'].round(2)
sp500_hourly

Unnamed: 0,timestamp,sp500
0,2024-05-02 00:00:00,5064.20
1,2024-05-02 01:00:00,5064.20
2,2024-05-02 02:00:00,5064.20
3,2024-05-02 03:00:00,5064.20
4,2024-05-02 04:00:00,5064.20
...,...,...
8011,2025-03-31 19:00:00,5611.85
8012,2025-03-31 20:00:00,5611.85
8013,2025-03-31 21:00:00,5611.85
8014,2025-03-31 22:00:00,5611.85


### Merge Macro Data Together!

In [207]:
# Start the macro table: Treasury yield joined with crude oil on timestamp.
macro1 = tnx_hourly.merge(right=oil_hourly, how="inner", on='timestamp')
macro1

Unnamed: 0,timestamp,DGS10,crude_oil
0,2024-05-02 00:00:00,4.58,80.59
1,2024-05-02 01:00:00,4.58,80.59
2,2024-05-02 02:00:00,4.58,80.59
3,2024-05-02 03:00:00,4.58,80.59
4,2024-05-02 04:00:00,4.58,80.59
...,...,...,...
8011,2025-03-31 19:00:00,4.23,71.87
8012,2025-03-31 20:00:00,4.23,71.87
8013,2025-03-31 21:00:00,4.23,71.87
8014,2025-03-31 22:00:00,4.23,71.87


In [208]:
# Add the dollar index column.
macro2 = macro1.merge(right=dxy_hourly, how="inner", on='timestamp')
macro2

Unnamed: 0,timestamp,DGS10,crude_oil,dxy
0,2024-05-02 00:00:00,4.58,80.59,105.30
1,2024-05-02 01:00:00,4.58,80.59,105.30
2,2024-05-02 02:00:00,4.58,80.59,105.30
3,2024-05-02 03:00:00,4.58,80.59,105.30
4,2024-05-02 04:00:00,4.58,80.59,105.30
...,...,...,...,...
8011,2025-03-31 19:00:00,4.23,71.87,104.21
8012,2025-03-31 20:00:00,4.23,71.87,104.21
8013,2025-03-31 21:00:00,4.23,71.87,104.21
8014,2025-03-31 22:00:00,4.23,71.87,104.21


In [209]:
# Add the VIX column.
macro3 = macro2.merge(right=vix_hourly, how="inner", on='timestamp')
macro3

Unnamed: 0,timestamp,DGS10,crude_oil,dxy,vix
0,2024-05-02 00:00:00,4.58,80.59,105.30,14.68
1,2024-05-02 01:00:00,4.58,80.59,105.30,14.68
2,2024-05-02 02:00:00,4.58,80.59,105.30,14.68
3,2024-05-02 03:00:00,4.58,80.59,105.30,14.68
4,2024-05-02 04:00:00,4.58,80.59,105.30,14.68
...,...,...,...,...,...
8011,2025-03-31 19:00:00,4.23,71.87,104.21,22.28
8012,2025-03-31 20:00:00,4.23,71.87,104.21,22.28
8013,2025-03-31 21:00:00,4.23,71.87,104.21,22.28
8014,2025-03-31 22:00:00,4.23,71.87,104.21,22.28


In [210]:
# Add the gold price column.
macro4 = macro3.merge(right=gold_hourly, how="inner", on='timestamp')
macro4

Unnamed: 0,timestamp,DGS10,crude_oil,dxy,vix,gold_price
0,2024-05-02 00:00:00,4.58,80.59,105.30,14.68,2320.83
1,2024-05-02 01:00:00,4.58,80.59,105.30,14.68,2320.83
2,2024-05-02 02:00:00,4.58,80.59,105.30,14.68,2323.78
3,2024-05-02 03:00:00,4.58,80.59,105.30,14.68,2324.43
4,2024-05-02 04:00:00,4.58,80.59,105.30,14.68,2324.13
...,...,...,...,...,...,...
8011,2025-03-31 19:00:00,4.23,71.87,104.21,22.28,3086.50
8012,2025-03-31 20:00:00,4.23,71.87,104.21,22.28,3086.50
8013,2025-03-31 21:00:00,4.23,71.87,104.21,22.28,3086.50
8014,2025-03-31 22:00:00,4.23,71.87,104.21,22.28,3086.50


In [211]:
# Add the S&P 500 column, completing the six macro series.
macro5 = macro4.merge(right=sp500_hourly, how="inner", on='timestamp')
macro5

Unnamed: 0,timestamp,DGS10,crude_oil,dxy,vix,gold_price,sp500
0,2024-05-02 00:00:00,4.58,80.59,105.30,14.68,2320.83,5064.20
1,2024-05-02 01:00:00,4.58,80.59,105.30,14.68,2320.83,5064.20
2,2024-05-02 02:00:00,4.58,80.59,105.30,14.68,2323.78,5064.20
3,2024-05-02 03:00:00,4.58,80.59,105.30,14.68,2324.43,5064.20
4,2024-05-02 04:00:00,4.58,80.59,105.30,14.68,2324.13,5064.20
...,...,...,...,...,...,...,...
8011,2025-03-31 19:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85
8012,2025-03-31 20:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85
8013,2025-03-31 21:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85
8014,2025-03-31 22:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85


In [255]:
# Work on an independent copy of the merged table, then count missing values per column.
macro = macro5.copy()
macro.isna().sum()

timestamp     0
DGS10         0
crude_oil     0
dxy           0
vix           0
gold_price    0
sp500         0
dtype: int64

In [256]:
# Add a column to convert all the values into a single text column
def format_macro_text(row):
    """Render one record of macro indicators as a six-line bullet list.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing the
            keys 'DGS10', 'crude_oil', 'dxy', 'gold_price', 'sp500' and 'vix'.

    Returns:
        str: One "- ..." bullet per indicator, joined by newlines with no
        trailing newline. Values are interpolated with their default string
        formatting; no rounding is applied.
    """
    bullets = [
        f"- 10-Year Treasury Yield (DGS10): U.S. nominal government bond yield - {row['DGS10']}%",
        f"- Crude Oil Price (WTI): Spot price in USD per barrel - ${row['crude_oil']}",
        f"- U.S. Dollar Index (DXY): {row['dxy']}",
        f"- Gold Price (Spot): USD per troy ounce - ${row['gold_price']}",
        f"- S&P 500 Index: {row['sp500']}",
        f"- CBOE Volatility Index (VIX): 30-day implied volatility of S&P 500 index options - {row['vix']}",
    ]
    return "\n".join(bullets)

# Build the per-hour macro description row by row and store it in a 'text' column.
macro = macro.assign(text=macro.apply(format_macro_text, axis=1))
macro.head()

Unnamed: 0,timestamp,DGS10,crude_oil,dxy,vix,gold_price,sp500,text
0,2024-05-02 00:00:00,4.58,80.59,105.3,14.68,2320.83,5064.2,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-05-02 01:00:00,4.58,80.59,105.3,14.68,2320.83,5064.2,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-05-02 02:00:00,4.58,80.59,105.3,14.68,2323.78,5064.2,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-05-02 03:00:00,4.58,80.59,105.3,14.68,2324.43,5064.2,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-05-02 04:00:00,4.58,80.59,105.3,14.68,2324.13,5064.2,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [257]:
# Drop the first three hourly rows (presumably so the macro table starts at the
# same hour as the first complete 4-hour market window — confirm).
# The cut-off timestamp is read from the untouched `macro5` frame instead of
# slicing `macro[3:]`, so re-running this cell does not remove another three rows.
# Assumes `macro5` is sorted by ascending timestamp, as its displayed output shows.
first_kept_timestamp = macro5["timestamp"].iloc[3]
macro = macro[macro["timestamp"] >= first_kept_timestamp].reset_index(drop=True)
macro

Unnamed: 0,timestamp,DGS10,crude_oil,dxy,vix,gold_price,sp500,text
0,2024-05-02 03:00:00,4.58,80.59,105.30,14.68,2324.43,5064.20,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-05-02 04:00:00,4.58,80.59,105.30,14.68,2324.13,5064.20,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-05-02 05:00:00,4.58,80.59,105.30,14.68,2320.20,5064.20,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-05-02 06:00:00,4.58,80.59,105.30,14.68,2319.50,5064.20,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-05-02 07:00:00,4.58,80.59,105.30,14.68,2318.20,5064.20,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...,...,...,...,...,...
8008,2025-03-31 19:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8009,2025-03-31 20:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8010,2025-03-31 21:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8011,2025-03-31 22:00:00,4.23,71.87,104.21,22.28,3086.50,5611.85,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [258]:
# Persist the trimmed macro table, including the generated 'text' column; the row index is not written.
macro.to_csv("data/macro.csv", index=False)

In [259]:
# Keep only the timestamp and the generated description, exposing the latter
# under the name 'macro_text' for the later merge with the market data.
macro_text = macro[['timestamp', 'text']].rename(columns={'text': 'macro_text'})
macro_text.head()  # 8012 rows in total in the displayed run (index 0..8011)

Unnamed: 0,timestamp,macro_text
0,2024-05-02 03:00:00,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-05-02 04:00:00,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-05-02 05:00:00,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-05-02 06:00:00,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-05-02 07:00:00,- 10-Year Treasury Yield (DGS10): U.S. nominal...


# Part 3: Combine Texts to Form Prompts (Use 4h)

In [221]:
# Collect the hourly description of every asset side by side. The Series are
# combined by index label, so the five asset frames need to share the same row
# index as `btc` for the rows to line up.
asset_frames = {"btc": btc, "eth": eth, "sol": sol, "doge": doge, "usdt": usdt}
combined = pd.DataFrame(
    {
        "timestamp": btc["timestamp"],
        **{f"{ticker}_text": frame["text"] for ticker, frame in asset_frames.items()},
    }
)

# One snapshot per hour: a "[TICKER]" header followed by that asset's text,
# with a blank line between assets (BTC, ETH, SOL, DOGE, USDT in that order).
combined["all_texts"] = combined.apply(
    lambda row: "\n\n".join(
        f"[{ticker.upper()}]\n{row[ticker + '_text']}" for ticker in asset_frames
    ),
    axis=1,
)

### K = 3

In [23]:
# Sliding window: for each hour, concatenate the snapshots of the K most recent
# hours (current hour included), ordered from oldest to newest. The snapshots
# are headed "=== Hour -K ===" down to "=== Hour -1 ===", so the current hour
# is labelled "-1". The first K - 1 rows have no complete window and hold None.
K = 3
hourly_snapshots = combined["all_texts"].tolist()
windows_3h = [None] * min(K - 1, len(hourly_snapshots))
for last in range(K - 1, len(hourly_snapshots)):
    labelled = [
        f"=== Hour -{hours_back} ===\n{snapshot}"
        for hours_back, snapshot in zip(range(K, 0, -1), hourly_snapshots[last - K + 1 : last + 1])
    ]
    windows_3h.append("\n--------------------------------------------------\n".join(labelled))
combined["all_texts_3h"] = windows_3h

In [24]:
# Row 2 (= K - 1) is the first row with a complete 3-hour window; print it as a sanity check.
print(combined["all_texts_3h"].iloc[2])

=== Hour -3 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 12908 transactions → low activity (txn ratio = 0.6669, compared to 24h average)
- 31194 active wallets
- 32819.8232 BTC transferred
- Average transaction fee: 0.00007967
- 6.4825 MB block size used

[ETH]
Price: 2947.5852 USD
Price change compared to last hour: -0.7934% (slight decrease)
Volume: 68867.9318 ETH
Market cap: 364,276,973,974.75 USD
Trend signal: short MA (6h) is above long MA (24h) → buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 46619 transactions → normal activity 

In [25]:
# Add Instruction to Form Prompts
# The analyst instruction is the same for every row, so it is built once and
# prepended to each 3-hour window; rows without a complete window stay None.
analyst_instruction_3h = (
    "You are a multi-asset crypto market analyst supporting a portfolio management system.\n\n"
    "Your task is to analyze the following market data to assist in making multi-asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT.\n\n"
    "Below is a chronological snapshot of hourly market data for the last 3 hours, from oldest to newest.\n"
    "The data includes pricing, technical indicators, and on-chain activity for each asset.\n"
    "Start with a brief section **Overall Market Summary**.\n"
    "Then, under the section **Asset-Specific Analysis**, write a short analysis of market behavior for each asset (1–2 sentences).\n"
    "No other sections needed.\n"
    "Be data-driven, factual, concise, and professional. Use clear headings.\n\n"
)
combined["prompt_3h_v1"] = combined["all_texts_3h"].apply(
    lambda window: None if pd.isna(window) else analyst_instruction_3h + window
)

In [26]:
# Inspect the first fully formed 3-hour prompt (row 2 is the first row whose window text is not None).
print(combined["prompt_3h_v1"].iloc[2])

You are a multi-asset crypto market analyst supporting a portfolio management system.

Your task is to analyze the following market data to assist in making multi-asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT.

Below is a chronological snapshot of hourly market data for the last 3 hours, from oldest to newest.
The data includes pricing, technical indicators, and on-chain activity for each asset.
Start with a brief section **Overall Market Summary**.
Then, under the section **Asset-Specific Analysis**, write a short analysis of market behavior for each asset (1–2 sentences).
No other sections needed.
Be data-driven, factual, concise, and professional. Use clear headings.

=== Hour -3 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is abo

### K = 4

In [222]:
# Sliding window: for each hour, concatenate the snapshots of the K most recent
# hours (current hour included), ordered from oldest to newest. The snapshots
# are headed "=== Hour -K ===" down to "=== Hour -1 ===", so the current hour
# is labelled "-1". The first K - 1 rows have no complete window and hold None.
K = 4
hourly_snapshots = combined["all_texts"].tolist()
windows_4h = [None] * min(K - 1, len(hourly_snapshots))
for last in range(K - 1, len(hourly_snapshots)):
    labelled = [
        f"=== Hour -{hours_back} ===\n{snapshot}"
        for hours_back, snapshot in zip(range(K, 0, -1), hourly_snapshots[last - K + 1 : last + 1])
    ]
    windows_4h.append("\n--------------------------------------------------\n".join(labelled))
combined["all_texts_4h"] = windows_4h

# Row 3 (= K - 1) is the first row with a complete 4-hour window.
print(combined["all_texts_4h"].iloc[3])

=== Hour -4 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 12908 transactions → low activity (txn ratio = 0.6669, compared to 24h average)
- 31194 active wallets
- 32819.8232 BTC transferred
- Average transaction fee: 0.00007967
- 6.4825 MB block size used

[ETH]
Price: 2947.5852 USD
Price change compared to last hour: -0.7934% (slight decrease)
Volume: 68867.9318 ETH
Market cap: 364,276,973,974.75 USD
Trend signal: short MA (6h) is above long MA (24h) → buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 46619 transactions → normal activity 

In [223]:
# Display the combined frame; rows 0-2 have no all_texts_4h value because their 4-hour window is incomplete.
combined

Unnamed: 0,timestamp,btc_text,eth_text,sol_text,doge_text,usdt_text,all_texts,all_texts_4h
0,2024-05-02 00:00:00,Price: 57759.9547 USD\nPrice change compared t...,Price: 2947.5852 USD\nPrice change compared to...,Price: 131.4696 USD\nPrice change compared to ...,Price: 0.12780137 USD\nPrice change compared t...,Price: 0.99954376 USD (stablecoin)\nVolume: 95...,[BTC]\nPrice: 57759.9547 USD\nPrice change com...,
1,2024-05-02 01:00:00,Price: 57109.6836 USD\nPrice change compared t...,Price: 2905.4601 USD\nPrice change compared to...,Price: 129.6897 USD\nPrice change compared to ...,Price: 0.12584491 USD\nPrice change compared t...,Price: 0.99970228 USD (stablecoin)\nVolume: 66...,[BTC]\nPrice: 57109.6836 USD\nPrice change com...,
2,2024-05-02 02:00:00,Price: 57462.1227 USD\nPrice change compared t...,Price: 2917.0471 USD\nPrice change compared to...,Price: 129.6112 USD\nPrice change compared to ...,Price: 0.12618126 USD\nPrice change compared t...,Price: 0.99976003 USD (stablecoin)\nVolume: 85...,[BTC]\nPrice: 57462.1227 USD\nPrice change com...,
3,2024-05-02 03:00:00,Price: 57441.6225 USD\nPrice change compared t...,Price: 2928.585 USD\nPrice change compared to ...,Price: 130.0905 USD\nPrice change compared to ...,Price: 0.12643519 USD\nPrice change compared t...,Price: 0.99962788 USD (stablecoin)\nVolume: 12...,[BTC]\nPrice: 57441.6225 USD\nPrice change com...,=== Hour -4 ===\n[BTC]\nPrice: 57759.9547 USD\...
4,2024-05-02 04:00:00,Price: 57386.5479 USD\nPrice change compared t...,Price: 2921.4791 USD\nPrice change compared to...,Price: 129.5669 USD\nPrice change compared to ...,Price: 0.12516028 USD\nPrice change compared t...,Price: 0.99966309 USD (stablecoin)\nVolume: 44...,[BTC]\nPrice: 57386.5479 USD\nPrice change com...,=== Hour -4 ===\n[BTC]\nPrice: 57109.6836 USD\...
...,...,...,...,...,...,...,...,...
8011,2025-03-31 19:00:00,Price: 82447.4689 USD\nPrice change compared t...,Price: 1827.5233 USD\nPrice change compared to...,Price: 125.2499 USD\nPrice change compared to ...,Price: 0.16574646 USD\nPrice change compared t...,Price: 1.00003921 USD (stablecoin)\nVolume: 24...,[BTC]\nPrice: 82447.4689 USD\nPrice change com...,=== Hour -4 ===\n[BTC]\nPrice: 83309.1402 USD\...
8012,2025-03-31 20:00:00,Price: 82439.1058 USD\nPrice change compared t...,Price: 1819.8772 USD\nPrice change compared to...,Price: 125.4111 USD\nPrice change compared to ...,Price: 0.16566830 USD\nPrice change compared t...,Price: 1.00000542 USD (stablecoin)\nVolume: 15...,[BTC]\nPrice: 82439.1058 USD\nPrice change com...,=== Hour -4 ===\n[BTC]\nPrice: 82697.7376 USD\...
8013,2025-03-31 21:00:00,Price: 82567.0649 USD\nPrice change compared t...,Price: 1824.6029 USD\nPrice change compared to...,Price: 125.753 USD\nPrice change compared to l...,Price: 0.16579113 USD\nPrice change compared t...,Price: 0.99993957 USD (stablecoin)\nVolume: 24...,[BTC]\nPrice: 82567.0649 USD\nPrice change com...,=== Hour -4 ===\n[BTC]\nPrice: 83309.5474 USD\...
8014,2025-03-31 22:00:00,Price: 82412.5917 USD\nPrice change compared t...,Price: 1824.7112 USD\nPrice change compared to...,Price: 125.5478 USD\nPrice change compared to ...,Price: 0.16565591 USD\nPrice change compared t...,Price: 0.99992930 USD (stablecoin)\nVolume: 97...,[BTC]\nPrice: 82412.5917 USD\nPrice change com...,=== Hour -4 ===\n[BTC]\nPrice: 82447.4689 USD\...


In [224]:
# Extract the timestamp and the 4-hour window text into a standalone frame.
all_texts_4h = combined.loc[:, ['timestamp', 'all_texts_4h']].copy()
all_texts_4h.head()

Unnamed: 0,timestamp,all_texts_4h
0,2024-05-02 00:00:00,
1,2024-05-02 01:00:00,
2,2024-05-02 02:00:00,
3,2024-05-02 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57759.9547 USD\...
4,2024-05-02 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57109.6836 USD\...


In [226]:
# Remove the leading rows that have no complete 4-hour window (their window text
# is None) and renumber the rows from 0. Filtering on the missing value instead
# of slicing `[3:]` keeps the cell idempotent: re-running it drops nothing more.
all_texts_4h = all_texts_4h.dropna(subset=["all_texts_4h"]).reset_index(drop=True)
all_texts_4h.head()

Unnamed: 0,timestamp,all_texts_4h
0,2024-05-02 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57759.9547 USD\...
1,2024-05-02 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57109.6836 USD\...
2,2024-05-02 05:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57462.1227 USD\...
3,2024-05-02 06:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57441.6225 USD\...
4,2024-05-02 07:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57386.5479 USD\...


In [228]:
# After trimming, row 0 holds the first complete 4-hour window; print it as a sanity check.
print(all_texts_4h["all_texts_4h"].iloc[0])

=== Hour -4 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 12908 transactions → low activity (txn ratio = 0.6669, compared to 24h average)
- 31194 active wallets
- 32819.8232 BTC transferred
- Average transaction fee: 0.00007967
- 6.4825 MB block size used

[ETH]
Price: 2947.5852 USD
Price change compared to last hour: -0.7934% (slight decrease)
Volume: 68867.9318 ETH
Market cap: 364,276,973,974.75 USD
Trend signal: short MA (6h) is above long MA (24h) → buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 46619 transactions → normal activity 

In [227]:
# Persist the timestamp / 4-hour window text pairs; the row index is not written.
all_texts_4h.to_csv("all_texts_4h.csv", index=False)

In [None]:
# Add Instruction to Form Prompts (Market Analyst) (Skip)
# The analyst instruction is the same for every row, so it is built once and
# prepended to each 4-hour window; rows without a complete window stay None.
analyst_instruction_4h = (
    "You are a multi-asset crypto market analyst supporting a portfolio management system.\n\n"
    "Your task is to analyze the following market data to assist in making multi-asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT.\n\n"
    "Below is a chronological snapshot of hourly market data for the last 4 hours, from oldest to newest.\n"
    "The data includes pricing, technical indicators, and on-chain activity for each asset.\n"
    "Start with a brief section **Overall Market Summary**.\n"
    "Then, under the section **Asset-Specific Analysis**, write a short analysis of market behavior for each asset (1–2 sentences).\n"
    "No other sections needed.\n"
    "Be data-driven, factual, concise, and professional. Use clear headings.\n\n"
)
combined["prompt_4h_v1"] = combined["all_texts_4h"].apply(
    lambda window: None if pd.isna(window) else analyst_instruction_4h + window
)

# Row 3 is the first row with a complete 4-hour window.
print(combined["prompt_4h_v1"].iloc[3])

You are a multi-asset crypto market analyst supporting a portfolio management system.

Your task is to analyze the following market data to assist in making multi-asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT.

Below is a chronological snapshot of hourly market data for the last 4 hours, from oldest to newest.
The data includes pricing, technical indicators, and on-chain activity for each asset.
Start with a brief section **Overall Market Summary**.
Then, under the section **Asset-Specific Analysis**, write a short analysis of market behavior for each asset (1–2 sentences).
No other sections needed.
Be data-driven, factual, concise, and professional. Use clear headings.

=== Hour -4 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is abo

In [32]:
# Timestamp of row 3, the first row that has a complete 4-hour window.
print(combined["timestamp"].iloc[3])

2024-05-02 03:00:00


### K = 6

In [29]:
# Sliding window: for each hour, concatenate the snapshots of the K most recent
# hours (current hour included), ordered from oldest to newest. The snapshots
# are headed "=== Hour -K ===" down to "=== Hour -1 ===", so the current hour
# is labelled "-1". The first K - 1 rows have no complete window and hold None.
K = 6
hourly_snapshots = combined["all_texts"].tolist()
windows_6h = [None] * min(K - 1, len(hourly_snapshots))
for last in range(K - 1, len(hourly_snapshots)):
    labelled = [
        f"=== Hour -{hours_back} ===\n{snapshot}"
        for hours_back, snapshot in zip(range(K, 0, -1), hourly_snapshots[last - K + 1 : last + 1])
    ]
    windows_6h.append("\n--------------------------------------------------\n".join(labelled))
combined["all_texts_6h"] = windows_6h

In [30]:
# Row 5 (= K - 1) is the first row with a complete 6-hour window; print it as a sanity check.
print(combined["all_texts_6h"].iloc[5]) 

=== Hour -6 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 12908 transactions → low activity (txn ratio = 0.6669, compared to 24h average)
- 31194 active wallets
- 32819.8232 BTC transferred
- Average transaction fee: 0.00007967
- 6.4825 MB block size used

[ETH]
Price: 2947.5852 USD
Price change compared to last hour: -0.7934% (slight decrease)
Volume: 68867.9318 ETH
Market cap: 364,276,973,974.75 USD
Trend signal: short MA (6h) is above long MA (24h) → buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 46619 transactions → normal activity 

In [31]:
# Add Instruction to Form Prompts
# The analyst instruction is the same for every row, so it is built once and
# prepended to each 6-hour window; rows without a complete window stay None.
analyst_instruction_6h = (
    "You are a multi-asset crypto market analyst supporting a portfolio management system.\n\n"
    "Your task is to analyze the following market data to assist in making multi-asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT.\n\n"
    "Below is a chronological snapshot of hourly market data for the last 6 hours, from oldest to newest.\n"
    "The data includes pricing, technical indicators, and on-chain activity for each asset.\n"
    "Start with a brief section **Overall Market Summary**.\n"
    "Then, under the section **Asset-Specific Analysis**, write a short analysis of market behavior for each asset (1–2 sentences).\n"
    "No other sections needed.\n"
    "Be data-driven, factual, concise, and professional. Use clear headings.\n\n"
)
combined["prompt_6h_v1"] = combined["all_texts_6h"].apply(
    lambda window: None if pd.isna(window) else analyst_instruction_6h + window
)

# Row 5 is the first row with a complete 6-hour window.
print(combined["prompt_6h_v1"].iloc[5])

You are a multi-asset crypto market analyst supporting a portfolio management system.

Your task is to analyze the following market data to assist in making multi-asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT.

Below is a chronological snapshot of hourly market data for the last 6 hours, from oldest to newest.
The data includes pricing, technical indicators, and on-chain activity for each asset.
Start with a brief section **Overall Market Summary**.
Then, under the section **Asset-Specific Analysis**, write a short analysis of market behavior for each asset (1–2 sentences).
No other sections needed.
Be data-driven, factual, concise, and professional. Use clear headings.

=== Hour -6 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is abo

# Part 4: Merge Market Data (4h) and Macro Data For Each Hour

In [261]:
# Pair every 4-hour market window with the macro description of the same hour.
# Rows are matched on 'timestamp' (pandas' default inner join), so only hours
# present in both tables are kept.
all_data = pd.merge(all_texts_4h, macro_text, on='timestamp')
all_data

Unnamed: 0,timestamp,all_texts_4h,macro_text
0,2024-05-02 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57759.9547 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-05-02 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57109.6836 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-05-02 05:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57462.1227 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-05-02 06:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57441.6225 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-05-02 07:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57386.5479 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
8008,2025-03-31 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 83309.1402 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8009,2025-03-31 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82697.7376 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8010,2025-03-31 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 83309.5474 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8011,2025-03-31 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82447.4689 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [262]:
# Give the window column the name used by the prompt builder ('market_data_4h').
all_data = all_data.rename(columns={'all_texts_4h': 'market_data_4h'})
all_data

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-05-02 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57759.9547 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-05-02 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57109.6836 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-05-02 05:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57462.1227 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-05-02 06:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57441.6225 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-05-02 07:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57386.5479 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
8008,2025-03-31 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 83309.1402 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8009,2025-03-31 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82697.7376 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8010,2025-03-31 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 83309.5474 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
8011,2025-03-31 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82447.4689 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [263]:
# Persist the merged market + macro table; the row index is not written.
all_data.to_csv("all_data.csv", index=False)
# start: 2024-05-02 03:00:00
# end: 2025-03-31 22:00:00 (last timestamp in the merged frame displayed above)

In [264]:
# Spot-check the macro description of the first merged hour.
print(all_data['macro_text'].iloc[0])

- 10-Year Treasury Yield (DGS10): U.S. nominal government bond yield - 4.58%
- Crude Oil Price (WTI): Spot price in USD per barrel - $80.59
- U.S. Dollar Index (DXY): 105.3
- Gold Price (Spot): USD per troy ounce - $2324.43
- S&P 500 Index: 5064.2
- CBOE Volatility Index (VIX): 30-day implied volatility of S&P 500 index options - 14.68


In [265]:
# Spot-check the 4-hour market window of the first merged hour.
print(all_data['market_data_4h'].iloc[0])

=== Hour -4 ===
[BTC]
Price: 57759.9547 USD
Price change compared to last hour: -0.9957% (slight decrease)
Volume: 11297.3861 BTC
Market cap: 1,151,677,489,315.15 USD
Trend signal: short MA (6h) is below long MA (24h) → no buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 12908 transactions → low activity (txn ratio = 0.6669, compared to 24h average)
- 31194 active wallets
- 32819.8232 BTC transferred
- Average transaction fee: 0.00007967
- 6.4825 MB block size used

[ETH]
Price: 2947.5852 USD
Price change compared to last hour: -0.7934% (slight decrease)
Volume: 68867.9318 ETH
Market cap: 364,276,973,974.75 USD
Trend signal: short MA (6h) is above long MA (24h) → buy signal
Momentum signal: MACD (12h vs 26h EMA) is above signal line → bullish momentum
Price regime (Bollinger Bands): within band (normal range)
On-chain activity:
- 46619 transactions → normal activity 

# Part 5: Splitting into Various Periods (Regimes)

In [266]:
# Preview the merged table before slicing it into regime periods.
all_data.head()

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-05-02 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57759.9547 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-05-02 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57109.6836 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-05-02 05:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57462.1227 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-05-02 06:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57441.6225 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-05-02 07:00:00,=== Hour -4 ===\n[BTC]\nPrice: 57386.5479 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [269]:
# Bullish regime: 2024-11-05 00:00 through 2024-12-04 23:00, both ends included.
bullish_window = all_data['timestamp'].between('2024-11-05 00:00:00', '2024-12-04 23:00:00')
bullish = all_data.loc[bullish_window].copy().reset_index(drop=True)
display(bullish)

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-11-05 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67109.7924 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-11-05 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67910.7287 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-11-05 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67839.6651 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-11-05 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 68006.8008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-11-05 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67969.5688 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
715,2024-12-04 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 94919.784 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
716,2024-12-04 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 95610.7602 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
717,2024-12-04 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 96259.2879 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
718,2024-12-04 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 97293.2167 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [390]:
# Bearish regime: 2025-01-22 00:00 through 2025-03-09 23:00, both ends included.
bearish_window = all_data['timestamp'].between('2025-01-22 00:00:00', '2025-03-09 23:00:00')
bearish = all_data.loc[bearish_window].copy().reset_index(drop=True)
display(bearish)

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2025-01-22 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106840.0945 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2025-01-22 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106049.2196 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2025-01-22 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106188.1359 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2025-01-22 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105991.692 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2025-01-22 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105822.8884 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
1123,2025-03-09 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82562.9675 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1124,2025-03-09 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82421.161 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1125,2025-03-09 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82726.7141 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1126,2025-03-09 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82671.2826 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [271]:
# Sideways regime: 2024-08-09 00:00 through 2024-09-07 23:00, both ends included.
sideways_window = all_data['timestamp'].between('2024-08-09 00:00:00', '2024-09-07 23:00:00')
sideways = all_data.loc[sideways_window].copy().reset_index(drop=True)
display(sideways)

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-08-09 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61159.7482 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-08-09 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 62358.4648 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-08-09 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61734.0756 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-08-09 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61257.437 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-08-09 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61443.0406 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
715,2024-09-07 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54525.6008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
716,2024-09-07 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54193.6441 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
717,2024-09-07 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54345.4161 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
718,2024-09-07 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54458.7186 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [None]:
# Save each regime slice to its own folder under data/regimes/; the row index is not written.
bullish.to_csv("data/regimes/bullish/bullish_data.csv", index=False)
bearish.to_csv("data/regimes/bearish/bearish_data.csv", index=False)
sideways.to_csv("data/regimes/sideways/sideways_data.csv", index=False)

In [393]:
# Same export as the bearish line in the previous cell (identical call and path).
# NOTE(review): presumably re-run after the bearish date range was changed — confirm whether this cell is still needed.
bearish.to_csv("data/regimes/bearish/bearish_data.csv", index=False)

# Part 6: Market Data + Macro + News -> Prompt (For Different Regimes)

## A. Bullish

In [None]:
# Get news ready
bullish_news = bullish[['timestamp']].copy()
bullish_news['news'] = None
bullish_news

Unnamed: 0,timestamp,news
0,2024-11-05 00:00:00,
1,2024-11-05 01:00:00,
2,2024-11-05 02:00:00,
3,2024-11-05 03:00:00,
4,2024-11-05 04:00:00,
...,...,...
715,2024-12-04 19:00:00,
716,2024-12-04 20:00:00,
717,2024-12-04 21:00:00,
718,2024-12-04 22:00:00,


In [None]:
from pathlib import Path
bullish_news_folder = Path("data/regimes/bullish/bullish_news_md")

# Each calendar day has at most one markdown file named "<YYYY-MM-DD>.md".
# Its full content is copied to every hourly row of that day; days without a
# file get an empty string.
# NOTE(review): early hours of a day receive the whole day's summary — confirm
# this is intended and not a look-ahead leak.
news_dates = bullish_news['timestamp'].dt.date
for day in news_dates.unique():
    md_path = bullish_news_folder / f"{day}.md"
    content = md_path.read_text(encoding="utf-8") if md_path.exists() else ""
    bullish_news.loc[news_dates == day, 'news'] = content

In [292]:
# Display the hourly news table after the daily markdown content has been filled in.
bullish_news

Unnamed: 0,timestamp,news
0,2024-11-05 00:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
1,2024-11-05 01:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
2,2024-11-05 02:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
3,2024-11-05 03:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
4,2024-11-05 04:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
...,...,...
715,2024-12-04 19:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...
716,2024-12-04 20:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...
717,2024-12-04 21:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...
718,2024-12-04 22:00:00,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...


In [None]:
# Persist the hourly news table for the bullish regime; the row index is not written.
bullish_news.to_csv("data/regimes/bullish/bullish_news.csv", index=False)

In [None]:
# Reload the saved bullish slice; 'timestamp' is parsed back into datetimes so
# it can be merged with `bullish_news` on that column.
bullish_data = pd.read_csv("data/regimes/bullish/bullish_data.csv", parse_dates=["timestamp"])
bullish_data

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-11-05 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67109.7924 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-11-05 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67910.7287 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-11-05 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67839.6651 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-11-05 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 68006.8008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-11-05 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67969.5688 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
715,2024-12-04 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 94919.784 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
716,2024-12-04 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 95610.7602 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
717,2024-12-04 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 96259.2879 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
718,2024-12-04 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 97293.2167 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [332]:
# Add the daily news text to every hour of market + macro data. Rows are matched
# on 'timestamp' (pandas' default inner join).
bullish_complete = pd.merge(bullish_data, bullish_news, on="timestamp")
bullish_complete

Unnamed: 0,timestamp,market_data_4h,macro_text,news
0,2024-11-05 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67109.7924 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
1,2024-11-05 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67910.7287 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
2,2024-11-05 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67839.6651 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
3,2024-11-05 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 68006.8008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
4,2024-11-05 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67969.5688 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
...,...,...,...,...
715,2024-12-04 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 94919.784 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...
716,2024-12-04 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 95610.7602 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...
717,2024-12-04 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 96259.2879 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...
718,2024-12-04 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 97293.2167 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Breaks $100K...


In [333]:
# Read into prompt header and tail
# The templates are decoded explicitly as UTF-8, matching how the news markdown
# files are read, instead of relying on the platform's default encoding.
# Leading and trailing whitespace is stripped from both templates.
with open("data/prompts/prompt_header.md", "r", encoding="utf-8") as file:
    prompt_header = file.read().strip()

with open("data/prompts/prompt_tail.md", "r", encoding="utf-8") as file:
    prompt_tail = file.read().strip()

In [334]:
# Preview the merged bullish table before the prompts are generated.
bullish_complete.head()

Unnamed: 0,timestamp,market_data_4h,macro_text,news
0,2024-11-05 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67109.7924 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
1,2024-11-05 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67910.7287 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
2,2024-11-05 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67839.6651 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
3,2024-11-05 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 68006.8008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...
4,2024-11-05 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 67969.5688 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n1. **Bitcoin Surges Amid ...


In [327]:
# Function to build the prompt
def generate_prompt(row):
    """Assemble the full prompt text for a single row.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'macro_text', 'market_data_4h' and 'news'.

    Returns
    -------
    str
        The module-level ``prompt_header``, a blank line, the "## Input"
        body built from the row, and the module-level ``prompt_tail``.
        The portfolio section contains the literal placeholder
        "[PORTFOLIO HERE]".
    """
    # One entry per output line; the trailing "" yields the final newline
    # of the body.
    body_lines = [
        "## Input",
        "",
        "### 1. Macroeconomic Context",
        f"{row['macro_text']}",
        "",
        "### 2. Market Data (Past 4 Hours)",
        f"{row['market_data_4h']}",
        "",
        "### 3. News Summaries",
        f"{row['news']}",
        "",
        "### 4. Current Portfolio Status",
        "[PORTFOLIO HERE]",
        "",
    ]
    body = "\n".join(body_lines)
    return f"{prompt_header}\n\n{body}\n{prompt_tail}"


In [335]:
# Build one prompt per row by applying generate_prompt row-wise (axis=1).
bullish_complete['prompt'] = bullish_complete.apply(generate_prompt, axis=1)

In [336]:
# Preview the first prompt with the starting portfolio filled in.
# The substitution is done on a local string only, so every stored prompt
# (including row 0) keeps the "[PORTFOLIO HERE]" placeholder, matching the
# bearish and sideways prompt frames.
# NOTE(review): presumably the placeholder is filled in at run time by the
# consumer of the prompt files — confirm nothing relies on row 0 being pre-filled.
initial_portfolio_text = "- BTC: 0\n- ETH: 0\n- SOL: 0\n- DOGE: 0\n- USDT: 1,000,000\n"
first_prompt_preview = bullish_complete.loc[0, 'prompt'].replace("[PORTFOLIO HERE]", initial_portfolio_text)
print(first_prompt_preview)

You are an experienced multi-asset crypto trading agent operating in an hourly trading system. Your task is to make short-term asset allocation decisions across five cryptocurrencies: BTC, ETH, SOL, DOGE, and USDT. Your goal is to maximize your overall profit.

For each hour, you are provided with:

1. **Macroeconomic Context:** This hour's macro indicators such as interest rates, oil prices, the U.S. dollar index, gold, the S&P 500, and volatility.
2. **Recent Market Data**: A chronological snapshot of the last 4 hours of market data, including pricing, technical indicators, and on-chain activity for each asset (from oldest to newest).
3. **Daily News Summaries**: Important news summaries for each asset, published on the same day, from sources such as CoinDesk, Cointelegraph, or Decrypt.
4. **Current Portfolio Status**: Your current holdings for each asset, including the quantity of BTC, ETH, SOL, DOGE, and USDT.

## Objective:

Analyze both market data and news, based on your current

In [339]:
# Save the full frame (all columns, including the generated prompts) without the index.
bullish_complete.to_csv("data/regimes/bullish/bullish_complete.csv", index=False)

In [342]:
# Only keep the timestamp and prompts, and save them as a separate CSV.
bullish_prompts = bullish_complete[['timestamp', 'prompt']].copy()
bullish_prompts.to_csv("data/regimes/bullish/bullish_prompts.csv", index=False)

In [343]:
# Sanity check: count missing values per column.
bullish_prompts.isna().sum()

timestamp    0
prompt       0
dtype: int64

## B. Bearish

In [399]:
# Inspect the bearish frame (timestamp, market_data_4h, macro_text).
bearish

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2025-01-22 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106840.0945 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2025-01-22 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106049.2196 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2025-01-22 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106188.1359 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2025-01-22 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105991.692 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2025-01-22 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105822.8884 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
1123,2025-03-09 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82562.9675 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1124,2025-03-09 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82421.161 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1125,2025-03-09 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82726.7141 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1126,2025-03-09 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82671.2826 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [400]:
# Prepare the news frame: one row per timestamp of `bearish`, with a `news`
# column initialised to None (filled from the daily markdown files afterwards).
bearish_news = bearish[['timestamp']].assign(news=None)
bearish_news

Unnamed: 0,timestamp,news
0,2025-01-22 00:00:00,
1,2025-01-22 01:00:00,
2,2025-01-22 02:00:00,
3,2025-01-22 03:00:00,
4,2025-01-22 04:00:00,
...,...,...
1123,2025-03-09 19:00:00,
1124,2025-03-09 20:00:00,
1125,2025-03-09 21:00:00,
1126,2025-03-09 22:00:00,


In [401]:
bearish_news_folder = Path("data/regimes/bearish/bearish_news_md")

# Calendar date of every row, computed once instead of on every loop iteration.
bearish_news_dates = bearish_news['timestamp'].dt.date

# Days for which no "<YYYY-MM-DD>.md" file exists. They receive an empty string,
# which isna() does not count, so they are reported explicitly below.
bearish_missing_news_days = []

for day in bearish_news_dates.unique():
    md_path = bearish_news_folder / f"{day}.md"
    if md_path.exists():
        with open(md_path, "r", encoding="utf-8") as file:
            content = file.read()
    else:
        content = ""
        bearish_missing_news_days.append(day)
    # Every hourly row of this day gets the same daily news text.
    bearish_news.loc[bearish_news_dates == day, 'news'] = content

if bearish_missing_news_days:
    print(f"No news file found for {len(bearish_missing_news_days)} day(s): {bearish_missing_news_days}")

In [402]:
# Inspect the news frame after the `news` column has been filled.
bearish_news

Unnamed: 0,timestamp,news
0,2025-01-22 00:00:00,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
1,2025-01-22 01:00:00,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
2,2025-01-22 02:00:00,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
3,2025-01-22 03:00:00,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
4,2025-01-22 04:00:00,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
...,...,...
1123,2025-03-09 19:00:00,**Bitcoin (BTC)**\n\n- **Market plunges amid m...
1124,2025-03-09 20:00:00,**Bitcoin (BTC)**\n\n- **Market plunges amid m...
1125,2025-03-09 21:00:00,**Bitcoin (BTC)**\n\n- **Market plunges amid m...
1126,2025-03-09 22:00:00,**Bitcoin (BTC)**\n\n- **Market plunges amid m...


In [403]:
# Count missing values per column (an empty-string news entry is not counted as missing).
bearish_news.isna().sum()

timestamp    0
news         0
dtype: int64

In [404]:
# Save the per-timestamp news frame without the index.
bearish_news.to_csv("data/regimes/bearish/bearish_news.csv", index=False)

In [405]:
# Load the saved bearish data, parsing `timestamp` as datetime so it can be
# merged with the news frame on that column.
bearish_data = pd.read_csv("data/regimes/bearish/bearish_data.csv", parse_dates=["timestamp"])
bearish_data

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2025-01-22 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106840.0945 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2025-01-22 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106049.2196 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2025-01-22 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106188.1359 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2025-01-22 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105991.692 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2025-01-22 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105822.8884 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
1123,2025-03-09 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82562.9675 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1124,2025-03-09 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82421.161 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1125,2025-03-09 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82726.7141 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1126,2025-03-09 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82671.2826 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [406]:
# Attach the news text to each row by joining on timestamp.
# DataFrame.merge defaults to an inner join, so timestamps present in only one
# of the two frames are dropped.
bearish_complete = bearish_data.merge(bearish_news, on="timestamp")
bearish_complete

Unnamed: 0,timestamp,market_data_4h,macro_text,news
0,2025-01-22 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106840.0945 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
1,2025-01-22 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106049.2196 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
2,2025-01-22 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 106188.1359 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
3,2025-01-22 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105991.692 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
4,2025-01-22 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 105822.8884 USD...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Long-term holder sell...
...,...,...,...,...
1123,2025-03-09 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82562.9675 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Market plunges amid m...
1124,2025-03-09 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82421.161 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Market plunges amid m...
1125,2025-03-09 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82726.7141 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Market plunges amid m...
1126,2025-03-09 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 82671.2826 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Market plunges amid m...


In [407]:
# Sanity check after the merge: count missing values per column.
bearish_complete.isna().sum()

timestamp         0
market_data_4h    0
macro_text        0
news              0
dtype: int64

In [408]:
# Load the shared prompt header and tail templates.
# The encoding is pinned to UTF-8 (as is done for the news markdown files) so
# the text read does not depend on the platform's default locale encoding.
with open("data/prompts/prompt_header.md", "r", encoding="utf-8") as file:
    prompt_header = file.read().strip()

with open("data/prompts/prompt_tail.md", "r", encoding="utf-8") as file:
    prompt_tail = file.read().strip()
    
# Function to build the prompt
def generate_prompt(row):
    """Assemble the full prompt text for a single row.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'macro_text', 'market_data_4h' and 'news'.

    Returns
    -------
    str
        The module-level ``prompt_header``, a blank line, the "## Input"
        body built from the row, and the module-level ``prompt_tail``.
        The portfolio section contains the literal placeholder
        "[PORTFOLIO HERE]".
    """
    # One entry per output line; the trailing "" yields the final newline
    # of the body.
    body_lines = [
        "## Input",
        "",
        "### 1. Macroeconomic Context",
        f"{row['macro_text']}",
        "",
        "### 2. Market Data (Past 4 Hours)",
        f"{row['market_data_4h']}",
        "",
        "### 3. News Summaries",
        f"{row['news']}",
        "",
        "### 4. Current Portfolio Status",
        "[PORTFOLIO HERE]",
        "",
    ]
    body = "\n".join(body_lines)
    return f"{prompt_header}\n\n{body}\n{prompt_tail}"

# Build one prompt per row by applying generate_prompt row-wise (axis=1).
bearish_complete['prompt'] = bearish_complete.apply(generate_prompt, axis=1)

In [409]:
# Sanity check: count missing values per column, now including `prompt`.
bearish_complete.isna().sum()

timestamp         0
market_data_4h    0
macro_text        0
news              0
prompt            0
dtype: int64

In [410]:
# Save the full frame (all columns, including the generated prompts) without the index.
bearish_complete.to_csv("data/regimes/bearish/bearish_complete.csv", index=False)

# Only keep the timestamp and prompts
bearish_prompts = bearish_complete[['timestamp', 'prompt']].copy()
bearish_prompts.to_csv("data/regimes/bearish/bearish_prompts.csv", index=False)
# Final check: count missing values per column in the saved prompt frame.
bearish_prompts.isna().sum()

timestamp    0
prompt       0
dtype: int64

## C. Sideways

In [411]:
# Inspect the sideways frame (timestamp, market_data_4h, macro_text).
sideways

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-08-09 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61159.7482 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-08-09 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 62358.4648 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-08-09 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61734.0756 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-08-09 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61257.437 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-08-09 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61443.0406 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
715,2024-09-07 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54525.6008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
716,2024-09-07 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54193.6441 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
717,2024-09-07 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54345.4161 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
718,2024-09-07 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54458.7186 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [412]:
# Prepare the news frame: one row per timestamp of `sideways`, with a `news`
# column initialised to None (filled from the daily markdown files afterwards).
sideways_news = sideways[['timestamp']].assign(news=None)
sideways_news

Unnamed: 0,timestamp,news
0,2024-08-09 00:00:00,
1,2024-08-09 01:00:00,
2,2024-08-09 02:00:00,
3,2024-08-09 03:00:00,
4,2024-08-09 04:00:00,
...,...,...
715,2024-09-07 19:00:00,
716,2024-09-07 20:00:00,
717,2024-09-07 21:00:00,
718,2024-09-07 22:00:00,


In [413]:
sideways_news_folder = Path("data/regimes/sideways/sideways_news_md")

# Calendar date of every row, computed once instead of on every loop iteration.
sideways_news_dates = sideways_news['timestamp'].dt.date

# Days for which no "<YYYY-MM-DD>.md" file exists. They receive an empty string,
# which isna() does not count, so they are reported explicitly below.
sideways_missing_news_days = []

for day in sideways_news_dates.unique():
    md_path = sideways_news_folder / f"{day}.md"
    if md_path.exists():
        with open(md_path, "r", encoding="utf-8") as file:
            content = file.read()
    else:
        content = ""
        sideways_missing_news_days.append(day)
    # Every hourly row of this day gets the same daily news text.
    sideways_news.loc[sideways_news_dates == day, 'news'] = content

if sideways_missing_news_days:
    print(f"No news file found for {len(sideways_missing_news_days)} day(s): {sideways_missing_news_days}")

In [414]:
# Inspect the news frame after the `news` column has been filled.
sideways_news

Unnamed: 0,timestamp,news
0,2024-08-09 00:00:00,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
1,2024-08-09 01:00:00,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
2,2024-08-09 02:00:00,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
3,2024-08-09 03:00:00,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
4,2024-08-09 04:00:00,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
...,...,...
715,2024-09-07 19:00:00,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...
716,2024-09-07 20:00:00,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...
717,2024-09-07 21:00:00,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...
718,2024-09-07 22:00:00,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...


In [415]:
# Count missing values per column (an empty-string news entry is not counted as missing).
sideways_news.isna().sum()

timestamp    0
news         0
dtype: int64

In [416]:
# Save the per-timestamp news frame without the index.
sideways_news.to_csv("data/regimes/sideways/sideways_news.csv", index=False)

In [417]:
# Load the saved sideways data, parsing `timestamp` as datetime so it can be
# merged with the news frame on that column.
sideways_data = pd.read_csv("data/regimes/sideways/sideways_data.csv", parse_dates=["timestamp"])
sideways_data

Unnamed: 0,timestamp,market_data_4h,macro_text
0,2024-08-09 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61159.7482 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
1,2024-08-09 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 62358.4648 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
2,2024-08-09 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61734.0756 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
3,2024-08-09 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61257.437 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
4,2024-08-09 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61443.0406 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
...,...,...,...
715,2024-09-07 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54525.6008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
716,2024-09-07 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54193.6441 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
717,2024-09-07 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54345.4161 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...
718,2024-09-07 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54458.7186 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...


In [419]:
# Attach the news text to each row by joining on timestamp.
# DataFrame.merge defaults to an inner join, so timestamps present in only one
# of the two frames are dropped.
sideways_complete = sideways_data.merge(sideways_news, on="timestamp")
sideways_complete

Unnamed: 0,timestamp,market_data_4h,macro_text,news
0,2024-08-09 00:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61159.7482 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
1,2024-08-09 01:00:00,=== Hour -4 ===\n[BTC]\nPrice: 62358.4648 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
2,2024-08-09 02:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61734.0756 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
3,2024-08-09 03:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61257.437 USD\n...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
4,2024-08-09 04:00:00,=== Hour -4 ===\n[BTC]\nPrice: 61443.0406 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **BTC jumps nearly 12% ...
...,...,...,...,...
715,2024-09-07 19:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54525.6008 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...
716,2024-09-07 20:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54193.6441 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...
717,2024-09-07 21:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54345.4161 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...
718,2024-09-07 22:00:00,=== Hour -4 ===\n[BTC]\nPrice: 54458.7186 USD\...,- 10-Year Treasury Yield (DGS10): U.S. nominal...,**Bitcoin (BTC)**\n\n- **Volatile swing tied t...


In [420]:
# Sanity check after the merge: count missing values per column.
sideways_complete.isna().sum()

timestamp         0
market_data_4h    0
macro_text        0
news              0
dtype: int64

In [421]:
# Load the shared prompt header and tail templates.
# The encoding is pinned to UTF-8 (as is done for the news markdown files) so
# the text read does not depend on the platform's default locale encoding.
with open("data/prompts/prompt_header.md", "r", encoding="utf-8") as file:
    prompt_header = file.read().strip()

with open("data/prompts/prompt_tail.md", "r", encoding="utf-8") as file:
    prompt_tail = file.read().strip()
    
# Function to build the prompt
def generate_prompt(row):
    """Assemble the full prompt text for a single row.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'macro_text', 'market_data_4h' and 'news'.

    Returns
    -------
    str
        The module-level ``prompt_header``, a blank line, the "## Input"
        body built from the row, and the module-level ``prompt_tail``.
        The portfolio section contains the literal placeholder
        "[PORTFOLIO HERE]".
    """
    # One entry per output line; the trailing "" yields the final newline
    # of the body.
    body_lines = [
        "## Input",
        "",
        "### 1. Macroeconomic Context",
        f"{row['macro_text']}",
        "",
        "### 2. Market Data (Past 4 Hours)",
        f"{row['market_data_4h']}",
        "",
        "### 3. News Summaries",
        f"{row['news']}",
        "",
        "### 4. Current Portfolio Status",
        "[PORTFOLIO HERE]",
        "",
    ]
    body = "\n".join(body_lines)
    return f"{prompt_header}\n\n{body}\n{prompt_tail}"

# Build one prompt per row by applying generate_prompt row-wise (axis=1).
sideways_complete['prompt'] = sideways_complete.apply(generate_prompt, axis=1)

In [422]:
# Sanity check: count missing values per column, now including `prompt`.
sideways_complete.isna().sum()

timestamp         0
market_data_4h    0
macro_text        0
news              0
prompt            0
dtype: int64

In [423]:
# Save the full frame (all columns, including the generated prompts) without the index.
sideways_complete.to_csv("data/regimes/sideways/sideways_complete.csv", index=False)

# Only keep the timestamp and prompts
sideways_prompts = sideways_complete[['timestamp', 'prompt']].copy()
sideways_prompts.to_csv("data/regimes/sideways/sideways_prompts.csv", index=False)
# Final check: count missing values per column in the saved prompt frame.
sideways_prompts.isna().sum()

timestamp    0
prompt       0
dtype: int64

timestamp    0
prompt       0
dtype: int64