<a href="https://colab.research.google.com/github/ekaratnida/Applied-machine-learning/blob/master/ReadBTCdata2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import time, requests, pandas as pd
# Hyperliquid "info" endpoint that the candle requests in this notebook are POSTed to.
INFO_URL = "https://api.hyperliquid.xyz/info"

# Never truncate wide text columns when a DataFrame is printed.
pd.options.display.max_colwidth = None

In [2]:
def fetch_hl_candles(coin="BTC", interval="15m", days=7):
    """Download recent candles from the Hyperliquid info endpoint.

    Posts a ``candleSnapshot`` request to ``INFO_URL`` covering the window
    from ``days`` days ago until now and returns the response as a DataFrame.

    Parameters
    ----------
    coin : str
        Coin symbol placed in the request, e.g. ``"BTC"``.
    interval : str
        Candle interval string placed in the request, e.g. ``"15m"`` or ``"1h"``.
    days : int
        How many days to look back from the current time.

    Returns
    -------
    pandas.DataFrame
        One row per candle, sorted by ``open_time``. The response's short keys
        are renamed (``t`` -> ``open_time``, ``T`` -> ``close_time``,
        ``o``/``h``/``l``/``c``/``v`` -> ``open``/``high``/``low``/``close``/
        ``volume``, ``n`` -> ``trades``, ``i`` -> ``interval``,
        ``s`` -> ``symbol``). Price and volume columns are numeric and the two
        time columns are tz-aware UTC timestamps. When the response contains
        no candles the frame is empty but still carries these columns.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    """
    now_ms = int(time.time() * 1000)
    start_ms = now_ms - days * 24 * 60 * 60 * 1000
    payload = {
        "type": "candleSnapshot",
        "req": {"coin": coin, "interval": interval, "startTime": start_ms, "endTime": now_ms},
    }
    r = requests.post(INFO_URL, json=payload, timeout=15)
    r.raise_for_status()
    data = r.json()  # list of candle dicts

    rename = {"t":"open_time","T":"close_time","o":"open","h":"high","l":"low","c":"close",
              "v":"volume","n":"trades","i":"interval","s":"symbol"}

    # An empty response would otherwise produce a column-less frame and the
    # conversions below would fail with KeyError; seed the expected columns so
    # callers get a well-formed (empty) result instead.
    df = pd.DataFrame(data) if data else pd.DataFrame(columns=list(rename))
    df = df.rename(columns=rename)

    # Prices and volume are converted explicitly; unparseable values become NaN.
    num_cols = ["open","high","low","close","volume"]
    for c in num_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Epoch-millisecond stamps -> tz-aware UTC timestamps.
    df["open_time"]  = pd.to_datetime(df["open_time"],  unit="ms", utc=True)
    df["close_time"] = pd.to_datetime(df["close_time"], unit="ms", utc=True)
    return df.sort_values("open_time")

In [3]:
# Pull one week of hourly BTC candles, then show the frame's size and its newest rows.
df = fetch_hl_candles(coin="BTC", interval="1h", days=7)
print(df.shape, df.tail(), sep="\n")


(169, 10)
                    open_time                       close_time symbol  \
164 2025-08-22 08:00:00+00:00 2025-08-22 08:59:59.999000+00:00    BTC   
165 2025-08-22 09:00:00+00:00 2025-08-22 09:59:59.999000+00:00    BTC   
166 2025-08-22 10:00:00+00:00 2025-08-22 10:59:59.999000+00:00    BTC   
167 2025-08-22 11:00:00+00:00 2025-08-22 11:59:59.999000+00:00    BTC   
168 2025-08-22 12:00:00+00:00 2025-08-22 12:59:59.999000+00:00    BTC   

    interval      open     close      high       low      volume  trades  
164       1h  113065.0  112971.0  113284.0  112924.0   385.39413    6475  
165       1h  112972.0  113026.0  113102.0  112782.0   276.78663    5941  
166       1h  113026.0  112510.0  113042.0  112501.0   340.40292    7396  
167       1h  112509.0  112307.0  112613.0  112159.0  1003.36392    9112  
168       1h  112307.0  112037.0  112349.0  112000.0   585.08182    6588  


In [4]:
import requests
import pandas as pd
import time

INFO_URL = "https://api.hyperliquid.xyz/info"   # Hyperliquid info endpoint that fetch_hl_candles POSTs to
# NewsAPI "everything" endpoint queried by fetch_btc_news.
NEWS_URL = "https://newsapi.org/v2/everything"
# google.colab is only importable inside a Colab runtime.
from google.colab import userdata
# The NewsAPI key is looked up under the name 'NEWS_API' via Colab's userdata
# helper, so the key itself never appears in the notebook source.
NEWS_API_KEY = userdata.get('NEWS_API')

In [5]:

def fetch_hl_candles(coin="BTC", interval="15m", days=7):
    """Download recent candles from the Hyperliquid info endpoint.

    Posts a ``candleSnapshot`` request to ``INFO_URL`` covering the window
    from ``days`` days ago until now and returns the response as a DataFrame.

    Parameters
    ----------
    coin : str
        Coin symbol placed in the request, e.g. ``"BTC"``.
    interval : str
        Candle interval string placed in the request, e.g. ``"15m"`` or ``"1h"``.
    days : int
        How many days to look back from the current time.

    Returns
    -------
    pandas.DataFrame
        One row per candle, sorted by ``open_time``. The response's short keys
        are renamed (``t`` -> ``open_time``, ``T`` -> ``close_time``,
        ``o``/``h``/``l``/``c``/``v`` -> ``open``/``high``/``low``/``close``/
        ``volume``, ``n`` -> ``trades``, ``i`` -> ``interval``,
        ``s`` -> ``symbol``). Price and volume columns are numeric and the two
        time columns are tz-aware UTC timestamps. When the response contains
        no candles the frame is empty but still carries these columns.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    """
    now_ms = int(time.time() * 1000)
    start_ms = now_ms - days * 24 * 60 * 60 * 1000
    payload = {
        "type": "candleSnapshot",
        "req": {"coin": coin, "interval": interval, "startTime": start_ms, "endTime": now_ms},
    }
    r = requests.post(INFO_URL, json=payload, timeout=15)
    r.raise_for_status()
    data = r.json()  # list of candle dicts

    rename = {"t":"open_time","T":"close_time","o":"open","h":"high","l":"low","c":"close",
              "v":"volume","n":"trades","i":"interval","s":"symbol"}

    # An empty response would otherwise produce a column-less frame and the
    # conversions below would fail with KeyError; seed the expected columns so
    # callers get a well-formed (empty) result instead.
    df = pd.DataFrame(data) if data else pd.DataFrame(columns=list(rename))
    df = df.rename(columns=rename)

    # Prices and volume are converted explicitly; unparseable values become NaN.
    num_cols = ["open","high","low","close","volume"]
    for c in num_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Epoch-millisecond stamps -> tz-aware UTC timestamps.
    df["open_time"]  = pd.to_datetime(df["open_time"],  unit="ms", utc=True)
    df["close_time"] = pd.to_datetime(df["close_time"], unit="ms", utc=True)
    return df.sort_values("open_time")


def fetch_btc_news(days=7, page_size=10):
    """Fetch recent English-language Bitcoin articles from NewsAPI.

    Sends a GET request to ``NEWS_URL`` searching for ``"bitcoin OR BTC"``,
    restricted to articles dated on or after the calendar day ``days`` days
    ago, sorted by ``publishedAt``.

    Parameters
    ----------
    days : int
        How many days to look back from the current UTC time.
    page_size : int
        Value sent as the ``pageSize`` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``"articles"`` list, with
        ``publishedAt`` parsed into tz-aware UTC timestamps. An empty frame is
        returned when the response holds no articles.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    """
    # Timestamp.now(tz="UTC") is the replacement for the deprecated Timestamp.utcnow().
    now = pd.Timestamp.now(tz="UTC")
    from_date = (now - pd.Timedelta(days=days)).strftime("%Y-%m-%d")

    params = {
        "q": "bitcoin OR BTC",
        "from": from_date,
        "sortBy": "publishedAt",
        "language": "en",
        "pageSize": page_size,
    }
    # Authenticate through the X-Api-Key header rather than an `apiKey` query
    # parameter: a key in the query string becomes part of the request URL,
    # which is echoed in HTTPError messages and therefore in notebook output.
    headers = {"X-Api-Key": NEWS_API_KEY}

    r = requests.get(NEWS_URL, params=params, headers=headers, timeout=15)
    r.raise_for_status()
    articles = r.json().get("articles", [])
    df = pd.DataFrame(articles)
    if not df.empty:
        df["publishedAt"] = pd.to_datetime(df["publishedAt"], utc=True)
    return df


def sync_price_with_news(interval="15m", days=7, page_size=10):
    """Pair every news article with the candle in effect when it was published.

    Fetches candles via ``fetch_hl_candles`` and articles via
    ``fetch_btc_news``, then as-of merges them on time.

    Parameters
    ----------
    interval : str
        Candle interval forwarded to ``fetch_hl_candles``.
    days : int
        Look-back window forwarded to both fetch helpers.
    page_size : int
        Number of articles requested, forwarded to ``fetch_btc_news``.

    Returns
    -------
    pandas.DataFrame
        The article rows (sorted by ``publishedAt``) with the candle columns
        appended. Each article is matched to the latest candle whose
        ``open_time`` is at or before its ``publishedAt``; articles older than
        the first candle get NaN candle values. When no articles are found an
        empty frame with ``publishedAt``, ``title`` and the candle columns is
        returned, so the result is always a DataFrame.
    """
    # 1. Fetch candles
    candles = fetch_hl_candles(interval=interval, days=days)
    # 2. Fetch news
    news = fetch_btc_news(days=days, page_size=page_size)

    if news.empty:
        # Always return a DataFrame: a (candles, news) tuple here would break
        # callers that index the result by column. The empty frame keeps the
        # columns the notebook's display cell selects.
        return pd.DataFrame(columns=["publishedAt", "title", *candles.columns])

    # 3. As-of merge: direction="backward" picks, for each article, the most
    #    recent candle that opened at or before the publication time (not the
    #    nearest candle in either direction).
    merged = pd.merge_asof(
        news.sort_values("publishedAt"),
        candles.sort_values("open_time"),
        left_on="publishedAt",
        right_on="open_time",
        direction="backward",
    )
    return merged


if __name__ == "__main__":
    merged_df = sync_price_with_news()
    # Guard against a non-DataFrame or empty result (e.g. when no articles
    # were found) so the column selection below cannot fail.
    if isinstance(merged_df, pd.DataFrame) and not merged_df.empty:
        print(merged_df[["publishedAt","title","open","close","high","low","volume"]].head())
    else:
        print("No news articles were returned; nothing to merge.")


                publishedAt  \
0 2025-08-21 11:55:45+00:00   
1 2025-08-21 11:55:53+00:00   
2 2025-08-21 12:00:00+00:00   
3 2025-08-21 12:00:00+00:00   
4 2025-08-21 12:00:00+00:00   

                                                                                     title  \
0         Bitcoin Falters in Choppy Market, Ether Stays Resilient: Crypto Daybook Americas   
1                  Hong Kong construction firm’s stock jumps on $483M Bitcoin treasury bet   
2                     Bitcoin Bombshell: Eric Trump Hints At Secret $22 Billion Nation Buy   
3                Markets Today: Bitcoin Bears Start to Emerge, Ye's YZY Surges Then Slumps   
4  LendFriend Expands Crypto Mortgage Programs to California, Colorado, Florida, and Texas   

       open     close      high       low      volume  
0  113326.0  113113.0  113326.0  112956.0  1042.16112  
1  113326.0  113113.0  113326.0  112956.0  1042.16112  
2  113113.0  113118.0  113173.0  113017.0   100.78287  
3  113113.0  113118.0  1

## Exercise
Use multiple APIs from this link (https://github.com/public-apis/public-apis?tab=readme-ov-file) or from other sources to generate a dataset based on your objective.