# Data Collection V2

In [1]:
import json 
import websocket
import os
import pandas as pd
import time

In [2]:
assets = ["BTC", "ETH", "LTC"]
assets_hourly = [coins.lower() + "usdt@ticker_1h" for coins in assets]
assets_hourly = "/".join(assets_hourly)
print(assets_hourly)

assets_daily = [coins.lower() + "usdt@ticker_1d" for coins in assets]
assets_daily = "/".join(assets_daily)
print(assets_daily)


btcusdt@ticker_1h/ethusdt@ticker_1h/ltcusdt@ticker_1h
btcusdt@ticker_1d/ethusdt@ticker_1d/ltcusdt@ticker_1d


In [3]:
cav_dir_hourly = "../Dataset/hourlyV2"
cav_dir_daily = "../Dataset/daily24thV2"
csv_files_name = ["Bitcoin.csv", "Ethereum.csv", "Litecoin.csv"]

csv_files_hourly = [os.path.join(cav_dir_hourly, coins) for coins in csv_files_name]
print(csv_files_hourly)

csv_files_daily = [os.path.join(cav_dir_daily, coins) for coins in csv_files_name]
print(csv_files_daily)


['../Dataset/hourlyV2/Bitcoin.csv', '../Dataset/hourlyV2/Ethereum.csv', '../Dataset/hourlyV2/Litecoin.csv']
['../Dataset/daily24thV2/Bitcoin.csv', '../Dataset/daily24thV2/Ethereum.csv', '../Dataset/daily24thV2/Litecoin.csv']


In [4]:
for file in csv_files_hourly:
    os.makedirs(os.path.dirname(file), exist_ok=True)
    if not os.path.exists(file):
        with open(file, "w") as f:
            f.write("Date,Open,High,Low,Close,Volume\n")
            f.close()
    else:
        print(f"{file} already exists.")

for file in csv_files_daily:
    os.makedirs(os.path.dirname(file), exist_ok=True)
    if not os.path.exists(file):
        with open(file, "w") as f:
            f.write("Date,Open,High,Low,Close,Volume\n")
            f.close()
    else:
        print(f"{file} already exists.")

../Dataset/hourlyV2/Bitcoin.csv already exists.
../Dataset/hourlyV2/Ethereum.csv already exists.
../Dataset/hourlyV2/Litecoin.csv already exists.
../Dataset/daily24thV2/Bitcoin.csv already exists.
../Dataset/daily24thV2/Ethereum.csv already exists.
../Dataset/daily24thV2/Litecoin.csv already exists.


In [5]:
source = ""

In [6]:
def retrieve_csv_file(symbol: str, hourly=True):
    # print(symbol == "btcusdt")
    if hourly:
        csv_file_in_use = csv_files_hourly
    else:
        csv_file_in_use = csv_files_daily

    if symbol:
        if symbol ==  "btcusdt":
            return csv_file_in_use[0]
        elif symbol ==  "ethusdt":  
            return csv_file_in_use[1]
        elif symbol ==  "ltcusdt":
            return csv_file_in_use[2]
        
    else:
        raise ValueError("Invalid symbol")

In [11]:
def on_open(ws):
    print("Connection opened")

def on_message(ws, message, hourly=True):
    message = json.loads(message)
    global source
    source = message
    # print(message)

    coin_data = message['data']

    symbol = coin_data["s"].lower()
    csv_save_file = retrieve_csv_file(str(symbol), hourly)
    timestamp = coin_data["E"]
    open_price = coin_data["o"]
    high_price = coin_data["h"]
    low_price = coin_data["l"]
    close_price = coin_data["c"]
    volume = coin_data["v"]

    timestamp = pd.to_datetime(timestamp, unit='ms')
    print(timestamp)

    # Create a DataFrame from the coin_data
    df = pd.DataFrame({
        "Date": [timestamp],
        "Open": [open_price],
        "High": [high_price],
        "Low": [low_price],
        "Close": [close_price],
        "Volume": [volume]
    })

    # Append the DataFrame to the CSV file
    df.to_csv(f"{csv_save_file}", mode="a", header=False, index=False)


def on_close(ws, close_status_code, close_msg):
    print("Connection closed")  

def on_error(ws, error):
    print("Error:", error)

In [12]:
socket_hourly = f"wss://stream.binance.com:9443/stream?streams={assets_hourly}"
socket_daily = f"wss://stream.binance.com:9443/stream?streams={assets_daily}"
socket_hourly, socket_daily

('wss://stream.binance.com:9443/stream?streams=btcusdt@ticker_1h/ethusdt@ticker_1h/ltcusdt@ticker_1h',
 'wss://stream.binance.com:9443/stream?streams=btcusdt@ticker_1d/ethusdt@ticker_1d/ltcusdt@ticker_1d')

In [13]:
def run_websocket_hourly(socket_name):
    ws = websocket.create_connection(socket_name)
    # print("WebSocket connection opened")

    # ws = websocket.WebSocketApp(
    #     socket_hourly, 
    #     on_open=on_open,
    #     on_close=on_close,
    #     on_error=on_error,
    #     on_message=on_message
    # )
    
    result = ws.recv()
    on_message(ws, result)

    print("Received:", result)
    ws.close()

def run_websocket_daily(socket_name):
    ws = websocket.create_connection(socket_name)
    # print("WebSocket connection opened")

    # ws = websocket.WebSocketApp(
    #     socket_daily, 
    #     on_open=on_open,
    #     on_close=on_close,
    #     on_error=on_error,
    #     on_message=on_message
    # )
        
    result = ws.recv()
    on_message(ws, result, hourly=False)
    print("Received:", result)
    ws.close()


In [15]:
count = 0
socket = "wss://stream.binance.com:9443/stream?streams="
while True:
    if count % 24 == 0:
        print("\n\nDaily report:")
        # runs the websocket for daily data
        for coins in assets:
            socket_name = socket + coins.lower() + "usdt@ticker_1d"
            run_websocket_daily(socket_name)
        
    count += 1
    print("\n\nHourly report:")
    for coins in assets:
        socket_name = socket + coins.lower() + "usdt@ticker_1h"
        run_websocket_hourly(socket_name)

    time.sleep(3600) 



Daily report:
2025-04-08 08:20:01.818000
Received: {"stream":"btcusdt@ticker_1d","data":{"e":"1dTicker","E":1744100401818,"s":"BTCUSDT","p":"3143.66000000","P":"4.138","w":"78487.37747529","o":"75975.89000000","h":"81243.58000000","l":"75744.08000000","c":"79119.55000000","v":"58309.15458000","q":"4576532625.78528260","O":1744014000000,"C":1744100401447,"F":4788009618,"L":4795414672,"n":7405055}}
2025-04-08 08:20:02.818000
Received: {"stream":"ethusdt@ticker_1d","data":{"e":"1dTicker","E":1744100402818,"s":"ETHUSDT","p":"74.30000000","P":"5.008","w":"1548.73530838","o":"1483.64000000","h":"1639.00000000","l":"1474.61000000","c":"1557.94000000","v":"1506890.01880000","q":"2333773777.96599600","O":1744014000000,"C":1744100402818,"F":2307043064,"L":2311272783,"n":4229720}}
2025-04-08 08:20:03.819000
Received: {"stream":"ltcusdt@ticker_1d","data":{"e":"1dTicker","E":1744100403819,"s":"LTCUSDT","p":"6.03000000","P":"9.280","w":"69.61884729","o":"64.98000000","h":"73.04000000","l":"64.8900

KeyboardInterrupt: 