In [1]:
import requests
import pandas as pd
import time
from datetime import datetime

url = "https://api.binance.com/api/v3/klines"
start_time = int(datetime(2020, 1, 1).timestamp() * 1000)
end_time = int(datetime.now().timestamp() * 1000)

interval = "1h"
limit = 1000
symbol = "BTCUSDT"

all_data = []

while start_time < end_time:
    params = {
        "symbol": symbol,
        "interval": interval,
        "startTime": start_time,
        "limit": limit
    }

    response = requests.get(url, params=params)
    if response.status_code != 200:
        print(f"Error: {response.status_code} {response.text}")
        break

    data = response.json()
    if not data:
        break

    all_data.extend(data)
    start_time = data[-1][0] + 1
    time.sleep(0.2)

columns = [
    "open_time", "open", "high", "low", "close", "volume",
    "close_time", "quote_asset_volume", "num_trades",
    "taker_buy_base_volume", "taker_buy_quote_volume", "ignore"
]

btc_df = pd.DataFrame(all_data, columns=columns)
btc_df["open_time"] = pd.to_datetime(btc_df["open_time"], unit="ms")
btc_df["close_time"] = pd.to_datetime(btc_df["close_time"], unit="ms")

float_cols = [
    "open", "high", "low", "close", "volume", 
    "quote_asset_volume", "taker_buy_base_volume", "taker_buy_quote_volume"
]
for col in float_cols:
    btc_df[col] = btc_df[col].astype(float)

btc_df['price_change_pct'] = btc_df['close'].pct_change()
btc_df['vol_change_pct'] = btc_df['volume'].pct_change()
btc_df['rolling_mean_3'] = btc_df['close'].rolling(window=3).mean()
btc_df['rolling_std_3'] = btc_df['close'].rolling(window=3).std()
btc_df['rolling_mean_6'] = btc_df['close'].rolling(window=6).mean()
btc_df['rolling_std_6'] = btc_df['close'].rolling(window=6).std()
btc_df['close_position'] = (btc_df['close'] - btc_df['low']) / (btc_df['high'] - btc_df['low'])
btc_df['close_lag_1'] = btc_df['close'].shift(1)
btc_df['volume_lag_1'] = btc_df['volume'].shift(1)
btc_df['return'] = btc_df['close'].pct_change().shift(-1)

def compute_rsi(series, period=14):
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def compute_macd(series, fast=12, slow=26, signal=9):
    ema_fast = series.ewm(span=fast, adjust=False).mean()
    ema_slow = series.ewm(span=slow, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd, signal_line

def compute_bollinger_bands(series, window=20, num_std=2):
    rolling_mean = series.rolling(window=window).mean()
    rolling_std = series.rolling(window=window).std()
    return rolling_mean + num_std * rolling_std, rolling_mean - num_std * rolling_std

def label_movement(row):
    if row > 0.01:
        return 1
    elif row < -0.01:
        return -1
    else:
        return 0

btc_df['rsi_14'] = compute_rsi(btc_df['close'], period=14)
btc_df['macd'], btc_df['macd_signal'] = compute_macd(btc_df['close'])
btc_df['boll_upper'], btc_df['boll_lower'] = compute_bollinger_bands(btc_df['close'])
btc_df['target_multi'] = btc_df['return'].apply(label_movement)

btc_df.dropna(inplace=True)
btc_df.reset_index(drop=True, inplace=True)


In [19]:
btc_df = btc_df.iloc[:-1]

In [14]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

x = btc_df.drop(columns=['target_multi', 'open_time', 'close_time'], errors='ignore')
y = btc_df['target_multi']

x.replace([np.inf, -np.inf], np.nan, inplace=True)
x.dropna(inplace=True)
y = y.loc[x.index]

scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(
    x_scaled, y, test_size=0.2, shuffle=False
)

model = LogisticRegression(max_iter=100)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Test Accuracy: 0.9975520195838433
              precision    recall  f1-score   support

          -1       1.00      0.97      0.98       290
           0       1.00      1.00      1.00      9238
           1       0.95      1.00      0.97       276

    accuracy                           1.00      9804
   macro avg       0.98      0.99      0.99      9804
weighted avg       1.00      1.00      1.00      9804

