In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
import random
from collections import deque
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
import os
# Configuration shared by the LSTM predictor, the trading environment and the DQN agent.
TICKER = 'BTCUSDT'  # default symbol for Binance kline requests
SEQ_LENGTH = 60  # number of consecutive rows in one LSTM input window
PREDICTION_STEPS = 10  # the LSTM target lies this many rows after the end of the window
INITIAL_BALANCE = 10000.0  # starting cash of the simulated account; also used to normalise state features
TRADE_FEE_RATE = 0.0001  # proportional fee used by TradingEnvironment.execute_trade
MIN_TRADE_AMOUNT = 10  # NOTE(review): not referenced by the code visible in this file
GRID_COUNT = 20  # grid resolution; create_grids() returns GRID_COUNT - 1 interior levels
EPOCHS = 0  # LSTM training epochs; 0 means train_lstm() only loads the saved checkpoint
EPISODES = 100  # DQN training episodes per train_model() call
BATCH_SIZE = 256  # used both as LSTM batch size and as DQN replay minibatch size
MEMORY_SIZE = 10000  # capacity of the DQN replay buffer
GAMMA = 0.95  # discount factor in the Q-learning target
EPSILON_DECAY = 0.9999995  # multiplicative epsilon decay applied once per replay() call
MIN_EPSILON = 0.1  # epsilon stops decaying once it is no longer above this value
LEARNING_RATE = 0.0001  # Adam learning rate for the DQN
TARGET_UPDATE_FREQ = 100  # copy online weights into the target network every N replay steps
# NOTE(review): this value is passed directly as Adam's lr in train_lstm(); 100 is unusually
# large for a learning rate - confirm it is intentional.
LSTM_LEARNING_RATE = 100
LSTM_PATH = 'lstm_predictor_fixed_grid2.pth'  # LSTM checkpoint read/written by train_lstm()
DOUBLE_DQN_PATH = 'dqn_agent_fixed_grid.pth'  # same file name that train_model() saves the agent to

# Set device: use CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class DataProcessor:
    """Load OHLCV rows from a CSV file and turn them into scaled model inputs."""

    def __init__(self, filepath):
        """Read ``filepath`` into a DataFrame and create an unfitted 0-1 scaler."""
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.data = pd.read_csv(filepath)

    def preprocess(self):
        """Add return/volatility features and scale the feature matrix.

        Returns:
            A tuple ``(scaled, closes)``: the MinMax-scaled feature matrix and
            the unscaled ``close`` column for the same rows.
        """
        frame = self.data.copy()

        # Derived features; the first rows become NaN and are dropped below.
        frame['Returns'] = frame['close'].pct_change()
        frame['Volatility'] = frame['Returns'].rolling(20).std()
        frame = frame.dropna()

        feature_columns = ['open', 'high', 'low', 'close', 'volume', 'Returns', 'Volatility']
        scaled_features = self.scaler.fit_transform(frame[feature_columns])
        return scaled_features, frame['close'].values

    def create_sequences(self, data, target):
        """Build sliding windows of ``data`` paired with a future ``target`` value.

        Each window holds ``SEQ_LENGTH`` consecutive rows; its label is the
        target value ``PREDICTION_STEPS`` rows after the window's last row.
        """
        window_count = len(data) - SEQ_LENGTH - PREDICTION_STEPS
        label_offset = SEQ_LENGTH + PREDICTION_STEPS - 1

        windows = [data[start:start + SEQ_LENGTH] for start in range(window_count)]
        labels = [target[start + label_offset] for start in range(window_count)]
        return np.array(windows), np.array(labels)

class AdaptiveLRScheduler:
    """Loss-driven scheduler that lowers the optimizer's learning rate.

    The rate is multiplied by ``factor`` (never going below ``min_lr``) when
    either the loss jumps by more than ``fluctuation_threshold`` (relative to
    the previous loss) or it has failed to improve on the best loss by more
    than ``threshold`` for ``patience`` consecutive calls to :meth:`step`.

    ``max_lr`` is stored but not used by the current logic.
    """

    def __init__(self, optimizer, patience=5, factor=0.5, min_lr=1e-20, max_lr=1.0, threshold=1e-4, fluctuation_threshold=0.2):
        self.optimizer = optimizer
        self.patience = patience
        self.factor = factor
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.threshold = threshold
        self.fluctuation_threshold = fluctuation_threshold

        self.loss_history = []
        self.wait = 0
        self.best_loss = float('inf')

    def step(self, current_loss):
        """Record ``current_loss`` and reduce the learning rate if warranted."""
        self.loss_history.append(current_loss)

        if len(self.loss_history) > 2:
            previous_loss = self.loss_history[-2]
            fluctuation = abs(current_loss - previous_loss) / (previous_loss + 1e-8)
        else:
            fluctuation = 0

        # Track the best loss before any early return. Otherwise a large drop
        # (which also counts as a "fluctuation") is never recorded and later
        # plateau checks compare against a stale baseline.
        improved = (self.best_loss - current_loss) > self.threshold
        if improved:
            self.best_loss = current_loss

        # 1. Detect fluctuations
        if fluctuation > self.fluctuation_threshold:
            self._reduce_lr("High fluctuation detected")
            self.wait = 0
            return

        # 2. Detect improvement / plateau
        if improved:
            self.wait = 0
            return

        self.wait += 1
        if self.wait >= self.patience:
            self._reduce_lr("Loss plateau detected")
            self.wait = 0

    def _reduce_lr(self, reason):
        """Scale every param group's learning rate by ``factor``, floored at ``min_lr``."""
        new_lr = None
        for param_group in self.optimizer.param_groups:
            new_lr = max(param_group['lr'] * self.factor, self.min_lr)
            param_group['lr'] = new_lr

        if new_lr is None:
            # Optimizer has no parameter groups: nothing was changed, nothing to report.
            return
        print(f"[LR SCHEDULER] {reason}. LR reduced to: {new_lr:.10f}")

class LSTMPredictor(nn.Module):
    """Stacked LSTM followed by a linear head that emits one value per sequence."""

    def __init__(self, input_size, hidden_size=50, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the batch-first input through the LSTM and project its last time step."""
        sequence_output, _ = self.lstm(x)
        final_step = sequence_output[:, -1, :]
        return self.fc(final_step)

    def save(self, path, best_loss):
        """Write the weights together with ``best_loss`` to ``path`` and echo the loss."""
        checkpoint = {
            'model_state_dict': self.state_dict(),
            'best_loss': best_loss,
        }
        torch.save(checkpoint, path)
        print(best_loss)

    def load(self, path, map_location=None):
        """Restore weights from ``path`` if it exists.

        Returns the checkpoint's ``best_loss`` (``inf`` when the key or the
        file is missing). A successful load leaves the module in eval mode.
        """
        if not os.path.exists(path):
            print(f"⚠️ Warning: No checkpoint found at {path}.")
            return float('inf')

        checkpoint = torch.load(path, map_location=map_location)
        self.load_state_dict(checkpoint['model_state_dict'])
        self.eval()
        return checkpoint.get('best_loss', float('inf'))


class DQN(nn.Module):
    """Three-layer fully connected Q-network: state vector in, one Q-value per action out."""

    def __init__(self, state_size, action_size):
        super().__init__()
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_size)

    def forward(self, x):
        """Apply two ReLU-activated hidden layers, then the linear output layer."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

class TradingEnvironment:
    """Single-asset trading simulator driven by a price series and an LSTM predictor.

    Args:
        data: scaled feature rows, indexed in step with ``prices``.
        prices: unscaled prices used for trading and valuation.
        predictor: model called on the last ``SEQ_LENGTH`` feature rows.
        grids: ascending price levels used to locate the price within the grid.
    """

    def __init__(self, data, prices, predictor, grids):
        self.data = data
        self.prices = prices
        self.predictor = predictor
        self.grids = grids
        self.reset()

    def reset(self):
        """Restore the starting account and return the first observation."""
        self.current_step = SEQ_LENGTH
        self.balance = INITIAL_BALANCE
        self.btc_held = 0.0
        self.portfolio_value = [INITIAL_BALANCE]
        self.trades = []
        self.done = False
        self.prev_portfolio_value = INITIAL_BALANCE
        return self.get_state()

    def get_state(self):
        """Build the observation for ``current_step``.

        Layout: the scaled feature row, cash / INITIAL_BALANCE, BTC value /
        INITIAL_BALANCE, grid position, and predicted price / current price.
        """
        current_data = self.data[self.current_step]
        price = self.prices[self.current_step]
        # NOTE(review): normalised by GRID_COUNT here but by len(self.grids) in step();
        # left unchanged so the observation stays compatible with saved agents.
        grid_position = np.digitize(price, self.grids) / GRID_COUNT

        # Get prediction
        with torch.no_grad():
            seq_data = self.data[self.current_step-SEQ_LENGTH:self.current_step]
            seq_tensor = torch.tensor(seq_data[np.newaxis, ...], dtype=torch.float32).to(device)
            prediction = self.predictor(seq_tensor).cpu().item()

        return np.array([
            *current_data,
            self.balance / INITIAL_BALANCE,
            self.btc_held * price / INITIAL_BALANCE,
            grid_position,
            prediction / price
        ])

    def execute_trade(self, action, amount):
        """Return ``amount`` net of the proportional trade fee."""
        fee = amount * TRADE_FEE_RATE
        return amount - fee

    def step(self, action):
        """Apply ``action`` at the current price, then advance one step.

        Actions:
        0 = hold
        1 = buy
        2 = sell

        Returns:
            ``(next_state, reward, done, portfolio_value)``. The portfolio is
            marked to market at the price of the step just entered, so the
            reward reflects the outcome of the action taken in this call.
        """
        price = self.prices[self.current_step]
        grid_position = np.digitize(price, self.grids) / len(self.grids)  # normalized 0-1

        min_trade_usd = 10  # Minimum USD amount per trade
        min_trade_btc = 0.001  # Minimum BTC amount per trade

        if action == 1:  # Buy
            # Buy more when price is near lower grids (grid_position close to 0)
            multiplier = max(0.1, 1 - grid_position)
            # Work in USD throughout: converting to BTC and back could overshoot
            # the balance by a rounding error and silently reject an all-in buy.
            spend = max(self.balance * multiplier, min_trade_usd)
            spend = min(spend, self.balance)  # Can't spend more than the balance
            spend = max(spend, min_trade_btc * price)  # Enforce the minimum BTC lot

            if spend <= self.balance:
                self.balance -= spend
                # Fee is taken from the purchased quantity.
                self.btc_held += self.execute_trade(action, spend / price)
                self.trades.append(('buy', price, self.current_step))

        elif action == 2:  # Sell
            # Sell more when price is near upper grids (grid_position close to 1)
            multiplier = max(0.1, grid_position)
            btc_to_sell = max(self.btc_held * multiplier, min_trade_btc)

            if btc_to_sell <= self.btc_held:
                # Fee is taken from the sale proceeds.
                self.balance += self.execute_trade(action, btc_to_sell * price)
                self.btc_held -= btc_to_sell
                self.trades.append(('sell', price, self.current_step))

        # Any other action is a hold: no trade.

        self.current_step += 1
        self.done = self.current_step >= len(self.prices) - 1

        # Value holdings at the new step's price. Using the pre-step price made
        # the reward independent of the action just taken.
        mark_price = self.prices[self.current_step]
        portfolio_value = self.balance + self.btc_held * mark_price
        reward = portfolio_value - self.prev_portfolio_value
        self.prev_portfolio_value = portfolio_value

        next_state = self.get_state()

        return next_state, reward, self.done, portfolio_value

class AdaptiveTradingEnvironment(TradingEnvironment):
    """Trading environment whose grid levels are re-derived from recent prices."""

    def __init__(self, data, prices, predictor, initial_grids,
                 recalibration_interval=100, lookback_window=500):
        # These attributes are set first because the base constructor calls reset().
        self.lookback_window = lookback_window
        self.recalibration_interval = recalibration_interval
        self.original_grids = initial_grids
        self.grid_history = [initial_grids.copy()]
        self.steps_since_recalibration = 0
        super().__init__(data, prices, predictor, initial_grids)

    def reset(self):
        """Go back to the initial grids and restart the episode."""
        self.grids = self.original_grids.copy()
        self.steps_since_recalibration = 0
        return super().reset()

    def recalculate_grids(self):
        """Derive new grid levels from the most recent ``lookback_window`` prices."""
        window_start = max(0, self.current_step - self.lookback_window)
        window = self.prices[window_start:self.current_step]

        # The 10th/90th percentiles bound the grid so extremes are ignored.
        lower = np.percentile(window, 10)
        upper = np.percentile(window, 90)

        # Guard against a degenerate (flat) range.
        if upper <= lower:
            upper = lower * 1.2

        levels = np.linspace(lower, upper, num=GRID_COUNT+1)[1:-1]
        self.grid_history.append(levels)
        return levels

    def step(self, action):
        """Recalibrate the grids when the interval has elapsed, then take the step."""
        self.steps_since_recalibration += 1
        recalibration_due = self.steps_since_recalibration >= self.recalibration_interval
        if recalibration_due:
            self.grids = self.recalculate_grids()
            self.steps_since_recalibration = 0

        return super().step(action)
class DQNAgent:
    """Epsilon-greedy DQN agent with a replay buffer and a target network."""

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=MEMORY_SIZE)
        self.epsilon = 1
        self.model = DQN(state_size, action_size).to(device)
        self.target_model = DQN(state_size, action_size).to(device)
        self.target_model.load_state_dict(self.model.state_dict())
        self.target_model.eval()
        self.optimizer = optim.Adam(self.model.parameters(), lr=LEARNING_RATE)
        self.scheduler = StepLR(self.optimizer, step_size=50, gamma=0.95)
        self.criterion = nn.MSELoss()
        self.steps = 0

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick a random action with probability ``epsilon``, else the greedy one."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        return torch.argmax(q_values).item()

    def replay(self, batch_size):
        """Run one optimisation step on a random minibatch, if enough samples exist."""
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        states = torch.tensor(np.array(states), dtype=torch.float32).to(device)
        actions = torch.tensor(actions, dtype=torch.long).unsqueeze(1).to(device)
        rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
        next_states = torch.tensor(np.array(next_states), dtype=torch.float32).to(device)
        dones = torch.tensor(dones, dtype=torch.float32).to(device)

        # Q-values of the actions actually taken. squeeze(1) keeps the batch
        # dimension even for a batch of one, so the shape always matches target_q.
        current_q = self.model(states).gather(1, actions).squeeze(1)

        # Bootstrapped targets from the frozen target network
        with torch.no_grad():
            next_q = self.target_model(next_states).max(1)[0]
            target_q = rewards + (1 - dones) * GAMMA * next_q

        loss = self.criterion(current_q, target_q)

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # NOTE(review): the StepLR scheduler advances on every replay call, so the
        # learning rate is multiplied by 0.95 every 50 minibatches - confirm this
        # decay speed is intended.
        self.scheduler.step()

        # Update target network
        self.steps += 1
        if self.steps % TARGET_UPDATE_FREQ == 0:
            self.update_target_model()

        # Epsilon decay, clamped so it never drops below the floor
        if self.epsilon > MIN_EPSILON:
            self.epsilon = max(MIN_EPSILON, self.epsilon * EPSILON_DECAY)

    def save(self, path):
        """Write the online network's state_dict to ``path``."""
        torch.save(self.model.state_dict(), path)

    def load(self, path):
        """Load weights from ``path`` into the online and target networks.

        ``map_location=device`` lets a checkpoint saved on a GPU be restored on
        a CPU-only host. The target network is re-synchronised so it does not
        keep its random initial weights.
        """
        state_dict = torch.load(path, map_location=device)
        self.model.load_state_dict(state_dict)
        self.update_target_model()

def create_grids(prices, num_grids=GRID_COUNT):
    """Return the interior levels of an even grid over the 5th-95th percentile price range.

    The range is divided into ``num_grids`` equal intervals and both end
    points are dropped, leaving ``num_grids - 1`` levels.
    """
    lower_bound = np.percentile(prices, 5)
    upper_bound = np.percentile(prices, 95)
    all_levels = np.linspace(lower_bound, upper_bound, num=num_grids+1)
    return all_levels[1:-1]

def plot_grid_history(env):
    """Plot the price series with the grid levels recorded in ``env.grid_history``.

    Only every 5th recorded grid set is drawn, to keep the figure readable.
    Each level is drawn as a faint horizontal line across the whole plot; the
    lines are not limited to the time span in which that grid was active.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(env.prices, label='Price', alpha=0.5)

    # Every 5th recalibration (indices 0, 5, 10, ...)
    for grids in env.grid_history[::5]:
        for grid in grids:
            plt.axhline(y=grid, color='gray', alpha=0.1)

    plt.title('Adaptive Grid Levels Over Time')
    plt.legend()
    plt.show()
    
def train_lstm(X_train, y_train, input_size, epochs=20, batch_size=64):
    """Train (or resume training of) the LSTM price predictor.

    The model is first restored from ``LSTM_PATH`` when that checkpoint
    exists. After each epoch the checkpoint is overwritten whenever the mean
    training loss beats the best loss recorded so far.

    Args:
        X_train: array of input windows.
        y_train: array of target values, one per window.
        input_size: number of features per time step.
        epochs: number of passes over the data; 0 skips training.
        batch_size: minibatch size.

    Returns:
        The model, left in training mode.
    """
    model = LSTMPredictor(input_size).to(device)
    # The stored value is a mean *training* loss; no validation set is involved.
    best_loss = model.load(LSTM_PATH, map_location=device)
    print('loaded', best_loss)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LSTM_LEARNING_RATE)
    scheduler = AdaptiveLRScheduler(optimizer, patience=10, factor=0.5)
    model.train()

    if len(X_train) == 0:
        # An empty loader would make the per-epoch average divide by zero.
        print("No training samples provided; skipping LSTM training.")
        return model

    # Convert to tensors
    X_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)

    dataset = TensorDataset(X_tensor, y_tensor)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss/len(loader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.6f}")
        scheduler.step(avg_loss)
        if avg_loss < best_loss:
            best_loss = avg_loss
            model.save(LSTM_PATH, best_loss)
            print('saved')
    return model

Using device: cuda


In [3]:
import time
from binance.client import Client
import pandas as pd
import os
from requests.exceptions import ReadTimeout, RequestException

# Module-level Binance client created without API credentials; get_binance_data() uses it to fetch klines.
client = Client()


def get_binance_data(symbol=TICKER, interval=Client.KLINE_INTERVAL_1MINUTE, lookback='60', retries=3, delay=20):
    """Fetch the latest Binance OHLCV klines, retrying on request errors.

    Args:
        symbol: trading pair to query.
        interval: kline interval constant.
        lookback: number of klines to request (converted with ``int``).
        retries: maximum number of attempts.
        delay: seconds to wait between attempts.

    Returns:
        A DataFrame with a datetime ``timestamp`` column and float ``open``,
        ``high``, ``low``, ``close`` and ``volume`` columns, or an empty
        DataFrame when every attempt failed.
    """
    klines = None
    for attempt in range(1, retries + 1):
        try:
            klines = client.get_klines(symbol=symbol, interval=interval, limit=int(lookback))
            break
        except (ReadTimeout, RequestException) as e:
            if attempt < retries:
                print(f"[Attempt {attempt}/{retries}] Error fetching data: {e}. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                # Last attempt: there is no retry to wait for, so do not sleep.
                print(f"[Attempt {attempt}/{retries}] Error fetching data: {e}.")

    if klines is None:
        print("Failed to fetch Binance data after multiple retries.")
        return pd.DataFrame()  # Return empty DataFrame on failure

    df = pd.DataFrame(klines, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'num_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ])

    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].astype({
        'open': float, 'high': float, 'low': float, 'close': float, 'volume': float
    })

    return df

In [None]:
import joblib
def train_model(csv):
    """Train the LSTM predictor and the DQN agent on ``csv``, then report a test run.

    Steps: preprocess the CSV and persist the fitted scaler, train/load the
    LSTM, train the DQN agent in an adaptive-grid environment, and finally
    run the greedy policy on the last 20% of the data and print a summary.

    Args:
        csv: path to a CSV file with OHLCV columns.
    """
    processor = DataProcessor(csv)
    scaled_data, raw_prices = processor.preprocess()
    X, y = processor.create_sequences(scaled_data, raw_prices)
    joblib.dump(processor.scaler, 'min_max_scaler.joblib')
    # Split data
    split = int(0.8 * len(X))
    X_train, y_train = X[:split], y[:split]

    # 2. Train LSTM Price Predictor
    print("Training LSTM Price Predictor...")
    lstm_model = train_lstm(X_train, y_train, input_size=X.shape[2], epochs=EPOCHS, batch_size=BATCH_SIZE)

    lstm_model.eval()

    # 3. Prepare Trading Environment
    # NOTE(review): the training environment covers the full series, including the
    # tail that is evaluated below, so the reported test result is in-sample.
    grids = create_grids(raw_prices)
    env = AdaptiveTradingEnvironment(
        scaled_data,
        raw_prices,
        lstm_model,
        initial_grids=grids,
        recalibration_interval=1000,  # Recalculate every 1000 steps
        lookback_window=500           # Use last 500 prices for recalculation
    )
    # 4. Initialize DQN Agent
    state_size = env.get_state().shape[0]
    action_size = 3  # hold, buy, sell
    agent = DQNAgent(state_size, action_size)
    agent.epsilon = 0.5

    # 5. Training Loop
    print("\nTraining DQN Agent...")
    portfolio_history = []

    for e in range(EPISODES):
        state = env.reset()
        total_reward = 0
        episode_portfolio = [INITIAL_BALANCE]

        while not env.done:
            action = agent.act(state)
            next_state, reward, done, portfolio_value = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            episode_portfolio.append(portfolio_value)

            if len(agent.memory) > BATCH_SIZE:
                agent.replay(BATCH_SIZE)

        portfolio_history.append(episode_portfolio[-1])
        print(f"Episode {e+1}/{EPISODES}, Reward: {total_reward:.4f}, Portfolio: ${episode_portfolio[-1]:,.2f}, Epsilon: {agent.epsilon:.4f}")
        if (e+1) % 10 == 0:
            agent.save(DOUBLE_DQN_PATH)
            print("Models saved successfully")

    # 6. Evaluation
    print("\nEvaluating on test data...")
    test_env = TradingEnvironment(scaled_data[split:], raw_prices[split:], lstm_model, grids)
    state = test_env.reset()
    portfolio_values = []
    trade_log = []
    logged_trades = 0

    # Evaluate the learned policy greedily; leaving exploration on would make
    # roughly half of the evaluated actions random.
    agent.epsilon = 0.0

    while not test_env.done:
        action = agent.act(state)
        next_state, reward, done, portfolio_value = test_env.step(action)
        state = next_state
        portfolio_values.append(portfolio_value)

        # Log only trades executed during this step (no duplicates on hold steps).
        if len(test_env.trades) > logged_trades:
            trade_log.extend(test_env.trades[logged_trades:])
            logged_trades = len(test_env.trades)

    # 7. Results
    final_value = test_env.balance + test_env.btc_held * test_env.prices[-1]
    print(f"\n{'='*50}")
    print(f"Initial Balance: ${INITIAL_BALANCE:,.2f}")
    print(f"Final Portfolio Value: ${final_value:,.2f}")
    print(f"Total Return: {((final_value - INITIAL_BALANCE) / INITIAL_BALANCE)*100:.2f}%")
    print(f"Total Trades Executed: {len(test_env.trades)}")
    print(f"Grid Levels: {np.around(grids, 2)}")

    # # Plot results
    # plt.figure(figsize=(15, 10))

    # # Price and grid levels
    # plt.subplot(3, 1, 1)
    # plt.plot(test_env.prices, label='Price')
    # for grid in grids:
    #     plt.axhline(y=grid, color='gray', linestyle='--', alpha=0.3)
    # plt.title('Price with Grid Levels')
    # plt.legend()

    # # Portfolio value
    # plt.subplot(3, 1, 2)
    # plt.plot(portfolio_values)
    # plt.title('Portfolio Value')
    # plt.xlabel('Time Step')
    # plt.ylabel('Value (USD)')

    # # Trade visualization
    # plt.subplot(3, 1, 3)
    # buys = [t[1] for t in trade_log if t[0] == 'buy']
    # sells = [t[1] for t in trade_log if t[0] == 'sell']
    # plt.plot(test_env.prices, label='Price')
    # plt.scatter([i for i, t in enumerate(trade_log) if t[0]=='buy'], buys, 
    #             color='green', marker='^', alpha=0.7, label='Buy')
    # plt.scatter([i for i, t in enumerate(trade_log) if t[0]=='sell'], sells, 
    #             color='red', marker='v', alpha=0.7, label='Sell')
    # plt.title('Trading Signals')
    # plt.legend()

    # plt.tight_layout()
    # plt.savefig('trading_results.png')
    # plt.show()

    # plot_grid_history(env)
    # # Save models
    # torch.save(lstm_model.state_dict(), 'lstm_predictor_fixed_grid.pth')
    # agent.save('dqn_agent_fixed_grid.pth')
    # print("Models saved successfully")

# 1. Data Preparation
def dataCreation():
    """Download the latest 1000 one-minute klines for ``TICKER`` into a CSV file.

    Returns:
        The path of the written CSV file.

    Raises:
        Exception: if no data could be retrieved.
    """
    live_frame = get_binance_data(
        symbol=TICKER,
        interval=Client.KLINE_INTERVAL_1MINUTE,
        lookback='1000',
    )
    if not live_frame.empty:
        csv_path = 'live_temp.csv'
        live_frame.to_csv(csv_path, index=False)
        return csv_path
    raise Exception("No data retrieved; skipping model training.")

# Fetch fresh data and retrain, up to 1000 times. An exception raised inside an
# iteration is printed and the loop continues with the next iteration.
for i in range(1000):
    try:
        print(f"=== Iteration {i+1} ===")
        temp_csv = dataCreation()
        train_model(temp_csv)
    except Exception as e:
        print(f"Error in loop iteration {i+1}: {e}")


=== Iteration 1 ===
Training LSTM Price Predictor...
loaded 95962.83333333333

Training DQN Agent...
Episode 1/100, Reward: 191.5150, Portfolio: $10,191.52, Epsilon: 0.4998
Episode 2/100, Reward: 276.5317, Portfolio: $10,276.53, Epsilon: 0.4996
Episode 3/100, Reward: 241.7375, Portfolio: $10,241.74, Epsilon: 0.4994
Episode 4/100, Reward: 175.5864, Portfolio: $10,175.59, Epsilon: 0.4991
Episode 5/100, Reward: 225.4284, Portfolio: $10,225.43, Epsilon: 0.4989
Episode 6/100, Reward: 175.0459, Portfolio: $10,175.05, Epsilon: 0.4987
Episode 7/100, Reward: 271.4977, Portfolio: $10,271.50, Epsilon: 0.4985
Episode 8/100, Reward: 250.6263, Portfolio: $10,250.63, Epsilon: 0.4982
Episode 9/100, Reward: 262.0239, Portfolio: $10,262.02, Epsilon: 0.4980
Episode 10/100, Reward: 257.8684, Portfolio: $10,257.87, Epsilon: 0.4978
Models saved successfully
Episode 11/100, Reward: 214.9637, Portfolio: $10,214.96, Epsilon: 0.4975
Episode 12/100, Reward: 147.4363, Portfolio: $10,147.44, Epsilon: 0.4973
Episod

In [None]:
import os
import time
import schedule
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from binance.client import Client
from binance.exceptions import BinanceAPIException, BinanceOrderException
from collections import deque
import joblib # For loading scaler  

# --- Configuration (mostly from your Cell 1) ---
# NOTE(review): if this cell shares a namespace with the training cell, these
# assignments rebind the globals that the training classes read at call time.
TICKER = 'BTCUSDT'
SEQ_LENGTH = 60
# NOTE(review): the training configuration earlier in this file uses PREDICTION_STEPS = 10.
PREDICTION_STEPS = 1 # Not directly used in live state construction if LSTM predicts next price
INITIAL_BALANCE_REF = 10000.0 # Reference for normalization, actual balance managed live
TRADE_FEE_RATE = 0.0001 # For local simulation if needed, actual fees by Binance
MIN_TRADE_AMOUNT_USD = 10 # Minimum trade value in USD
# NOTE(review): the training configuration earlier in this file uses GRID_COUNT = 20;
# confirm the agent is meant to see a different grid resolution live.
GRID_COUNT = 100
LSTM_INPUT_FEATURES = 7 # open, high, low, close, volume, Returns, Volatility
DQN_STATE_SIZE = 11    # 7 market features + norm_balance + norm_btc_value + grid_pos + pred/price
ACTION_SIZE = 3        # Hold, Buy, Sell

# Live trading parameters
LIVE_RECALIBRATION_INTERVAL = 200 # Steps (minutes)
LIVE_LOOKBACK_WINDOW_GRID = 500 # Steps (minutes) for grid recalc
ORDER_TYPE = Client.ORDER_TYPE_MARKET

# Set device: use CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device for live trading: {device}")

# --- Global State (Initialize once) ---
# These are assigned by initialize_live_trader()
binance_client = None
lstm_predictor_live = None
dqn_agent_live = None
min_max_scaler_live = None

# Portfolio and Grid State
current_usdt_balance = 1000.0 # Example: Starting USDT for live test
current_btc_held = 0.0
current_grid_levels = np.array([])  # empty until initialize_live_trader() builds the first grid
live_steps_since_recal = 0
# Rolling window of recent prices; the deque discards the oldest entry once full.
live_recent_prices_for_grid = deque(maxlen=LIVE_LOOKBACK_WINDOW_GRID)

# --- Helper: Load your LSTMPredictor and DQN classes (copy from Cell 1) ---
# class LSTMPredictor(nn.Module): ... (already defined in your notebook)
# class DQN(nn.Module): ... (already defined in your notebook)
# class DQNAgent: ... (already defined in your notebook, we only need its model for acting)
# def create_grids(prices, num_grids=GRID_COUNT): ... (already defined)
# def get_binance_data(...): ... (already defined)

def initialize_live_trader():
    """Set up every global the live trading cycle depends on.

    Connects to the Binance Testnet, loads the fitted scaler, the LSTM
    predictor and the DQN network from disk, and builds the initial grid
    levels from recent prices.

    The API credentials are read from the ``BINANCE_TESTNET_API_KEY`` and
    ``BINANCE_TESTNET_API_SECRET`` environment variables.

    Returns:
        True when everything was initialised, False on any fatal problem
        (a message describing the problem is printed).
    """
    # `random` is used by the agent wrapper below; import it locally so this
    # function does not rely on another cell having imported it.
    import random

    global binance_client, lstm_predictor_live, dqn_agent_live, min_max_scaler_live
    global current_grid_levels, current_usdt_balance, current_btc_held

    # 1. Initialize Binance Client (Testnet)
    # Credentials must never be committed to source; any key that was previously
    # hard-coded here should be revoked.
    api_key = os.environ.get('BINANCE_TESTNET_API_KEY')
    api_secret = os.environ.get('BINANCE_TESTNET_API_SECRET')
    if not api_key or not api_secret:
        print("FATAL: Binance Testnet API keys not found in environment variables.")
        return False
    binance_client = Client(api_key, api_secret, testnet=True)
    try:
        binance_client.ping()
        print("Binance Testnet client initialized and connected.")
    except Exception as e:
        print(f"Failed to connect to Binance Testnet: {e}")
        return False

    # 2. Load Scaler
    try:
        min_max_scaler_live = joblib.load('min_max_scaler.joblib')
        print("MinMaxScaler loaded successfully.")
    except FileNotFoundError:
        print("FATAL: min_max_scaler.joblib not found. Train model first to generate it.")
        return False
    except Exception as e:
        print(f"Error loading scaler: {e}")
        return False

    # 3. Load LSTM Predictor
    # NOTE(review): the training code writes its checkpoint to LSTM_PATH
    # ('lstm_predictor_fixed_grid2.pth'); confirm this file name is the intended one.
    lstm_predictor_live = LSTMPredictor(input_size=LSTM_INPUT_FEATURES).to(device)
    try:
        checkpoint = torch.load('lstm_predictor_fixed_grid.pth', map_location=device)
        # Accept both a raw state_dict and the wrapped format written by
        # LSTMPredictor.save ({'model_state_dict': ..., 'best_loss': ...}).
        if isinstance(checkpoint, dict):
            checkpoint = checkpoint.get('model_state_dict', checkpoint)
        lstm_predictor_live.load_state_dict(checkpoint)
        lstm_predictor_live.eval()
        print("LSTM model loaded successfully.")
    except FileNotFoundError:
        print("FATAL: lstm_predictor_fixed_grid.pth not found.")
        return False
    except Exception as e:
        print(f"Error loading LSTM model: {e}")
        return False

    # 4. Load DQN Agent's Model
    # Only the network is needed for acting, so the state_dict is loaded into a
    # bare DQN and wrapped in a minimal agent exposing .act().
    class TempDQNAgent:
        def __init__(self, model):
            self.model = model
            self.action_size = model.fc3.out_features  # Infer action_size
            self.epsilon = 0.0  # No exploration in live trading

        def act(self, state_tensor):  # Expects a tensor
            if np.random.rand() <= self.epsilon:
                return random.randrange(self.action_size)
            with torch.no_grad():
                q_values = self.model(state_tensor)
            return torch.argmax(q_values).item()

    dqn_model_live = DQN(DQN_STATE_SIZE, ACTION_SIZE).to(device)
    try:
        # dqn_agent_fixed_grid.pth holds the model.state_dict() written by DQNAgent.save
        dqn_model_live.load_state_dict(torch.load('dqn_agent_fixed_grid.pth', map_location=device))
        dqn_model_live.eval()
        dqn_agent_live = TempDQNAgent(dqn_model_live)
        print("DQN model loaded successfully.")
    except FileNotFoundError:
        print("FATAL: dqn_agent_fixed_grid.pth not found.")
        return False
    except Exception as e:
        print(f"Error loading DQN model: {e}")
        return False

    # 5. Initialize Grids (fetch some recent data to base them on)
    print("Fetching initial data for grid setup...")
    initial_price_data = get_binance_data(symbol=TICKER, interval=Client.KLINE_INTERVAL_1MINUTE, lookback='200')
    if not initial_price_data.empty and len(initial_price_data) > 20:
        closes = initial_price_data['close'].values
        current_grid_levels = create_grids(closes, GRID_COUNT)
        print(f"Initial grid levels created: {np.around(current_grid_levels[:5], 2)}... (first 5)")
        # Prime the rolling price window used for adaptive grid recalculation.
        live_recent_prices_for_grid.extend(closes[-(LIVE_LOOKBACK_WINDOW_GRID//2):])
    else:
        print("Could not fetch enough initial data for grid setup. Using default wide grids.")
        # Fallback grids if data fetch fails, adjust as needed
        avg_price_guess = 60000
        current_grid_levels = np.linspace(avg_price_guess * 0.8, avg_price_guess * 1.2, GRID_COUNT + 1)[1:-1]

    # The balances are the hard-coded module-level starting values; nothing is
    # fetched from the Testnet account here.
    print(f"Initial live trading balance: USDT: {current_usdt_balance}, BTC: {current_btc_held}")

    return True


def recalculate_grids_live(prices_list):
    """Recalculate grids based on a recent price window for live trading.

    Args:
        prices_list: recent prices; any sized sequence (list, deque or
            NumPy array) is accepted.

    Returns:
        The new interior grid levels spanning the 10th-90th percentile of the
        prices, or the existing ``current_grid_levels`` when fewer than 50
        prices are available or no level could be produced.
    """
    # `len()` works for lists, deques and arrays alike; plain truthiness would
    # raise for a multi-element NumPy array.
    if prices_list is None or len(prices_list) < 50:  # Need enough data
        return current_grid_levels  # Return existing if not enough data

    recent_prices = np.asarray(prices_list)
    low = np.percentile(recent_prices, 10)
    high = np.percentile(recent_prices, 90)

    if high <= low:  # Safety check
        print("Warning: Grid recalculation - high <= low. Widening range.")
        high = low * 1.05  # Ensure some spread
        if high <= low:
            # Scaling cannot widen a zero or negative low; use a fixed offset.
            high = low + 100

    new_grids = np.linspace(low, high, num=GRID_COUNT + 1)[1:-1]
    if len(new_grids) == 0:  # another safety
        print("Warning: Grid recalculation resulted in empty grids. Keeping old ones.")
        return current_grid_levels
    return new_grids


def _apply_order_to_portfolio(order, is_buy):
    """Fold a Binance order response into the tracked USDT/BTC balances.

    Reads ``executedQty`` (base asset filled) and ``cummulativeQuoteQty``
    (quote asset exchanged) from the response and, when a ``fills`` list is
    present, also deducts commissions that were charged in the base asset or
    in USDT. Responses that report no fill leave the balances untouched.

    Args:
        order: Object returned by ``binance_client.create_order``.
        is_buy: True for a BUY order, False for a SELL order.

    Returns:
        True if the tracked balances were updated, False otherwise.
    """
    global current_usdt_balance, current_btc_held

    if not isinstance(order, dict):
        return False
    try:
        filled_base = float(order.get('executedQty') or 0.0)
        filled_quote = float(order.get('cummulativeQuoteQty') or 0.0)
    except (TypeError, ValueError):
        return False
    if filled_base <= 0 or filled_quote <= 0:
        return False

    # The balance variables are USDT/BTC specific; derive the base asset name
    # from TICKER so commission matching follows the configured symbol.
    quote_asset = 'USDT'
    base_asset = TICKER[:-len(quote_asset)] if TICKER.endswith(quote_asset) else TICKER

    fee_base = 0.0
    fee_quote = 0.0
    for fill in order.get('fills') or []:
        try:
            fee = float(fill.get('commission') or 0.0)
        except (AttributeError, TypeError, ValueError):
            continue
        fee_asset = fill.get('commissionAsset')
        if fee_asset == base_asset:
            fee_base += fee
        elif fee_asset == quote_asset:
            fee_quote += fee
        # Commissions paid in any other asset do not touch these two balances.

    if is_buy:
        current_usdt_balance -= filled_quote + fee_quote
        current_btc_held += filled_base - fee_base
    else:
        current_usdt_balance += filled_quote - fee_quote
        current_btc_held -= filled_base + fee_base

    # Guard against tiny negative values caused by float rounding.
    current_usdt_balance = max(current_usdt_balance, 0.0)
    current_btc_held = max(current_btc_held, 0.0)
    return True


def run_live_trading_cycle():
    """Run one fetch -> predict -> decide -> trade -> recalibrate iteration.

    Steps, in order:
      1. Fetch the latest ``SEQ_LENGTH + 20`` one-minute klines for ``TICKER``.
      2. Derive ``Returns`` / ``Volatility`` and scale the seven features with
         ``min_max_scaler_live``.
      3. Feed the last ``SEQ_LENGTH`` scaled rows to ``lstm_predictor_live``.
      4. Build the DQN state: last scaled row, normalised USDT balance,
         normalised BTC value, grid position and prediction / price.
      5. Ask ``dqn_agent_live`` for an action (0 hold, 1 buy, 2 sell).
      6. Submit the order through ``binance_client.create_order`` and fold the
         reported fills back into ``current_usdt_balance`` /
         ``current_btc_held`` so later cycles see the real position.
      7. Every ``LIVE_RECALIBRATION_INTERVAL`` completed cycles, rebuild the
         grid from ``live_recent_prices_for_grid``.

    The cycle returns early (after printing a message) when too little data
    is available or the assembled state does not have ``DQN_STATE_SIZE``
    entries; in those cases no order is sent and the recalibration counter is
    not advanced.
    """
    global current_usdt_balance, current_btc_held, current_grid_levels
    global live_steps_since_recal, live_recent_prices_for_grid
    global lstm_predictor_live, dqn_agent_live, min_max_scaler_live, binance_client

    print(f"\n[{time.ctime()}] --- Running Live Trading Cycle ---")

    # 1. Fetch Market Data
    # SEQ_LENGTH rows feed the LSTM; the extra 20 are consumed by the rolling
    # volatility window (and the pct_change) before dropna().
    required_klines = SEQ_LENGTH + 20
    df_live_raw = get_binance_data(symbol=TICKER, interval=Client.KLINE_INTERVAL_1MINUTE, lookback=str(required_klines))

    if df_live_raw.empty or len(df_live_raw) < required_klines:
        print("Not enough live data fetched. Skipping cycle.")
        return

    current_price = df_live_raw['close'].iloc[-1]
    live_recent_prices_for_grid.append(current_price)
    print(f"Current Price ({TICKER}): {current_price:.2f}")

    # 2. Preprocess Data for LSTM
    df_features = df_live_raw[['open', 'high', 'low', 'close', 'volume']].copy()
    df_features['Returns'] = df_features['close'].pct_change()
    df_features['Volatility'] = df_features['Returns'].rolling(window=20).std()  # Match training
    df_features.dropna(inplace=True)  # Remove NaNs from rolling calculations

    if len(df_features) < SEQ_LENGTH:
        print("Not enough data after feature engineering. Skipping cycle.")
        return

    # Same seven columns, in the same order, as DataProcessor.preprocess().
    features_to_scale = df_features[['open', 'high', 'low', 'close', 'volume', 'Returns', 'Volatility']]
    scaled_live_features = min_max_scaler_live.transform(features_to_scale)

    lstm_input_sequence = scaled_live_features[-SEQ_LENGTH:]  # (SEQ_LENGTH, 7)
    lstm_input_tensor = torch.tensor(lstm_input_sequence[np.newaxis, ...], dtype=torch.float32).to(device)

    # 3. Get LSTM Prediction
    with torch.no_grad():
        # NOTE(review): the output is treated as an unscaled price. If the
        # model was trained on scaled targets it must be inverse-transformed
        # first - confirm against the training code.
        lstm_predicted_price = lstm_predictor_live(lstm_input_tensor).cpu().item()

    normalized_lstm_prediction = lstm_predicted_price / current_price if current_price != 0 else 0
    print(f"LSTM Predicted Price: {lstm_predicted_price:.2f}, Normalized for state: {normalized_lstm_prediction:.4f}")

    # 4. Construct DQN State
    # Last tick of scaled data for the state
    current_scaled_market_data_tick = lstm_input_sequence[-1]

    # Grid position: index of the grid cell holding the price, scaled to [0, 1].
    if len(current_grid_levels) == 0:  # Safety for uninitialized grids
        grid_position_live = 0.5  # Default to middle
    else:
        grid_position_live = np.digitize(current_price, current_grid_levels) / len(current_grid_levels)
        grid_position_live = np.clip(grid_position_live, 0, 1)

    # Normalize balance and holdings by the same reference amount.
    norm_balance = current_usdt_balance / INITIAL_BALANCE_REF
    norm_btc_value = (current_btc_held * current_price) / INITIAL_BALANCE_REF

    live_state_np = np.array([
        *current_scaled_market_data_tick,
        norm_balance,
        norm_btc_value,
        grid_position_live,
        normalized_lstm_prediction
    ], dtype=np.float32)

    if len(live_state_np) != DQN_STATE_SIZE:
        print(f"FATAL: Constructed state size {len(live_state_np)} != DQN_STATE_SIZE {DQN_STATE_SIZE}. Mismatch in features.")
        return

    live_state_tensor = torch.tensor(live_state_np, dtype=torch.float32).unsqueeze(0).to(device)

    # 5. Get DQN Action
    action = dqn_agent_live.act(live_state_tensor)  # 0: hold, 1: buy, 2: sell
    action_map = {0: "HOLD", 1: "BUY", 2: "SELL"}
    print(f"DQN Action: {action_map[action]}")

    # 6. Execute Trade
    if action == 1:  # Buy
        # Buy more when price is near lower grids (grid_position_live close to 0)
        buy_multiplier = max(0.1, 1 - grid_position_live)
        # Spend at most 10% of the available USDT, scaled by the multiplier.
        max_spend_usd = current_usdt_balance * 0.10
        amount_to_spend_usd = max_spend_usd * buy_multiplier

        if amount_to_spend_usd >= MIN_TRADE_AMOUNT_USD and current_usdt_balance >= amount_to_spend_usd:
            print(f"Attempting BUY: Spend {amount_to_spend_usd:.2f} USDT for {TICKER} at ~{current_price:.2f}")
            try:
                order = binance_client.create_order(
                    symbol=TICKER,
                    side=Client.SIDE_BUY,
                    type=ORDER_TYPE,
                    quoteOrderQty=f"{amount_to_spend_usd:.8f}")  # sent as a string to keep precision
            except BinanceAPIException as e:
                print(f"Binance API Exception (BUY): {e}")
            except Exception as e:
                print(f"Error executing BUY order: {e}")
            else:
                print(f"BUY Test Order successful: {order}")
                # Keep the tracked position in sync with what actually filled;
                # otherwise the next state and trade size use stale balances.
                if not _apply_order_to_portfolio(order, is_buy=True):
                    print("BUY order response reported no fills; tracked balances left unchanged.")
        else:
            print(f"BUY condition not met: Spend amount {amount_to_spend_usd:.2f} < min OR insufficient balance.")

    elif action == 2:  # Sell
        # Sell more when price is near upper grids (grid_position_live close to 1)
        sell_multiplier = max(0.1, grid_position_live)
        # Sell at most 10% of the BTC held, scaled by the multiplier.
        max_sell_btc = current_btc_held * 0.10
        amount_to_sell_btc = max_sell_btc * sell_multiplier

        value_of_btc_to_sell = amount_to_sell_btc * current_price
        if current_btc_held > 0 and amount_to_sell_btc > 0 and value_of_btc_to_sell >= MIN_TRADE_AMOUNT_USD:
            # NOTE(review): 0.00001 is used as both the minimum quantity and
            # the lot step - fetch the real LOT_SIZE filter from the symbol
            # info for robustness.
            min_btc_qty_binance = 0.00001
            # Round DOWN to a whole number of lot steps: the exchange rejects
            # quantities that are not a multiple of the step, and rounding up
            # could try to sell more than intended.
            sell_qty_btc = np.floor(amount_to_sell_btc / min_btc_qty_binance) * min_btc_qty_binance
            if sell_qty_btc < min_btc_qty_binance:
                print(f"SELL quantity {amount_to_sell_btc:.8f} BTC is below Binance minimum {min_btc_qty_binance:.8f} BTC.")
            else:
                print(f"Attempting SELL: {sell_qty_btc:.8f} {TICKER} at ~{current_price:.2f}")
                try:
                    order = binance_client.create_order(
                        symbol=TICKER,
                        side=Client.SIDE_SELL,
                        type=ORDER_TYPE,
                        quantity=f"{sell_qty_btc:.8f}")  # sent as a string to keep precision
                except BinanceAPIException as e:
                    print(f"Binance API Exception (SELL): {e}")
                except Exception as e:
                    print(f"Error executing SELL order: {e}")
                else:
                    print(f"SELL Test Order successful: {order}")
                    if not _apply_order_to_portfolio(order, is_buy=False):
                        print("SELL order response reported no fills; tracked balances left unchanged.")
        else:
            print(f"SELL condition not met: Sell amount {amount_to_sell_btc:.8f} BTC (value {value_of_btc_to_sell:.2f} USD) too small or no BTC held.")

    print(f"Current Portfolio: USDT: {current_usdt_balance:.2f}, BTC: {current_btc_held:.8f}, Value: ~{(current_usdt_balance + current_btc_held * current_price):.2f} USDT")

    # 7. Adapt Grids Periodically
    live_steps_since_recal += 1
    if live_steps_since_recal >= LIVE_RECALIBRATION_INTERVAL:
        if len(live_recent_prices_for_grid) >= LIVE_LOOKBACK_WINDOW_GRID:
            print("Recalculating grid levels...")
            new_grids = recalculate_grids_live(list(live_recent_prices_for_grid))
            if not np.array_equal(new_grids, current_grid_levels) and len(new_grids) > 0:
                current_grid_levels = new_grids
                print(f"Grid levels adapted. New first 5: {np.around(current_grid_levels[:5], 2)}...")
            else:
                print("Grid levels unchanged or recalculation failed.")
            live_steps_since_recal = 0
        else:
            # Counter is intentionally not reset, so recalibration is retried
            # on the next cycle until the price window is full.
            print(f"Not enough data in live_recent_prices_for_grid ({len(live_recent_prices_for_grid)}/{LIVE_LOOKBACK_WINDOW_GRID}) to adapt grids yet.")


def start_live_trading_bot():
    """Initialise the live trader and run a trading cycle once per minute.

    Calls ``initialize_live_trader()``; if that returns a falsy value the
    function prints a message and returns. Otherwise it runs one cycle
    immediately, registers ``run_live_trading_cycle`` with ``schedule`` to
    run every minute, and polls the scheduler once per second until
    interrupted with Ctrl+C.

    Exceptions raised inside the polling loop are printed and followed by a
    60 second pause before polling resumes. The scheduled job is always
    cancelled on exit, so calling this function again (e.g. re-running the
    notebook cell) does not leave duplicate jobs behind.
    """
    print("Initializing live trading bot...")
    if not initialize_live_trader():
        print("Bot initialization failed. Exiting.")
        return

    print("Live trading bot initialized. Starting 1-minute trading cycle.")
    # The cycle calls create_order, not create_test_order, so say so.
    print("Orders are sent with create_order (not create_test_order) and execute on the configured Binance endpoint.")
    print("Press Ctrl+C to stop the bot.")

    scheduled_job = None
    try:
        # Run the first cycle immediately
        run_live_trading_cycle()

        # Schedule the job
        scheduled_job = schedule.every(1).minutes.do(run_live_trading_cycle)

        while True:
            try:
                schedule.run_pending()
                time.sleep(1)
            except Exception as e:
                print(f"An error occurred in the main loop: {e}")
                time.sleep(60)  # Wait a bit before retrying if a major error occurs
    except KeyboardInterrupt:
        # Caught at the outer level so Ctrl+C during the first cycle or the
        # 60 second back-off also stops the bot cleanly.
        print("\nStopping live trading bot...")
    finally:
        # The default scheduler is module-global and outlives this call;
        # without this, every restart would add one more job per minute.
        if scheduled_job is not None:
            schedule.cancel_job(scheduled_job)

# To run the bot:
# NOTE: this call does not return while the bot is running - it polls the
# scheduler in a loop until interrupted with Ctrl+C (or returns right away if
# initialization fails).
start_live_trading_bot()


In [None]:
import os

from binance.client import Client

# Credentials are read from the environment so that no key material is stored
# in the notebook source. Any key pair that was previously committed in this
# cell should be treated as exposed and regenerated.
api_key = os.environ.get('BINANCE_TESTNET_API_KEY')
api_secret = os.environ.get('BINANCE_TESTNET_API_SECRET')
if not api_key or not api_secret:
    raise RuntimeError(
        "Set the BINANCE_TESTNET_API_KEY and BINANCE_TESTNET_API_SECRET "
        "environment variables before running this cell."
    )

client = Client(api_key, api_secret)
# Point every subsequent request at the Spot Testnet REST endpoint.
client.API_URL = 'https://testnet.binance.vision/api'

# Get current market price for BTC/USDT
ticker = client.get_symbol_ticker(symbol='BTCUSDT')
market_price = float(ticker['price'])  # the 'price' field is converted from its raw value to float

print(f"Current BTC/USDT Market Price: {market_price}")


In [None]:
# Convert a USDT budget into a BTC quantity using `market_price` from the
# ticker cell.
usdt_to_spend = 10  # for example
# round() rounds to nearest, so the resulting quantity can cost slightly more
# than usdt_to_spend.
# NOTE(review): 6 decimals may be finer than the symbol's lot step size -
# confirm against the exchange's symbol info before using this in an order.
quantity_to_buy = round(usdt_to_spend / market_price, 6)  # 6 decimal precision
print(f"Quantity of BTC to buy with ${usdt_to_spend}: {quantity_to_buy}")


In [None]:
# Market round trip sized in the base asset: buy 0.001 BTC, then sell 0.001 BTC.
# NOTE(review): these place orders on whatever endpoint `client` is configured
# for - confirm it points at the testnet before running.
client.order_market_buy(symbol='BTCUSDT', quantity=0.001)
client.order_market_sell(symbol='BTCUSDT', quantity=0.001)

In [None]:
# Market round trip sized via quoteOrderQty=100 instead of a BTC quantity.
# NOTE(review): presumably this means 100 USDT to spend on the buy and 100 USDT
# to receive on the sell - confirm against the Binance order documentation.
client.order_market_buy(symbol='BTCUSDT', quoteOrderQty=100)
client.order_market_sell(symbol='BTCUSDT', quoteOrderQty=100)