In [None]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import warnings
import asyncio
import ccxt.async_support as ccxt  # Use asynchronous version
from backtesting import Backtest, Strategy
from backtesting.lib import resample_apply
from colorama import Fore, Style
from tqdm.notebook import tqdm
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from collections import deque
import random  # Needed for random sampling in agent's replay memory

# Suppress Python warnings and TensorFlow's Python-side logger output
warnings.filterwarnings("ignore")
tf.get_logger().setLevel('ERROR')
# NOTE(review): TensorFlow has already been imported above; TF_CPP_MIN_LOG_LEVEL
# is presumably read when the native library loads, so setting it here may be
# too late to silence C++ logs — confirm, or move it before `import tensorflow`.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Force TensorFlow to use CPU by hiding all CUDA devices
# NOTE(review): this is also set after the TensorFlow import; it presumably only
# takes effect if no GPU has been initialised yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Ensure plots display correctly in Jupyter notebooks
%matplotlib inline

# Allow nested event loops in Jupyter
import nest_asyncio
nest_asyncio.apply()

# -------------------------------------
# 1. Asynchronous Data Fetching Functions
# -------------------------------------

async def fetch_exchange_data(symbol, timeframe, start_date, end_date, limit=1000):
    """
    Download OHLCV candles for one symbol from Bybit, page by page.

    Args:
        symbol: Market symbol understood by ccxt, e.g. ``'SOL/USDT'``.
        timeframe: Candle size such as ``'1m'`` or ``'4h'``.
        start_date: First day to fetch, ``'YYYY-MM-DD'`` (taken as 00:00:00 UTC).
        end_date: Last day to fetch, ``'YYYY-MM-DD'`` (taken as 23:59:59 UTC).
        limit: Maximum number of candles requested per call.

    Returns:
        A DataFrame indexed by ``Datetime`` with the columns
        ``Open, High, Low, Close, Volume``; an empty ``pd.DataFrame()`` when
        nothing could be fetched.

    Raises:
        ValueError: If either date cannot be parsed.

    A failure while paging is reported and ends the download early; candles
    fetched so far are still returned (best effort).
    """
    exchange = ccxt.bybit({
        'enableRateLimit': True,  # Enable rate limiting
    })
    all_ohlcv_data = []
    end_timestamp = None

    try:
        await exchange.load_markets()
        since = exchange.parse8601(start_date + 'T00:00:00Z')
        end_timestamp = exchange.parse8601(end_date + 'T23:59:59Z')
        if since is None or end_timestamp is None:
            raise ValueError(f"Could not parse date range: {start_date!r} .. {end_date!r}")

        timeframe_ms = timeframe_to_milliseconds(timeframe)
        # Expected candle count (ceiling division) so the bar shows real progress.
        expected_candles = max(1, -(-(end_timestamp - since) // timeframe_ms))

        with tqdm(total=expected_candles, desc=f"Fetching data for {symbol}", leave=False) as pbar:
            while since < end_timestamp:
                try:
                    ohlcv_data = await exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=limit)
                except Exception as exc:
                    # Keep what was already downloaded, but do not hide the failure.
                    print(f"{Fore.RED}Fetching {symbol} stopped early: {exc!r}{Style.RESET_ALL}")
                    break
                if not ohlcv_data:
                    break

                all_ohlcv_data.extend(ohlcv_data)
                pbar.update(len(ohlcv_data))

                next_since = ohlcv_data[-1][0] + timeframe_ms
                if next_since <= since:
                    # The exchange did not move the cursor forward; stop rather
                    # than request the same page forever.
                    break
                since = next_since
                await asyncio.sleep(exchange.rateLimit / 1000)
    finally:
        # Always release the HTTP session, including when an error propagates.
        await exchange.close()

    if not all_ohlcv_data:
        return pd.DataFrame()  # Return empty DataFrame if no data

    df = pd.DataFrame(all_ohlcv_data, columns=['timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
    # The last page may run past the requested end; pages may also overlap.
    df = df[df['timestamp'] <= end_timestamp]
    df = df.drop_duplicates(subset='timestamp').sort_values('timestamp')
    if df.empty:
        return pd.DataFrame()

    df['Datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
    df = df.set_index('Datetime')
    return df[['Open', 'High', 'Low', 'Close', 'Volume']]

def timeframe_to_milliseconds(timeframe):
    """
    Convert a timeframe string such as ``'15m'`` or ``'4h'`` to milliseconds.

    The string is a positive integer followed by a one-letter unit:
    ``s`` seconds, ``m`` minutes, ``h`` hours, ``d`` days, ``w`` weeks,
    ``M`` months (approximated as 30 days), ``y`` years (approximated as
    365 days).

    Args:
        timeframe: Timeframe string, e.g. ``'1m'``.

    Returns:
        The duration in milliseconds as an int.

    Raises:
        ValueError: If the string is malformed, the unit is unknown, or the
            multiplier is not a positive integer.
    """
    second = 1000
    day = 24 * 60 * 60 * second
    unit_ms = {
        's': second,
        'm': 60 * second,
        'h': 60 * 60 * second,
        'd': day,
        'w': 7 * day,
        'M': 30 * day,   # Approximation
        'y': 365 * day,  # Approximation
    }

    if not isinstance(timeframe, str) or len(timeframe) < 2:
        raise ValueError(f"Invalid timeframe: {timeframe!r}")

    unit = timeframe[-1]
    if unit not in unit_ms:
        raise ValueError(f"Unknown timeframe unit: {unit}")

    try:
        value = int(timeframe[:-1])
    except ValueError:
        raise ValueError(f"Invalid timeframe: {timeframe!r}") from None

    # A zero or negative step would keep a paging cursor from advancing.
    if value <= 0:
        raise ValueError(f"Timeframe multiplier must be positive: {timeframe!r}")

    return value * unit_ms[unit]

async def fetch_all_data(symbols, timeframe, start_date, end_date):
    """
    Fetch OHLCV data for several symbols concurrently.

    One ``fetch_exchange_data`` coroutine is created per symbol and all of
    them are awaited together.

    Returns:
        A dict mapping each symbol to the DataFrame fetched for it.
    """
    pending = [
        fetch_exchange_data(sym, timeframe, start_date, end_date)
        for sym in symbols
    ]
    frames = await asyncio.gather(*pending)
    # gather() keeps the input order, so results pair up with the symbols.
    return {sym: frame for sym, frame in zip(symbols, frames)}

# -------------------------------------
# 2. Calculating Volume Profile
# -------------------------------------

def _value_area_for_day(day_bars):
    """Return ``(VAH, VAL, POC)`` for one day's bars, or NaNs if there is no volume."""
    if day_bars.empty or day_bars['Volume'].sum() == 0:
        return np.nan, np.nan, np.nan

    # Positional labels keep every lookup scalar even when the incoming
    # index contains duplicate timestamps.
    bars = day_bars.reset_index(drop=True)
    total_volume = bars['Volume'].sum()

    # Walk the bars from the lowest to the highest close, accumulating volume.
    by_price = bars.sort_values(by='Close', kind='stable')
    cum_volume = by_price['Volume'].cumsum()

    # Point of Control (POC): close of the bar with the highest volume
    poc_price = bars.loc[bars['Volume'].idxmax(), 'Close']

    # Value Area High (VAH): high of the first bar (by ascending close) at
    # which cumulative volume reaches 68% of the day's total
    upper = cum_volume[cum_volume >= total_volume * 0.68]
    vah_price = bars.loc[upper.index[0], 'High'] if len(upper) else np.nan

    # Value Area Low (VAL): low of the last bar (by ascending close) at
    # which cumulative volume is still within 32% of the day's total
    lower = cum_volume[cum_volume <= total_volume * 0.32]
    val_price = bars.loc[lower.index[-1], 'Low'] if len(lower) else np.nan

    return vah_price, val_price, poc_price


def calculate_volume_profile(data):
    """
    Calculate Value Area High (VAH), Value Area Low (VAL), and Point of
    Control (POC) for each calendar day.

    Args:
        data: DataFrame with a DatetimeIndex and at least the columns
            ``High``, ``Low``, ``Close`` and ``Volume``.

    Returns:
        A DataFrame with one row per day between the first and last
        timestamp (days without bars or without volume hold NaN) and the
        float columns ``VAH``, ``VAL`` and ``POC``. Empty input yields an
        empty frame with those columns.
    """
    columns = ['VAH', 'VAL', 'POC']
    if data.empty:
        return pd.DataFrame(columns=columns, index=pd.DatetimeIndex([]), dtype=float)

    day_labels = []
    day_values = []
    # Iterating the resampler visits every daily bin, including empty ones.
    for day, day_bars in data.resample('1D'):
        day_labels.append(day)
        day_values.append(_value_area_for_day(day_bars))

    index = pd.DatetimeIndex(day_labels, name=data.index.name)
    return pd.DataFrame(day_values, index=index, columns=columns, dtype=float)

# -------------------------------------
# 3. Reinforcement Learning Agent
# -------------------------------------

class RLAgent:
    """
    Deep Q-learning agent with a small dense network, an epsilon-greedy
    policy and a bounded experience-replay buffer.

    Args:
        state_size: Number of features in one state vector.
        action_size: Number of discrete actions.
        hu: Units in each of the two hidden layers.
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, state_size, action_size, hu=16, lr=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=1000)  # oldest transitions fall out first
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model(hu, lr)

    def _build_model(self, hu, lr):
        """Build and compile the Q-network: two ReLU layers and a linear output per action."""
        model = Sequential()
        model.add(Dense(hu, input_dim=self.state_size, activation='relu'))
        model.add(Dense(hu, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=lr))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, training=True):
        """
        Choose an action for ``state`` (an array of shape ``(1, state_size)``).

        While training, a random action is taken with probability
        ``epsilon``; otherwise the action with the highest predicted
        Q-value is returned.
        """
        if training and np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size=16):
        """
        Train the network on a random minibatch from the replay buffer.

        The sampled transitions are stacked and handled with one batched
        predict per network input and a single ``fit`` call, rather than
        three Keras calls per transition. Afterwards the exploration rate
        is decayed, never dropping below ``epsilon_min``.
        """
        sample_size = min(len(self.memory), batch_size)
        if sample_size > 0:
            minibatch = random.sample(self.memory, sample_size)
            states, actions, rewards, next_states, dones = zip(*minibatch)

            # Each stored state has shape (1, state_size); stack to (batch, state_size).
            states = np.vstack(states)
            next_states = np.vstack(next_states)
            actions = np.asarray(actions, dtype=int)
            rewards = np.asarray(rewards, dtype=float)
            dones = np.asarray(dones, dtype=bool)

            next_q = self.model.predict(next_states, verbose=0)
            bootstrapped = rewards + self.gamma * np.amax(next_q, axis=1)

            # Only the Q-value of the action actually taken is moved to its
            # target; terminal transitions use the bare reward.
            targets = self.model.predict(states, verbose=0)
            targets[np.arange(sample_size), actions] = np.where(dones, rewards, bootstrapped)
            self.model.fit(states, targets, batch_size=sample_size, epochs=1, verbose=0)

        # Decrease exploration rate without undershooting the floor
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

# -------------------------------------
# 4. Training Function for the RL Agent
# -------------------------------------

def train_agent(agent, data, episodes=1, batch_size=16):
    """
    Train the RL agent by stepping through ``data`` bar by bar.

    Each step builds the state ``[Close, VAH, VAL, POC, position_flag]``
    divided by the current close, asks the agent for an action
    (0 = hold, 1 = buy, 2 = sell), stores the transition and runs a replay
    step once the buffer holds more than ``batch_size`` transitions.

    Reward: the relative gain versus the entry price when selling, and the
    same unrealised gain on every bar where a position is held; 0 otherwise.

    Args:
        agent: Object providing ``act``, ``remember``, ``replay``, ``memory``
            and ``epsilon``.
        data: DataFrame with the columns ``Close``, ``VAH``, ``VAL``, ``POC``.
        episodes: Number of passes over the data.
        batch_size: Minibatch size passed to ``agent.replay``.

    Returns:
        A list with the total reward of each episode.
    """
    print(f"{Fore.GREEN}Training the RL agent...{Style.RESET_ALL}")

    # Extract the feature columns once instead of doing a row lookup per step.
    features = data[['Close', 'VAH', 'VAL', 'POC']].to_numpy(dtype=float)
    row_ok = ~np.isnan(features).any(axis=1)
    n_steps = max(len(features) - 1, 0)  # every step needs a following row
    total_rewards = []

    for e in range(episodes):
        # Reset the environment so each episode starts flat
        position_flag = 0  # 1 if in position, 0 otherwise
        last_price = None
        total_reward = 0

        data_iter = tqdm(range(n_steps), desc=f"Episode {e+1}/{episodes}", leave=False)
        for idx in data_iter:
            # Skip transitions where either side has missing data, so no NaN
            # state ever reaches the replay buffer.
            if not (row_ok[idx] and row_ok[idx + 1]):
                continue

            current_price = features[idx, 0]

            # NOTE(review): dividing the whole vector by price also rescales
            # position_flag to 1/price; TestRLStrategy.next builds its state
            # the same way, so change both together or neither.
            state = np.array([[*features[idx], position_flag]]) / current_price

            # Decide action
            action = agent.act(state, training=True)

            # Map action to trading decision
            reward = 0
            if action == 1 and position_flag == 0:
                # Buy action
                position_flag = 1
                last_price = current_price
            elif action == 2 and position_flag == 1:
                # Sell action
                reward = (current_price - last_price) / last_price
                position_flag = 0
                last_price = None
            elif position_flag == 1 and last_price:
                # Hold position
                reward = (current_price - last_price) / last_price

            # The next state carries the position as it stands after the action.
            next_state = np.array([[*features[idx + 1], position_flag]]) / features[idx + 1, 0]

            done = idx == n_steps - 1  # Last data point

            agent.remember(state, action, reward, next_state, done)
            total_reward += reward

            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

            data_iter.set_postfix({'Total Reward': f'{total_reward:.4f}', 'Epsilon': f'{agent.epsilon:.4f}'})
        data_iter.close()
        total_rewards.append(total_reward)
        print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward:.4f}, Epsilon: {agent.epsilon:.4f}")

    print(f"{Fore.GREEN}Training completed.{Style.RESET_ALL}")
    return total_rewards

# -------------------------------------
# 5. Custom Strategy Class for Testing
# -------------------------------------

class TestRLStrategy(Strategy):
    """
    backtesting.py strategy that replays the trained RL agent greedily.

    On each bar the state ``[Close, VAH, VAL, POC, position_flag]`` is
    normalised by the current close and passed to the agent; action 1 opens
    a long position and action 2 closes it. The input data must already
    contain ``VAH``, ``VAL`` and ``POC`` columns.
    """

    def init(self):
        # Initialize the trained RL agent
        self.state_size = 5  # [Close, VAH, VAL, POC, Position]
        self.action_size = 3  # [Hold, Buy, Sell]
        self.agent = self._load_agent()

        self.position_flag = 0  # 1 if in position, 0 otherwise
        self.last_price = None

        # No forward-fill here: the framework exposes columns as NumPy-style
        # arrays (no .ffill()), and overwriting self.data.<col> would bypass
        # its per-bar slicing. Bars with missing values are skipped in next().

    def _load_agent(self):
        """Rebuild the training-time architecture and load the saved weights."""
        agent = RLAgent(state_size=self.state_size, action_size=self.action_size, hu=16)
        # Load the trained weights
        agent.load('rl_agent_weights.h5')
        return agent

    def next(self):
        # Take the position state from the broker so that rejected orders or
        # broker-side liquidations cannot leave the flag stale.
        self.position_flag = 1 if self.position else 0
        if not self.position_flag:
            self.last_price = None

        # Construct state
        current_price = self.data.Close[-1]
        vah = self.data.VAH[-1]
        val = self.data.VAL[-1]
        poc = self.data.POC[-1]

        if np.isnan(vah) or np.isnan(val) or np.isnan(poc):
            return  # Skip if data is not available

        state = np.array([[current_price, vah, val, poc, self.position_flag]])
        state = state / state[0][0]  # Normalize by current price (same as in training)

        # Decide action greedily (no exploration during testing)
        action = self.agent.act(state, training=False)

        # Map action to trading decision
        if action == 1 and not self.position_flag:
            # Buy action
            self.buy()
            self.last_price = current_price
        elif action == 2 and self.position_flag:
            # Sell action
            self.position.close()

    def on_trade(self, trade):
        # NOTE(review): backtesting.py's documented Strategy hooks are init()
        # and next(); this callback does not appear to be invoked by the
        # framework. Kept for compatibility with any external caller.
        if trade.is_closed:
            self.position_flag = 0
            self.last_price = None

# -------------------------------------
# 6. Main function to run everything
# -------------------------------------

async def main():
    """
    Run the full pipeline: fetch candles, derive the daily volume profile,
    train the RL agent on the first 80% of the bars and backtest it on the
    remaining 20%.

    Prints progress and the backtest statistics, writes the trained weights
    to ``rl_agent_weights.h5`` and plots the backtest. Returns ``None``.
    """
    SYMBOLS = ['SOL/USDT']  # Adjust symbols as needed
    timeframe = '1m'  # Adjust timeframe as needed
    # Fixed two-week window ending on end_date
    end_date = '2023-10-31'
    start_date = (dt.datetime.strptime(end_date, '%Y-%m-%d') - dt.timedelta(weeks=2)).strftime('%Y-%m-%d')

    print(f"{Fore.BLUE}Fetching data from {start_date} to {end_date} on a {timeframe} timeframe...{Style.RESET_ALL}")
    data_dict = await fetch_all_data(SYMBOLS, timeframe, start_date, end_date)

    symbol = SYMBOLS[0]
    data = data_dict[symbol]

    if data.empty:
        print(f"{Fore.RED}No data fetched for {symbol}. Exiting.{Style.RESET_ALL}")
        return

    print(f"{Fore.BLUE}Calculating volume profile for {symbol}...{Style.RESET_ALL}")
    profile_data = calculate_volume_profile(data)

    # A day's VAH/VAL/POC is only known once that day is over. Shift the
    # profile by one day so every bar sees the previous day's levels rather
    # than levels computed from bars that come after it (lookahead bias).
    known_profile = profile_data[['VAH', 'VAL', 'POC']].shift(1)
    # Carry each day's levels onto every intraday bar of that day; this does
    # not depend on a candle existing exactly at midnight.
    intraday_profile = known_profile.reindex(data.index, method='ffill')
    for column in ('VAH', 'VAL', 'POC'):
        data[column] = intraday_profile[column].to_numpy()
    data.ffill(inplace=True)

    # Drop rows with NaN values (this includes the first day, which has no prior profile)
    data.dropna(inplace=True)

    # Split data into training and testing sets (80% training, 20% testing)
    split_index = int(len(data) * 0.8)
    training_data = data.iloc[:split_index]
    testing_data = data.iloc[split_index:]

    if len(training_data) < 2 or testing_data.empty:
        print(f"{Fore.RED}Not enough usable data for {symbol} after cleaning. Exiting.{Style.RESET_ALL}")
        return

    # Build and train the agent using training data
    state_size = 5  # [Close, VAH, VAL, POC, Position]
    action_size = 3  # [Hold, Buy, Sell]
    agent = RLAgent(state_size=state_size, action_size=action_size, hu=16)
    train_agent(agent, training_data)

    # Save the trained agent; TestRLStrategy loads the weights from this file
    agent.save('rl_agent_weights.h5')

    # Prepare testing data for backtesting.py
    bt_data = testing_data.copy()
    bt_data.index.name = 'Datetime'
    bt_data = bt_data[['Open', 'High', 'Low', 'Close', 'Volume', 'VAH', 'VAL', 'POC']]

    # Run backtest with TestRLStrategy
    bt = Backtest(
        bt_data,
        TestRLStrategy,
        cash=100000,
        commission=.002,
        exclusive_orders=True,
        margin=0.1
    )

    print(f"{Fore.GREEN}Running backtest on testing data...{Style.RESET_ALL}")
    stats = bt.run()

    # Print the backtest results
    print(stats)

    # Plot the backtest results
    bt.plot()

# Notebook entry point: the whole pipeline is started by awaiting main()
# directly at the top level of the cell.
# NOTE(review): top-level `await` is not valid in a plain .py script; there,
# asyncio.run(main()) would be needed instead.
await main()