# Base Model✅

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Check CUDA availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

class DataHandler:
    def __init__(self):
        self.scaler = MinMaxScaler()
    
    def fetch_data(self, symbol, start_date, end_date):
        print(f"Fetching data for {symbol}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)
    
    def prepare_data(self, df):
        try:
            # Create copy to avoid SettingWithCopyWarning
            df = df.copy()
            
            # Convert to 1D numpy arrays
            close_prices = df['Close'].values.astype(float).flatten()
            high_prices = df['High'].values.astype(float).flatten()
            low_prices = df['Low'].values.astype(float).flatten()
            volume = df['Volume'].values.astype(float).flatten()
            
            # Technical Indicators
            df['Returns'] = df['Close'].pct_change()
            df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
            df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
            df['RSI'] = talib.RSI(close_prices, timeperiod=14)
            
            # MACD
            macd, signal, _ = talib.MACD(close_prices)
            df['MACD'] = macd
            df['MACD_signal'] = signal
            
            # Bollinger Bands
            bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
            df['BB_upper'] = bb_upper
            df['BB_middle'] = bb_middle
            df['BB_lower'] = bb_lower
            
            # Volume indicators
            df['OBV'] = talib.OBV(close_prices, volume)
            
            # Momentum
            df['MOM'] = talib.MOM(close_prices, timeperiod=14)
            
            # Handle NaN values and normalize
            features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 
                       'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 
                       'BB_lower', 'OBV', 'MOM']
            
            df[features] = df[features].fillna(method='ffill').fillna(method='bfill')
            df[features] = self.scaler.fit_transform(df[features])
            
            print(f"Data shape after preparation: {df.shape}")
            return df.dropna()
            
        except Exception as e:
            print(f"Error in prepare_data: {str(e)}")
            raise

class TradingEnvironment:
    def __init__(self, data, initial_balance=100000):
        if data is None or data.empty:
            raise ValueError("Data cannot be empty")
            
        self.data = data
        self.initial_balance = initial_balance
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                        'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                        'BB_lower', 'OBV', 'MOM']
        
        if not all(f in self.data.columns for f in self.features):
            missing = [f for f in self.features if f not in self.data.columns]
            raise ValueError(f"Missing features in data: {missing}")
            
        self.reset()
    
    def reset(self):
        self.balance = self.initial_balance
        self.position = 0
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()
    
    def _get_state(self):
        return self.data.iloc[self.current_step][self.features].values
    
    def step(self, action):
        # Get current price as float
        current_price = float(self.data.iloc[self.current_step]['Close'])
        
        # Store previous portfolio value for reward calculation
        prev_portfolio_value = self.portfolio_value_history[-1]
        
        # Execute trade with zero price protection
        if current_price > 0:  # Only execute trades if price is valid
            if action == 0 and self.position > 0:  # Sell
                self.balance += current_price * self.position
                self.position = 0
            elif action == 2 and self.position == 0:  # Buy
                shares = self.balance / current_price
                self.position = shares
                self.balance = 0
        
        # Move to next step
        self.current_step += 1
        
        # Calculate new portfolio value
        portfolio_value = self.balance + (self.position * current_price)
        self.portfolio_value_history.append(portfolio_value)
        
        # Calculate reward with safe division
        if prev_portfolio_value > 0:
            reward = (portfolio_value - prev_portfolio_value) / prev_portfolio_value
        else:
            reward = 0
        
        # Check if done
        done = self.current_step >= len(self.data) - 1
        
        return self._get_state(), reward, done

class DQNAgent(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQNAgent, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32
        
        # Neural network layers
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)
        
        self.optimizer = optim.Adam(self.parameters())
        self.criterion = nn.MSELoss()
        self.to(device)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        if random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        
        with torch.no_grad():
            state = torch.FloatTensor(state).unsqueeze(0).to(device)
            act_values = self(state)
            return torch.argmax(act_values).item()
    
    def replay(self):
        if len(self.memory) < self.batch_size:
            return 0
        
        minibatch = random.sample(self.memory, self.batch_size)
        
        states = torch.FloatTensor([i[0] for i in minibatch]).to(device)
        actions = torch.LongTensor([i[1] for i in minibatch]).to(device)
        rewards = torch.FloatTensor([i[2] for i in minibatch]).to(device)
        next_states = torch.FloatTensor([i[3] for i in minibatch]).to(device)
        dones = torch.FloatTensor([i[4] for i in minibatch]).to(device)
        
        # Current Q values
        current_q_values = self(states).gather(1, actions.unsqueeze(1))
        
        # Next Q values
        with torch.no_grad():
            next_q_values = self(next_states).max(1)[0]
            target_q_values = rewards + (1 - dones) * self.gamma * next_q_values
        
        # Compute loss and update
        loss = self.criterion(current_q_values.squeeze(), target_q_values)
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        # Update epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        
        return loss.item()

def test_agent(agent, test_data, initial_balance=10000):
    class TestEnvironment(TradingEnvironment):
        def __init__(self, data, initial_balance):
            super().__init__(data, initial_balance)
    
    # Create test environment
    test_env = TestEnvironment(test_data, initial_balance)
    
    # Disable exploration during testing
    original_epsilon = agent.epsilon
    agent.epsilon = 0
    
    # Run test episode
    state = test_env.reset()
    done = False
    actions_taken = []
    
    while not done:
        with torch.no_grad():
            action = agent.act(state)
            actions_taken.append(action)
            state, reward, done = test_env.step(action)
    
    # Restore original epsilon
    agent.epsilon = original_epsilon
    
    return test_env.portfolio_value_history, actions_taken

def plot_results(train_values, test_values, training_losses):
    fig = go.Figure()

    # Plot portfolio values
    fig.add_trace(go.Scatter(
        x=list(range(len(train_values))),
        y=train_values,
        mode='lines',
        name='Training Portfolio'
    ))

    fig.add_trace(go.Scatter(
        x=list(range(len(train_values), len(train_values) + len(test_values))),
        y=test_values,
        mode='lines',
        name='Testing Portfolio'
    ))

    fig.update_layout(
        title='Portfolio Value Over Time',
        xaxis_title='Trading Days',
        yaxis_title='Portfolio Value ($)',
        legend_title='Legend'
    )

    # Plot training loss
    fig.add_trace(go.Scatter(
        x=list(range(len(training_losses))),
        y=training_losses,
        mode='lines',
        name='Training Loss',
        yaxis='y2'
    ))

    fig.update_layout(
        yaxis2=dict(
            title='Loss',
            overlaying='y',
            side='right'
        ),
        title='Training Loss Over Episodes',
        xaxis_title='Episode',
        yaxis_title='Loss',
        legend_title='Legend'
    )

    fig.show()

def train_agent(env, agent, episodes):
    training_losses = []
    best_reward = float('-inf')
    
    for episode in tqdm(range(episodes), desc="Training"):
        state = env.reset()
        total_reward = 0
        episode_losses = []
        
        while True:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            
            agent.remember(state, action, reward, next_state, done)
            loss = agent.replay()
            
            if loss > 0:
                episode_losses.append(loss)
            
            state = next_state
            total_reward += reward
            
            if done:
                avg_loss = np.mean(episode_losses) if episode_losses else 0
                training_losses.append(avg_loss)
                
                if total_reward > best_reward:
                    best_reward = total_reward
                    torch.save(agent.state_dict(), 'best_model.pth')
                
                print(f"\nEpisode: {episode + 1}/{episodes}")
                print(f"Total Reward: {total_reward:.2f}")
                print(f"Average Loss: {avg_loss:.4f}")
                print(f"Epsilon: {agent.epsilon:.4f}")
                print(f"Final Portfolio Value: ${env.portfolio_value_history[-1]:.2f}")
                break
    
    return training_losses

if __name__ == "__main__":
    try:
        # Set random seeds for reproducibility
        torch.manual_seed(42)
        np.random.seed(42)
        random.seed(42)
        
        # Parameters
        SYMBOL = 'AAPL'
        START_DATE = '2020-01-01'
        END_DATE = '2024-02-11'
        EPISODES = 100
        
        print("Initializing trading simulation...")
        
        # Initialize components
        data_handler = DataHandler()
        full_data = data_handler.fetch_data(SYMBOL, START_DATE, END_DATE)
        train_size = int(len(full_data) * 0.6)
        val_size = int(len(full_data) * 0.2)

        train_data = full_data.iloc[:train_size]
        val_data = full_data.iloc[train_size:train_size+val_size]
        test_data = full_data.iloc[train_size+val_size:]    
        
        print("\nInitializing training environment...")
        env = TradingEnvironment(train_data)
        
        # Initialize agent
        state_size = len(env.features)
        action_size = 3  # sell, hold, buy
        agent = DQNAgent(state_size, action_size)
        
        print("\nStarting training...")
        print(f"State size: {state_size}")
        print(f"Action size: {action_size}")
        
        # Train agent
        training_losses = train_agent(env, agent, EPISODES)
        
        print("\nStarting testing phase...")
        test_portfolio_values, test_actions = test_agent(agent, test_data, initial_balance=10000)
        
        # Plot results
        plot_results(env.portfolio_value_history, test_portfolio_values, training_losses)
        
        # Print final results
        print("\nTraining Results:")
        print(f"Initial Balance: ${100000:.2f}")
        print(f"Final Training Portfolio Value: ${env.portfolio_value_history[-1]:.2f}")
        print(f"Training Return: {((env.portfolio_value_history[-1] / 100000) - 1) * 100:.2f}%")
        
        print("\nTesting Results:")
        print(f"Initial Balance: ${10000:.2f}")
        print(f"Final Testing Portfolio Value: ${test_portfolio_values[-1]:.2f}")
        print(f"Testing Return: {((test_portfolio_values[-1] / 10000) - 1) * 100:.2f}%")
        
        # Save final model
        torch.save({
            'model_state_dict': agent.state_dict(),
            'optimizer_state_dict': agent.optimizer.state_dict(),
            'training_losses': training_losses,
            'train_portfolio_values': env.portfolio_value_history,
            'test_portfolio_values': test_portfolio_values,
        }, 'final_model.pth')
        
    except Exception as e:
        print(f"\nError occurred: {str(e)}")
        raise

# Improved Version👍

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Set device for computations
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# -------------------------
# DataHandler and Aggregation
# -------------------------
class DataHandler:
    def __init__(self):
        self.scaler = MinMaxScaler()
    
    def fetch_data(self, symbol, start_date, end_date):
        print(f"Fetching data for {symbol} from {start_date} to {end_date}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)
    
    def prepare_data(self, df):
        try:
            df = df.copy()
            # Preserve original close price for portfolio calculations
            df['CloseOrig'] = df['Close'].astype(float)
            
            # Convert price and volume arrays
            close_prices = df['Close'].values.astype(float).flatten()
            high_prices = df['High'].values.astype(float).flatten()
            low_prices = df['Low'].values.astype(float).flatten()
            volume = df['Volume'].values.astype(float).flatten()
            
            # Calculate technical indicators
            df['Returns'] = df['Close'].pct_change()
            df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
            df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
            df['RSI'] = talib.RSI(close_prices, timeperiod=14)
            
            macd, signal, _ = talib.MACD(close_prices)
            df['MACD'] = macd
            df['MACD_signal'] = signal
            
            bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
            df['BB_upper'] = bb_upper
            df['BB_middle'] = bb_middle
            df['BB_lower'] = bb_lower
            
            df['OBV'] = talib.OBV(close_prices, volume)
            df['MOM'] = talib.MOM(close_prices, timeperiod=14)
            
            # Features to be used as input; note: we do not normalize the original price
            features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 
                        'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 
                        'BB_lower', 'OBV', 'MOM']
            
            # Fill NA values
            df[features] = df[features].fillna(method='ffill').fillna(method='bfill')
            # Normalize only the features (not the CloseOrig)
            df[features] = self.scaler.fit_transform(df[features])
            
            print(f"Data shape after preparation: {df.shape}")
            return df.dropna()
        except Exception as e:
            print(f"Error in prepare_data: {str(e)}")
            raise

    def fetch_multiple_data(self, symbols, start_date, end_date):
        data_dict = {}
        for symbol in symbols:
            data_dict[symbol] = self.fetch_data(symbol, start_date, end_date)
        return data_dict

def aggregate_data(data_dict):
    """
    Aggregates the data across multiple symbols by taking the average of 
    features over the common date range.
    """
    common_index = None
    for df in data_dict.values():
        if common_index is None:
            common_index = df.index
        else:
            common_index = common_index.intersection(df.index)
    
    reindexed = []
    for symbol, df in data_dict.items():
        reindexed.append(df.loc[common_index])
    # Create a multi-index DataFrame (first level: symbol, second: feature)
    concatenated = pd.concat(reindexed, axis=1, keys=data_dict.keys())
    # Average over the symbols (grouping by the feature level)
    aggregated = concatenated.groupby(axis=1, level=1).mean()
    return aggregated

# -------------------------
# Trading Environment
# -------------------------
class TradingEnvironment:
    def __init__(self, data, initial_balance=100000):
        if data is None or data.empty:
            raise ValueError("Data cannot be empty")
        # Retain the original date index for testing visualization.
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                         'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                         'BB_lower', 'OBV', 'MOM']
        if 'CloseOrig' not in data.columns:
            raise ValueError("Missing 'CloseOrig' column for price calculations")
        self.reset()
    
    def reset(self):
        self.balance = self.initial_balance
        self.position = 0.0  # Number of shares held
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()
    
    def _get_state(self):
        return self.data.iloc[self.current_step][self.features].values
    
    def step(self, action):
        # Current price comes from the raw close
        current_price = float(self.data.iloc[self.current_step]['CloseOrig'])
        prev_value = self.portfolio_value_history[-1]
        
        # Action space (5 discrete actions):
        # 0: Sell all position
        # 1: Sell half of position
        # 2: Hold (do nothing)
        # 3: Buy using 50% of available cash
        # 4: Buy using full available cash
        if current_price > 0:
            if action == 0 and self.position > 0:  # Sell all
                self.balance += current_price * self.position
                self.position = 0.0
            elif action == 1 and self.position > 0:  # Sell half
                shares_to_sell = self.position * 0.5
                self.balance += current_price * shares_to_sell
                self.position -= shares_to_sell
            elif action == 3 and self.balance > 0:  # Buy with 50% cash
                cash_to_use = self.balance * 0.5
                shares_to_buy = cash_to_use / current_price
                self.position += shares_to_buy
                self.balance -= current_price * shares_to_buy
            elif action == 4 and self.balance > 0:  # Buy with full cash
                shares_to_buy = self.balance / current_price
                self.position += shares_to_buy
                self.balance = 0
            # Action 2 is hold (do nothing)
        
        new_value = self.balance + (self.position * current_price)
        self.portfolio_value_history.append(new_value)
        reward = (new_value - prev_value) / prev_value if prev_value > 0 else 0
        
        self.current_step += 1
        done = (self.current_step >= len(self.data) - 1)
        next_state = self._get_state() if not done else np.zeros(len(self.features))
        return next_state, reward, done

# -------------------------
# DQN Agent
# -------------------------
class DQNAgent(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQNAgent, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32
        
        # Neural network layers
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)
        
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.to(device)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        if random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            act_values = self(state_tensor)
            return torch.argmax(act_values).item()
    
    def replay(self):
        if len(self.memory) < self.batch_size:
            return 0
        minibatch = random.sample(self.memory, self.batch_size)
        states = torch.FloatTensor([i[0] for i in minibatch]).to(device)
        actions = torch.LongTensor([i[1] for i in minibatch]).to(device)
        rewards = torch.FloatTensor([i[2] for i in minibatch]).to(device)
        next_states = torch.FloatTensor([i[3] for i in minibatch]).to(device)
        dones = torch.FloatTensor([i[4] for i in minibatch]).to(device)
        
        current_q = self(states).gather(1, actions.unsqueeze(1)).squeeze()
        with torch.no_grad():
            next_q = self(next_states).max(1)[0]
            target_q = rewards + (1 - dones) * self.gamma * next_q
        
        loss = self.criterion(current_q, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        
        return loss.item()

# -------------------------
# Training and Testing Functions
# -------------------------
def train_agent(env, agent, episodes):
    training_losses = []
    best_reward = float('-inf')
    
    for episode in tqdm(range(episodes), desc="Training"):
        state = env.reset()
        total_reward = 0
        episode_losses = []
        
        while True:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            loss = agent.replay()
            if loss > 0:
                episode_losses.append(loss)
            state = next_state
            total_reward += reward
            
            if done:
                avg_loss = np.mean(episode_losses) if episode_losses else 0
                training_losses.append(avg_loss)
                if total_reward > best_reward:
                    best_reward = total_reward
                    torch.save(agent.state_dict(), 'best_model.pth')
                print(f"\nEpisode: {episode+1}/{episodes} | Total Reward: {total_reward:.2f} | Avg Loss: {avg_loss:.4f} | Epsilon: {agent.epsilon:.4f} | Final Portfolio: ${env.portfolio_value_history[-1]:.2f}")
                break
    return training_losses

def test_agent(agent, test_data, initial_balance=10000):
    # Create a test environment subclassing TradingEnvironment
    class TestEnvironment(TradingEnvironment):
        def __init__(self, data, initial_balance):
            super().__init__(data, initial_balance)
    
    test_env = TestEnvironment(test_data, initial_balance)
    original_epsilon = agent.epsilon
    agent.epsilon = 0  # disable exploration for test
    state = test_env.reset()
    actions_taken = []
    
    done = False
    while not done:
        action = agent.act(state)
        actions_taken.append(action)
        state, reward, done = test_env.step(action)
    
    agent.epsilon = original_epsilon
    return test_env.portfolio_value_history, actions_taken

# -------------------------
# Plotting Functions
# -------------------------
def plot_results(train_values, test_values, training_losses):
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=list(range(len(train_values))),
        y=train_values,
        mode='lines',
        name='Training Portfolio'
    ))
    fig.add_trace(go.Scatter(
        x=list(range(len(train_values), len(train_values)+len(test_values))),
        y=test_values,
        mode='lines',
        name='Testing Portfolio'
    ))
    fig.update_layout(title='Portfolio Value Over Time',
                      xaxis_title='Trading Steps',
                      yaxis_title='Portfolio Value ($)')
    
    loss_fig = go.Figure()
    loss_fig.add_trace(go.Scatter(
        x=list(range(len(training_losses))),
        y=training_losses,
        mode='lines',
        name='Training Loss'
    ))
    loss_fig.update_layout(title='Training Loss Over Episodes',
                           xaxis_title='Episode',
                           yaxis_title='Loss')
    
    fig.show()
    loss_fig.show()

def plot_test_signals(test_df, test_actions):
    # Use the original test data index and raw prices to overlay signals.
    dates = test_df.index
    prices = test_df['CloseOrig'].values
    buy_dates = []
    buy_prices = []
    sell_dates = []
    sell_prices = []
    
    # Map actions to signals:
    # 0 and 1 are Sell signals; 3 and 4 are Buy signals; 2 is Hold.
    for i, action in enumerate(test_actions):
        if action in [3, 4]:
            buy_dates.append(dates[i])
            buy_prices.append(prices[i])
        elif action in [0, 1]:
            sell_dates.append(dates[i])
            sell_prices.append(prices[i])
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=prices, mode='lines', name='SP500 Index'))
    fig.add_trace(go.Scatter(x=buy_dates, y=buy_prices, mode='markers', name='Buy Signal',
                             marker=dict(color='green', size=10, symbol='triangle-up')))
    fig.add_trace(go.Scatter(x=sell_dates, y=sell_prices, mode='markers', name='Sell Signal',
                             marker=dict(color='red', size=10, symbol='triangle-down')))
    fig.update_layout(title='S&P500 Index Price with Trading Signals',
                      xaxis_title='Date',
                      yaxis_title='Price')
    fig.show()

# -------------------------
# Main Program
# -------------------------
if __name__ == "__main__":
    try:
        # Set seeds for reproducibility
        torch.manual_seed(42)
        np.random.seed(42)
        random.seed(42)
        
        # Define parameters
        TOP_10_SP500_STOCKS = ['AAPL', 'MSFT', 'AMZN', 'NVDA', 'GOOGL', 'META', 'TSLA', 'BRK-B', 'UNH', 'XOM']
        INDEX_SYMBOL = '^GSPC'
        TRAIN_START_DATE = '2009-01-01'
        TRAIN_END_DATE = '2024-01-01'    # Approximately 15 years
        TEST_START_DATE = '2024-01-01'
        TEST_END_DATE = '2025-01-01'       # 1-year test period
        EPISODES = 100
        
        print("Initializing data handler and fetching training data for top-10 stocks...")
        data_handler = DataHandler()
        top10_data = data_handler.fetch_multiple_data(TOP_10_SP500_STOCKS, TRAIN_START_DATE, TRAIN_END_DATE)
        train_data = aggregate_data(top10_data)
        
        print("\nFetching testing data for the S&P500 index...")
        test_data = data_handler.fetch_data(INDEX_SYMBOL, TEST_START_DATE, TEST_END_DATE)
        
        print("\nInitializing training environment...")
        train_env = TradingEnvironment(train_data, initial_balance=100000)
        state_size = len(train_env.features)
        action_size = 5  # (0: Sell all, 1: Sell half, 2: Hold, 3: Buy with half cash, 4: Buy with full cash)
        print(f"State size: {state_size}, Action size: {action_size}")
        
        # Initialize agent
        agent = DQNAgent(state_size, action_size)
        
        print("\nStarting training...")
        training_losses = train_agent(train_env, agent, EPISODES)
        
        print("\nStarting testing phase on the S&P500 index...")
        test_portfolio_values, test_actions = test_agent(agent, test_data, initial_balance=10000)
        
        # Plot portfolio evolution and training loss
        plot_results(train_env.portfolio_value_history, test_portfolio_values, training_losses)
        
        # Visualize trading signals over the test period using raw index data
        plot_test_signals(test_data, test_actions)
        
        # Print final results
        print("\nTraining Results:")
        print(f"Initial Training Balance: $100000.00")
        print(f"Final Training Portfolio Value: ${train_env.portfolio_value_history[-1]:.2f}")
        training_return = ((train_env.portfolio_value_history[-1] / 100000) - 1) * 100
        print(f"Training Return: {training_return:.2f}%")
        
        print("\nTesting Results:")
        print(f"Initial Testing Balance: $10000.00")
        print(f"Final Testing Portfolio Value: ${test_portfolio_values[-1]:.2f}")
        testing_return = ((test_portfolio_values[-1] / 10000) - 1) * 100
        print(f"Testing Return: {testing_return:.2f}%")
        
        # Save final model and training statistics
        torch.save({
            'model_state_dict': agent.state_dict(),
            'optimizer_state_dict': agent.optimizer.state_dict(),
            'training_losses': training_losses,
            'train_portfolio_history': train_env.portfolio_value_history,
            'test_portfolio_history': test_portfolio_values,
            'test_actions': test_actions
        }, 'final_model.pth')
    
    except Exception as e:
        print(f"\nError occurred: {str(e)}")
        raise


Using device: cuda
Initializing data handler and fetching training data for top-10 stocks...
Fetching data for AAPL from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)
Fetching data for MSFT from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)
Fetching data for AMZN from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)
Fetching data for NVDA from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)
Fetching data for GOOGL from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)
Fetching data for META from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (2923, 17)
Fetching data for TSLA from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Data shape after preparation: (3400, 17)
Fetching data for BRK-B from 2009-01-01 to 2024-01-01...





Data shape after preparation: (3774, 17)
Fetching data for UNH from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)
Fetching data for XOM from 2009-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Data shape after preparation: (3774, 17)

Fetching testing data for the S&P500 index...
Fetching data for ^GSPC from 2024-01-01 to 2025-01-01...
Data shape after preparation: (252, 17)

Initializing training environment...
State size: 12, Action size: 5

Starting training...


Training:   1%|          | 1/100 [00:07<12:08,  7.36s/it]


Episode: 1/100 | Total Reward: 0.88 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $212069.46


Training:   2%|▏         | 2/100 [00:13<11:05,  6.79s/it]


Episode: 2/100 | Total Reward: 0.52 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $157418.80


Training:   3%|▎         | 3/100 [00:20<10:37,  6.57s/it]


Episode: 3/100 | Total Reward: 0.87 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $213607.83


Training:   4%|▍         | 4/100 [00:26<10:29,  6.56s/it]


Episode: 4/100 | Total Reward: 0.84 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $214374.53


Training:   5%|▌         | 5/100 [00:35<11:42,  7.40s/it]


Episode: 5/100 | Total Reward: 1.04 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $251406.27


Training:   6%|▌         | 6/100 [00:46<13:40,  8.72s/it]


Episode: 6/100 | Total Reward: 0.88 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $215917.88


Training:   7%|▋         | 7/100 [00:57<14:28,  9.33s/it]


Episode: 7/100 | Total Reward: 1.40 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $374476.66


Training:   8%|▊         | 8/100 [01:07<14:54,  9.72s/it]


Episode: 8/100 | Total Reward: 0.98 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $236685.22


Training:   9%|▉         | 9/100 [01:15<13:44,  9.06s/it]


Episode: 9/100 | Total Reward: 1.23 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $308984.00


Training:  10%|█         | 10/100 [01:22<12:33,  8.37s/it]


Episode: 10/100 | Total Reward: 1.03 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $254576.32


Training:  11%|█         | 11/100 [01:29<11:43,  7.90s/it]


Episode: 11/100 | Total Reward: 1.04 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $257762.73


Training:  12%|█▏        | 12/100 [01:35<11:03,  7.54s/it]


Episode: 12/100 | Total Reward: 1.55 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $412952.03


Training:  13%|█▎        | 13/100 [01:42<10:29,  7.23s/it]


Episode: 13/100 | Total Reward: 1.39 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $363143.01


Training:  14%|█▍        | 14/100 [01:48<09:57,  6.95s/it]


Episode: 14/100 | Total Reward: 1.01 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $246106.38


Training:  15%|█▌        | 15/100 [01:55<09:42,  6.85s/it]


Episode: 15/100 | Total Reward: 1.49 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $396765.60


Training:  16%|█▌        | 16/100 [02:04<10:36,  7.58s/it]


Episode: 16/100 | Total Reward: 1.73 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $494920.77


Training:  17%|█▋        | 17/100 [02:15<11:43,  8.47s/it]


Episode: 17/100 | Total Reward: 1.08 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $260630.72


Training:  18%|█▊        | 18/100 [02:24<11:55,  8.72s/it]


Episode: 18/100 | Total Reward: 0.85 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $212379.51


Training:  19%|█▉        | 19/100 [02:32<11:28,  8.50s/it]


Episode: 19/100 | Total Reward: 1.54 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $406159.16


Training:  20%|██        | 20/100 [02:43<12:16,  9.21s/it]


Episode: 20/100 | Total Reward: 0.56 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $159072.09


Training:  21%|██        | 21/100 [02:50<11:20,  8.61s/it]


Episode: 21/100 | Total Reward: 0.88 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $216236.01


Training:  22%|██▏       | 22/100 [02:59<11:29,  8.84s/it]


Episode: 22/100 | Total Reward: 0.48 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $147632.99


Training:  23%|██▎       | 23/100 [03:10<12:09,  9.48s/it]


Episode: 23/100 | Total Reward: 1.20 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $299602.13


Training:  24%|██▍       | 24/100 [03:21<12:31,  9.89s/it]


Episode: 24/100 | Total Reward: 1.11 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $273691.21


Training:  25%|██▌       | 25/100 [03:32<12:38, 10.12s/it]


Episode: 25/100 | Total Reward: 0.79 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $204405.09


Training:  26%|██▌       | 26/100 [03:43<12:44, 10.33s/it]


Episode: 26/100 | Total Reward: 0.68 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $180500.03


Training:  27%|██▋       | 27/100 [03:53<12:40, 10.42s/it]


Episode: 27/100 | Total Reward: 1.09 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $267084.41


Training:  28%|██▊       | 28/100 [04:04<12:34, 10.48s/it]


Episode: 28/100 | Total Reward: 1.09 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $266644.09


Training:  29%|██▉       | 29/100 [04:15<12:31, 10.59s/it]


Episode: 29/100 | Total Reward: 1.31 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $317273.05


Training:  30%|███       | 30/100 [04:26<12:24, 10.63s/it]


Episode: 30/100 | Total Reward: 0.72 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $186709.06


Training:  31%|███       | 31/100 [04:37<12:25, 10.81s/it]


Episode: 31/100 | Total Reward: 1.48 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $402918.92


Training:  32%|███▏      | 32/100 [04:49<12:51, 11.34s/it]


Episode: 32/100 | Total Reward: 0.95 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $235213.21


Training:  33%|███▎      | 33/100 [05:01<12:37, 11.31s/it]


Episode: 33/100 | Total Reward: 1.08 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $268890.87


Training:  34%|███▍      | 34/100 [05:11<12:17, 11.17s/it]


Episode: 34/100 | Total Reward: 0.85 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $217362.59


Training:  35%|███▌      | 35/100 [05:18<10:33,  9.74s/it]


Episode: 35/100 | Total Reward: 0.78 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $201445.58


Training:  36%|███▌      | 36/100 [05:24<09:23,  8.81s/it]


Episode: 36/100 | Total Reward: 1.12 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $286194.76


Training:  37%|███▋      | 37/100 [05:31<08:30,  8.11s/it]


Episode: 37/100 | Total Reward: 1.37 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $360128.06


Training:  38%|███▊      | 38/100 [05:37<07:49,  7.57s/it]


Episode: 38/100 | Total Reward: 1.40 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $364784.85


Training:  39%|███▉      | 39/100 [05:43<07:17,  7.17s/it]


Episode: 39/100 | Total Reward: 1.02 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $249035.07


Training:  40%|████      | 40/100 [05:50<06:56,  6.94s/it]


Episode: 40/100 | Total Reward: 1.20 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $308731.46


Training:  41%|████      | 41/100 [05:56<06:40,  6.79s/it]


Episode: 41/100 | Total Reward: 0.97 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $233738.07


Training:  42%|████▏     | 42/100 [06:04<06:49,  7.06s/it]


Episode: 42/100 | Total Reward: 0.84 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $206982.56


Training:  43%|████▎     | 43/100 [06:12<07:01,  7.40s/it]


Episode: 43/100 | Total Reward: 0.90 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $225642.04


Training:  44%|████▍     | 44/100 [06:22<07:38,  8.19s/it]


Episode: 44/100 | Total Reward: 0.68 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $186093.50


Training:  45%|████▌     | 45/100 [06:32<07:59,  8.73s/it]


Episode: 45/100 | Total Reward: 1.45 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $377556.92


Training:  46%|████▌     | 46/100 [06:43<08:19,  9.26s/it]


Episode: 46/100 | Total Reward: 1.22 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $305421.69


Training:  47%|████▋     | 47/100 [06:53<08:29,  9.62s/it]


Episode: 47/100 | Total Reward: 1.09 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $263151.77


Training:  48%|████▊     | 48/100 [07:04<08:31,  9.84s/it]


Episode: 48/100 | Total Reward: 1.34 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $343450.07


Training:  49%|████▉     | 49/100 [07:14<08:32, 10.06s/it]


Episode: 49/100 | Total Reward: 1.22 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $312353.95


Training:  50%|█████     | 50/100 [07:24<08:27, 10.14s/it]


Episode: 50/100 | Total Reward: 1.37 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $343305.64


Training:  51%|█████     | 51/100 [07:35<08:20, 10.22s/it]


Episode: 51/100 | Total Reward: 1.27 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $325047.95


Training:  52%|█████▏    | 52/100 [07:46<08:19, 10.40s/it]


Episode: 52/100 | Total Reward: 1.24 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $302134.96


Training:  53%|█████▎    | 53/100 [07:56<08:14, 10.53s/it]


Episode: 53/100 | Total Reward: 1.04 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $257285.08


Training:  54%|█████▍    | 54/100 [08:07<08:07, 10.59s/it]


Episode: 54/100 | Total Reward: 1.21 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $305383.76


Training:  55%|█████▌    | 55/100 [08:18<07:57, 10.61s/it]


Episode: 55/100 | Total Reward: 1.86 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $561886.46


Training:  56%|█████▌    | 56/100 [08:28<07:46, 10.60s/it]


Episode: 56/100 | Total Reward: 0.88 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $222615.00


Training:  57%|█████▋    | 57/100 [08:39<07:35, 10.59s/it]


Episode: 57/100 | Total Reward: 1.26 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $312142.65


Training:  58%|█████▊    | 58/100 [08:50<07:23, 10.56s/it]


Episode: 58/100 | Total Reward: 0.99 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $243562.25


Training:  59%|█████▉    | 59/100 [09:00<07:12, 10.55s/it]


Episode: 59/100 | Total Reward: 0.74 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $195625.13


Training:  60%|██████    | 60/100 [09:11<07:02, 10.57s/it]


Episode: 60/100 | Total Reward: 0.90 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $228483.33


Training:  61%|██████    | 61/100 [09:21<06:52, 10.57s/it]


Episode: 61/100 | Total Reward: 0.99 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $243494.92


Training:  62%|██████▏   | 62/100 [09:32<06:44, 10.64s/it]


Episode: 62/100 | Total Reward: 0.87 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $217616.60


Training:  63%|██████▎   | 63/100 [09:43<06:32, 10.62s/it]


Episode: 63/100 | Total Reward: 1.34 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $340624.88


Training:  64%|██████▍   | 64/100 [09:53<06:21, 10.61s/it]


Episode: 64/100 | Total Reward: 1.05 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $255360.18


Training:  65%|██████▌   | 65/100 [10:04<06:11, 10.61s/it]


Episode: 65/100 | Total Reward: 0.87 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $213133.65


Training:  66%|██████▌   | 66/100 [10:15<06:02, 10.65s/it]


Episode: 66/100 | Total Reward: 0.86 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $207800.74


Training:  67%|██████▋   | 67/100 [10:25<05:49, 10.60s/it]


Episode: 67/100 | Total Reward: 0.92 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $224509.94


Training:  68%|██████▊   | 68/100 [10:36<05:38, 10.57s/it]


Episode: 68/100 | Total Reward: 0.74 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $194791.79


Training:  69%|██████▉   | 69/100 [10:46<05:25, 10.49s/it]


Episode: 69/100 | Total Reward: 0.96 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $237882.46


Training:  70%|███████   | 70/100 [10:56<05:13, 10.44s/it]


Episode: 70/100 | Total Reward: 1.35 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $357368.70


Training:  71%|███████   | 71/100 [11:06<05:01, 10.39s/it]


Episode: 71/100 | Total Reward: 0.94 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $233977.25


Training:  72%|███████▏  | 72/100 [11:17<04:54, 10.53s/it]


Episode: 72/100 | Total Reward: 0.97 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $241624.83


Training:  73%|███████▎  | 73/100 [11:28<04:44, 10.53s/it]


Episode: 73/100 | Total Reward: 1.52 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $414602.19


Training:  74%|███████▍  | 74/100 [11:38<04:32, 10.47s/it]


Episode: 74/100 | Total Reward: 1.36 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $346793.43


Training:  75%|███████▌  | 75/100 [11:48<04:19, 10.40s/it]


Episode: 75/100 | Total Reward: 1.14 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $284766.78


Training:  76%|███████▌  | 76/100 [11:59<04:09, 10.39s/it]


Episode: 76/100 | Total Reward: 1.18 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $297982.67


Training:  77%|███████▋  | 77/100 [12:09<03:57, 10.34s/it]


Episode: 77/100 | Total Reward: 1.30 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $331405.43


Training:  78%|███████▊  | 78/100 [12:20<03:49, 10.42s/it]


Episode: 78/100 | Total Reward: 1.26 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $319633.29


Training:  79%|███████▉  | 79/100 [12:30<03:40, 10.48s/it]


Episode: 79/100 | Total Reward: 1.37 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $362377.42


Training:  80%|████████  | 80/100 [12:41<03:30, 10.53s/it]


Episode: 80/100 | Total Reward: 1.16 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $294751.92


Training:  81%|████████  | 81/100 [12:51<03:20, 10.53s/it]


Episode: 81/100 | Total Reward: 1.49 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $408712.62


Training:  82%|████████▏ | 82/100 [13:02<03:09, 10.53s/it]


Episode: 82/100 | Total Reward: 0.53 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $154410.58


Training:  83%|████████▎ | 83/100 [13:12<02:58, 10.49s/it]


Episode: 83/100 | Total Reward: 1.25 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $312852.21


Training:  84%|████████▍ | 84/100 [13:23<02:47, 10.47s/it]


Episode: 84/100 | Total Reward: 1.24 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $316964.06


Training:  85%|████████▌ | 85/100 [13:33<02:36, 10.45s/it]


Episode: 85/100 | Total Reward: 1.00 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $247313.48


Training:  86%|████████▌ | 86/100 [13:44<02:26, 10.43s/it]


Episode: 86/100 | Total Reward: 1.26 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $312240.59


Training:  87%|████████▋ | 87/100 [13:54<02:16, 10.48s/it]


Episode: 87/100 | Total Reward: 1.24 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $308702.61


Training:  88%|████████▊ | 88/100 [14:04<02:05, 10.42s/it]


Episode: 88/100 | Total Reward: 1.20 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $296249.69


Training:  89%|████████▉ | 89/100 [14:15<01:54, 10.44s/it]


Episode: 89/100 | Total Reward: 1.02 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $252211.34


Training:  90%|█████████ | 90/100 [14:25<01:44, 10.42s/it]


Episode: 90/100 | Total Reward: 0.75 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $197209.43


Training:  91%|█████████ | 91/100 [14:36<01:34, 10.45s/it]


Episode: 91/100 | Total Reward: 1.78 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $528580.49


Training:  92%|█████████▏| 92/100 [14:46<01:23, 10.46s/it]


Episode: 92/100 | Total Reward: 1.42 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $373193.13


Training:  93%|█████████▎| 93/100 [14:58<01:15, 10.81s/it]


Episode: 93/100 | Total Reward: 1.15 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $287926.69


Training:  94%|█████████▍| 94/100 [15:08<01:04, 10.72s/it]


Episode: 94/100 | Total Reward: 1.25 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $317412.92


Training:  95%|█████████▌| 95/100 [15:19<00:53, 10.73s/it]


Episode: 95/100 | Total Reward: 0.99 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $247700.28


Training:  96%|█████████▌| 96/100 [15:30<00:42, 10.67s/it]


Episode: 96/100 | Total Reward: 1.03 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $259605.94


Training:  97%|█████████▋| 97/100 [15:40<00:32, 10.68s/it]


Episode: 97/100 | Total Reward: 0.97 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $243003.65


Training:  98%|█████████▊| 98/100 [15:51<00:21, 10.71s/it]


Episode: 98/100 | Total Reward: 1.18 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $295580.43


Training:  99%|█████████▉| 99/100 [16:02<00:10, 10.70s/it]


Episode: 99/100 | Total Reward: 0.85 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $213955.27


Training: 100%|██████████| 100/100 [16:12<00:00,  9.73s/it]


Episode: 100/100 | Total Reward: 0.94 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $237083.59

Starting testing phase on the S&P500 index...






Training Results:
Initial Training Balance: $100000.00
Final Training Portfolio Value: $237083.59
Training Return: 137.08%

Testing Results:
Initial Testing Balance: $10000.00
Final Testing Portfolio Value: $11768.26
Testing Return: 17.68%


# Testing and Visualization of Improved Version over different indices

In [4]:
import numpy as np
import yfinance as yf
import talib
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')

# Set the device for computation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# -------------------------
# DataHandler: Downloads and Prepares Test Data
# -------------------------
class DataHandler:
    def __init__(self):
        from sklearn.preprocessing import MinMaxScaler
        self.scaler = MinMaxScaler()

    def fetch_data(self, symbol, start_date, end_date):
        print(f"\nFetching data for {symbol} from {start_date} to {end_date}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)

    def prepare_data(self, df):
        df = df.copy()
        # Preserve the original close prices for portfolio value calculation.
        df['CloseOrig'] = df['Close'].astype(float)
        
        # Convert necessary columns to one-dimensional float64 arrays to avoid dimension issues.
        close_prices = np.array(df['Close'], dtype=np.float64).flatten()
        volume = np.array(df['Volume'], dtype=np.float64).flatten()

        # Compute technical indicators.
        df['Returns'] = df['Close'].pct_change()
        df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
        df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
        df['RSI'] = talib.RSI(close_prices, timeperiod=14)
        macd, signal, _ = talib.MACD(close_prices)
        df['MACD'] = macd
        df['MACD_signal'] = signal
        bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
        df['BB_upper'] = bb_upper
        df['BB_middle'] = bb_middle
        df['BB_lower'] = bb_lower
        df['OBV'] = talib.OBV(close_prices, volume)
        df['MOM'] = talib.MOM(close_prices, timeperiod=14)

        # Features used for training (normalization applied to these)
        features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 
                    'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 
                    'BB_lower', 'OBV', 'MOM']
        df[features] = df[features].fillna(method='ffill').fillna(method='bfill')
        df[features] = self.scaler.fit_transform(df[features])
        df = df.dropna()
        print(f"Data shape after preparation: {df.shape}")
        return df

# -------------------------
# Trading Environment for Testing Mode
# -------------------------
class TradingEnvironment:
    def __init__(self, data, initial_balance=10000):
        if data.empty:
            raise ValueError("Data cannot be empty")
        # Reset index to simulate sequential trading.
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        # This list must match the feature names used in training.
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                         'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                         'BB_lower', 'OBV', 'MOM']
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.position = 0.0  # Number (or proportion) of shares held.
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()

    def _get_state(self):
        return self.data.iloc[self.current_step][self.features].values

    def step(self, action):
        # Actual price for trading is taken from the unscaled close.
        current_price = float(self.data.iloc[self.current_step]['CloseOrig'])
        prev_value = self.portfolio_value_history[-1]

        # Define the 5-action space:
        # 0: Sell all, 1: Sell half, 2: Hold, 3: Buy with 50% of cash, 4: Buy with full cash.
        if current_price > 0:
            if action == 0 and self.position > 0:
                self.balance += current_price * self.position
                self.position = 0.0
            elif action == 1 and self.position > 0:
                shares_to_sell = self.position * 0.5
                self.balance += current_price * shares_to_sell
                self.position -= shares_to_sell
            elif action == 3 and self.balance > 0:
                cash_to_use = self.balance * 0.5
                shares_to_buy = cash_to_use / current_price
                self.position += shares_to_buy
                self.balance -= cash_to_use
            elif action == 4 and self.balance > 0:
                shares_to_buy = self.balance / current_price
                self.position += shares_to_buy
                self.balance = 0
            # Action 2 (Hold) does nothing.

        new_value = self.balance + (self.position * current_price)
        self.portfolio_value_history.append(new_value)
        reward = (new_value - prev_value) / prev_value if prev_value > 0 else 0

        self.current_step += 1
        done = (self.current_step >= len(self.data) - 1)
        next_state = self._get_state() if not done else np.zeros(len(self.features))
        return next_state, reward, done

# -------------------------
# DQNAgent: Loads Policy and Acts Greedily (epsilon = 0 during testing)
# -------------------------
class DQNAgent(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQNAgent, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 0.0  # Deterministic policy during testing.
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32

        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.to(device)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

    def act(self, state):
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            action_values = self(state_tensor)
            return torch.argmax(action_values).item()

# -------------------------
# Test Simulation Function
# -------------------------
def test_agent(agent, env):
    state = env.reset()
    done = False
    actions_taken = []
    while not done:
        action = agent.act(state)
        actions_taken.append(action)
        state, reward, done = env.step(action)
    return env.portfolio_value_history, actions_taken

# -------------------------
# Plotting Function
# -------------------------
def plot_results(portfolio_values, title):
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=list(range(len(portfolio_values))),
        y=portfolio_values,
        mode='lines+markers',
        name='Portfolio Value'
    ))
    fig.update_layout(title=title,
                      xaxis_title='Trading Days',
                      yaxis_title='Portfolio Value ($)')
    fig.show()

# -------------------------
# Main Routine: Load Model and Test on Multiple Indexes
# -------------------------
if __name__ == '__main__':
    try:
        # List of indexes to test; include at least five.
        index_list = ['^DJI', '^GSPC', '^FTSE', '^NSEI', '^N225']
        TEST_START_DATE = '2024-01-01'
        TEST_END_DATE = '2025-01-01'
        TEST_INITIAL_BALANCE = 10000

        # Load the pre-trained model once.
        state_size = 12    # Must match training state vector dimension.
        action_size = 5    # 5-action space used during training.
        agent = DQNAgent(state_size, action_size)
        model_path = 'final_model.pth'
        checkpoint = torch.load(model_path, map_location=device)
        agent.load_state_dict(checkpoint['model_state_dict'])
        agent.eval()
        print("Model loaded successfully.")

        # Initialize a DataHandler (common for all tests).
        data_handler = DataHandler()

        # Test the model on each index.
        for symbol in index_list:
            try:
                test_data = data_handler.fetch_data(symbol, TEST_START_DATE, TEST_END_DATE)
                test_env = TradingEnvironment(test_data, initial_balance=TEST_INITIAL_BALANCE)
                portfolio_history, actions_taken = test_agent(agent, test_env)
                final_value = portfolio_history[-1]
                testing_return = ((final_value/TEST_INITIAL_BALANCE) - 1) * 100
                title = f"{symbol} Portfolio Value (Return: {testing_return:.2f}%)"
                plot_results(portfolio_history, title)
                print(f"\nTesting Results for {symbol}:")
                print(f"  Initial Balance: ${TEST_INITIAL_BALANCE:.2f}")
                print(f"  Final Portfolio Value: ${final_value:.2f}")
                print(f"  Return: {testing_return:.2f}%")
            except Exception as inner_e:
                print(f"Error testing on {symbol}: {inner_e}")

    except Exception as e:
        print(f"Error during testing: {e}")
        raise


Using device: cuda


[*********************100%***********************]  1 of 1 completed

Model loaded successfully.

Fetching data for ^DJI from 2024-01-01 to 2025-01-01...
Data shape after preparation: (252, 17)





[*********************100%***********************]  1 of 1 completed


Testing Results for ^DJI:
  Initial Balance: $10000.00
  Final Portfolio Value: $10709.70
  Return: 7.10%

Fetching data for ^GSPC from 2024-01-01 to 2025-01-01...
Data shape after preparation: (252, 17)





[*********************100%***********************]  1 of 1 completed


Testing Results for ^GSPC:
  Initial Balance: $10000.00
  Final Portfolio Value: $11768.26
  Return: 17.68%

Fetching data for ^FTSE from 2024-01-01 to 2025-01-01...
Data shape after preparation: (254, 17)





[*********************100%***********************]  1 of 1 completed


Testing Results for ^FTSE:
  Initial Balance: $10000.00
  Final Portfolio Value: $10166.23
  Return: 1.66%

Fetching data for ^NSEI from 2024-01-01 to 2025-01-01...
Data shape after preparation: (246, 17)





[*********************100%***********************]  1 of 1 completed


Testing Results for ^NSEI:
  Initial Balance: $10000.00
  Final Portfolio Value: $10799.99
  Return: 8.00%

Fetching data for ^N225 from 2024-01-01 to 2025-01-01...
Data shape after preparation: (245, 17)






Testing Results for ^N225:
  Initial Balance: $10000.00
  Final Portfolio Value: $12851.42
  Return: 28.51%


# Training over Multiple Indices for Better Performance🎭

In [9]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Set device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# ---------------------------
# DataHandler Class
# ---------------------------
class DataHandler:
    def __init__(self):
        from sklearn.preprocessing import MinMaxScaler
        self.scaler = MinMaxScaler()
    
    def fetch_data(self, symbol, start_date, end_date):
        print(f"Fetching data for {symbol} from {start_date} to {end_date}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)
    
    def prepare_data(self, df):
        df = df.copy()
        # Preserve the original closing price for portfolio simulation
        df['CloseOrig'] = df['Close'].astype(float)
        
        # Convert columns explicitly to one-dimensional arrays (to avoid TA-Lib errors)
        close_prices = np.array(df['Close'], dtype=np.float64).flatten()
        volume = np.array(df['Volume'], dtype=np.float64).flatten()
        
        # Compute technical indicators
        df['Returns'] = df['Close'].pct_change()
        df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
        df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
        df['RSI'] = talib.RSI(close_prices, timeperiod=14)
        macd, signal, _ = talib.MACD(close_prices)
        df['MACD'] = macd
        df['MACD_signal'] = signal
        bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
        df['BB_upper'] = bb_upper
        df['BB_middle'] = bb_middle
        df['BB_lower'] = bb_lower
        df['OBV'] = talib.OBV(close_prices, volume)
        df['MOM'] = talib.MOM(close_prices, timeperiod=14)
        
        # List of features to normalize (do not scale CloseOrig)
        features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 
                    'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 
                    'BB_lower', 'OBV', 'MOM']
        df[features] = df[features].fillna(method='ffill').fillna(method='bfill')
        df[features] = self.scaler.fit_transform(df[features])
        df = df.dropna()
        print(f"Prepared data shape for {df.index[-1]} rows and {len(df.columns)} columns.")
        return df
    
    def fetch_multiple_data(self, symbols, start_date, end_date):
        data_dict = {}
        for symbol in symbols:
            try:
                data = self.fetch_data(symbol, start_date, end_date)
                data_dict[symbol] = data
            except Exception as e:
                print(f"Error fetching {symbol}: {e}")
        return data_dict

# ---------------------------
# Aggregate Data Function
# ---------------------------
def aggregate_data(data_dict):
    """
    Given a dictionary of DataFrames (keyed by symbol), find the common date
    index and average the features.
    """
    common_index = None
    for df in data_dict.values():
        if common_index is None:
            common_index = df.index
        else:
            common_index = common_index.intersection(df.index)
    reindexed = []
    for symbol, df in data_dict.items():
        reindexed.append(df.loc[common_index])
    concatenated = pd.concat(reindexed, axis=1, keys=data_dict.keys())
    # Average features across symbols; this applies to every column including CloseOrig.
    aggregated = concatenated.groupby(axis=1, level=1).mean()
    return aggregated

# ---------------------------
# Trading Environment Class
# ---------------------------
class TradingEnvironment:
    def __init__(self, data, initial_balance=100000):
        if data is None or data.empty:
            raise ValueError("Data cannot be empty")
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        # Features used as state (these are the normalized technical indicators)
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                         'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                         'BB_lower', 'OBV', 'MOM']
        # "CloseOrig" is used for actual portfolio value calculation.
        if 'CloseOrig' not in self.data.columns:
            raise ValueError("Missing 'CloseOrig' column in data.")
        self.reset()
    
    def reset(self):
        self.balance = self.initial_balance
        self.position = 0.0  # shares held
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()
    
    def _get_state(self):
        return self.data.iloc[self.current_step][self.features].values
    
    def step(self, action):
        # Retrieve actual price from the unscaled "CloseOrig"
        current_price = float(self.data.iloc[self.current_step]['CloseOrig'])
        prev_value = self.portfolio_value_history[-1]
        # Action space (5 discrete actions):
        # 0: Sell all, 1: Sell half, 2: Hold, 3: Buy with 50% cash, 4: Buy with full cash.
        if current_price > 0:
            if action == 0 and self.position > 0:
                # Sell all
                self.balance += current_price * self.position
                self.position = 0.0
            elif action == 1 and self.position > 0:
                # Sell half
                shares_to_sell = self.position * 0.5
                self.balance += current_price * shares_to_sell
                self.position -= shares_to_sell
            elif action == 3 and self.balance > 0:
                # Buy with 50% cash
                cash_to_use = self.balance * 0.5
                shares_to_buy = cash_to_use / current_price
                self.position += shares_to_buy
                self.balance -= cash_to_use
            elif action == 4 and self.balance > 0:
                # Buy with full cash
                shares_to_buy = self.balance / current_price
                self.position += shares_to_buy
                self.balance = 0
            # Action 2 (Hold) does nothing.
        new_value = self.balance + (self.position * current_price)
        self.portfolio_value_history.append(new_value)
        reward = (new_value - prev_value) / prev_value if prev_value > 0 else 0
        self.current_step += 1
        done = (self.current_step >= len(self.data) - 1)
        next_state = self._get_state() if not done else np.zeros(len(self.features))
        return next_state, reward, done

# ---------------------------
# DQNAgent Class
# ---------------------------
class DQNAgent(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQNAgent, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0  # initial exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32

        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.to(device)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            action_values = self(state_tensor)
            return torch.argmax(action_values).item()

    def replay(self):
        if len(self.memory) < self.batch_size:
            return 0
        minibatch = random.sample(self.memory, self.batch_size)
        states = torch.FloatTensor([m[0] for m in minibatch]).to(device)
        actions = torch.LongTensor([m[1] for m in minibatch]).to(device)
        rewards = torch.FloatTensor([m[2] for m in minibatch]).to(device)
        next_states = torch.FloatTensor([m[3] for m in minibatch]).to(device)
        dones = torch.FloatTensor([m[4] for m in minibatch]).to(device)
        current_q = self(states).gather(1, actions.unsqueeze(1)).squeeze()
        with torch.no_grad():
            next_q = self(next_states).max(1)[0]
            target_q = rewards + (1 - dones) * self.gamma * next_q
        loss = self.criterion(current_q, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        return loss.item()

# ---------------------------
# Training Function
# ---------------------------
def train_agent(agent, training_datasets, episodes):
    training_losses = []
    best_reward = float('-inf')
    
    # Training proceeds across episodes.
    # In each episode we randomly choose one aggregated dataset (i.e. one country) 
    # and simulate a complete trading episode over its available historical period.
    for episode in tqdm(range(episodes), desc="Training Episodes"):
        # Randomly choose one country's aggregated data
        country, data = random.choice(list(training_datasets.items()))
        env = TradingEnvironment(data, initial_balance=100000)
        state = env.reset()
        total_reward = 0
        episode_losses = []
        while True:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            loss = agent.replay()
            if loss:
                episode_losses.append(loss)
            state = next_state
            total_reward += reward
            if done:
                avg_loss = np.mean(episode_losses) if episode_losses else 0
                training_losses.append(avg_loss)
                if total_reward > best_reward:
                    best_reward = total_reward
                    torch.save(agent.state_dict(), 'final_model.pth')
                print(f"\nEpisode {episode+1}/{episodes} | Country: {country} | Total Reward: {total_reward:.2f} | Avg Loss: {avg_loss:.4f} | Epsilon: {agent.epsilon:.4f} | Final Portfolio: ${env.portfolio_value_history[-1]:.2f}")
                break
    return training_losses

# ---------------------------
# Main Routine for Training on Multiple Indexes
# ---------------------------
if __name__ == '__main__':
    try:
        # Define training period with a larger dataset
        TRAIN_START_DATE = '1995-01-01'
        TRAIN_END_DATE   = '2024-01-01'
        EPISODES = 100  # adjust number of episodes as needed
        
        # Dictionary of indexes and their top 10 companies (tickers)
        index_companies = {
            "USA": ['AAPL', 'MSFT', 'AMZN', 'NVDA', 'GOOGL', 'META', 'TSLA', 'BRK-B', 'UNH', 'XOM'],
            "India": ['RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS', 'KOTAKBANK.NS', 'LT.NS', 'AXISBANK.NS', 'ITC.NS', 'HINDUNILVR.NS'],
            "Japan": ['7203.T', '6758.T', '9984.T', '8306.T', '6902.T', '9432.T', '7267.T', '7974.T', '6501.T', '8801.T'],
            "UK": ['HSBA.L', 'BP.L', 'VOD.L', 'GSK.L', 'RIO.L', 'BT-A.L', 'ULVR.L', 'DGE.L', 'AZN.L', 'BATS.L'],
            "France": ['OR.PA', 'MC.PA', 'SAN.PA', 'AI.PA', 'BNP.PA', 'DG.PA', 'EN.PA', 'RI.PA', 'KER.PA', 'SU.PA']
        }
        
        # ... rest of your training code remains unchanged ...
        
        data_handler = DataHandler()
        training_datasets = {}
        
        # For each country/index, fetch data for its top companies and aggregate.
        for country, tickers in index_companies.items():
            print(f"\nProcessing {country} data:")
            company_data = data_handler.fetch_multiple_data(tickers, TRAIN_START_DATE, TRAIN_END_DATE)
            if company_data:
                agg_data = aggregate_data(company_data)
                training_datasets[country] = agg_data
            else:
                print(f"No valid data fetched for {country}.")
        
        if not training_datasets:
            raise ValueError("No aggregated training data available from any index!")
        
        # All training datasets must have the expected technical indicator columns.
        # We assume each aggregated DataFrame contains at least the following columns:
        # ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 'BB_lower', 'OBV', 'MOM', 'CloseOrig']
        
        # Initialize the training environment parameters are set within the episode loop.
        state_size = 12  # corresponds to the feature vector (excluding CloseOrig)
        action_size = 5  # five possible actions
        
        # Initialize the DQN agent
        agent = DQNAgent(state_size, action_size)
        
        print("\nStarting training on multiple indexes...")
        training_losses = train_agent(agent, training_datasets, EPISODES)
        
        print("\nTraining Completed. Best model saved as final_model.pth")
    
    except Exception as e:
        print(f"Error during training: {e}")
        raise


[*********************100%***********************]  1 of 1 completed

Using device: cuda

Processing USA data:
Fetching data for AAPL from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for MSFT from 1995-01-01 to 2024-01-01...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for AMZN from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for NVDA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for GOOGL from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for META from 1995-01-01 to 2024-01-01...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for TSLA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for BRK-B from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for UNH from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for XOM from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.

Processing India data:
Fetching data for RELIANCE.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for TCS.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for HDFCBANK.NS from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for INFY.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for ICICIBANK.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for KOTAKBANK.NS from 1995-01-01 to 2024-01-01...



[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for LT.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for AXISBANK.NS from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for ITC.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for HINDUNILVR.NS from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.

Processing Japan data:
Fetching data for 7203.T from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 6758.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 9984.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 8306.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 6902.T from 1995-01-01 to 2024-01-01...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 9432.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 7267.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 7974.T from 1995-01-01 to 2024-01-01...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 6501.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for 8801.T from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.

Processing UK data:
Fetching data for HSBA.L from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for BP.L from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for VOD.L from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for GSK.L from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for RIO.L from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for BT-A.L from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for ULVR.L from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for DGE.L from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for AZN.L from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for BATS.L from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.

Processing France data:
Fetching data for OR.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for MC.PA from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for SAN.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for AI.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for BNP.PA from 1995-01-01 to 2024-01-01...



[*********************100%***********************]  1 of 1 completed


Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for DG.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for EN.PA from 1995-01-01 to 2024-01-01...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for RI.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for KER.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.
Fetching data for SU.PA from 1995-01-01 to 2024-01-01...
Prepared data shape for 2023-12-29 00:00:00 rows and 17 columns.






Starting training on multiple indexes...


Training Episodes:   1%|          | 1/100 [00:15<24:47, 15.03s/it]


Episode 1/100 | Country: France | Total Reward: 1.44 | Avg Loss: 0.0002 | Epsilon: 0.0100 | Final Portfolio: $307754.28


Training Episodes:   2%|▏         | 2/100 [00:25<19:53, 12.18s/it]


Episode 2/100 | Country: Japan | Total Reward: 0.95 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $197847.65


Training Episodes:   3%|▎         | 3/100 [00:41<22:31, 13.93s/it]


Episode 3/100 | Country: UK | Total Reward: 16.06 | Avg Loss: 0.0342 | Epsilon: 0.0100 | Final Portfolio: $160757.18


Training Episodes:   4%|▍         | 4/100 [00:52<20:46, 12.99s/it]


Episode 4/100 | Country: India | Total Reward: 2.38 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $758757.08


Training Episodes:   5%|▌         | 5/100 [00:59<16:47, 10.60s/it]


Episode 5/100 | Country: USA | Total Reward: 0.45 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $144427.59


Training Episodes:   6%|▌         | 6/100 [01:12<18:03, 11.53s/it]


Episode 6/100 | Country: France | Total Reward: 0.99 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $221909.35


Training Episodes:   7%|▋         | 7/100 [01:23<17:48, 11.49s/it]


Episode 7/100 | Country: India | Total Reward: 1.29 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $302857.33


Training Episodes:   8%|▊         | 8/100 [01:30<15:11,  9.91s/it]


Episode 8/100 | Country: USA | Total Reward: 0.68 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $175654.32


Training Episodes:   9%|▉         | 9/100 [01:40<14:59,  9.89s/it]


Episode 9/100 | Country: Japan | Total Reward: 1.20 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $259895.05


Training Episodes:  10%|█         | 10/100 [01:53<16:25, 10.95s/it]


Episode 10/100 | Country: France | Total Reward: 0.78 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $172116.83


Training Episodes:  11%|█         | 11/100 [02:05<16:29, 11.12s/it]


Episode 11/100 | Country: India | Total Reward: 2.74 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1244014.93


Training Episodes:  12%|█▏        | 12/100 [02:16<16:30, 11.26s/it]


Episode 12/100 | Country: India | Total Reward: 3.02 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1492373.11


Training Episodes:  13%|█▎        | 13/100 [02:30<17:40, 12.19s/it]


Episode 13/100 | Country: France | Total Reward: 0.22 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $99523.30


Training Episodes:  14%|█▍        | 14/100 [02:40<16:30, 11.52s/it]


Episode 14/100 | Country: Japan | Total Reward: 0.81 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $172276.34


Training Episodes:  15%|█▌        | 15/100 [02:50<15:31, 10.96s/it]


Episode 15/100 | Country: Japan | Total Reward: 1.21 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $246364.50


Training Episodes:  16%|█▌        | 16/100 [03:00<14:50, 10.60s/it]


Episode 16/100 | Country: Japan | Total Reward: 2.63 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1013422.35


Training Episodes:  17%|█▋        | 17/100 [03:06<12:53,  9.32s/it]


Episode 17/100 | Country: USA | Total Reward: 1.20 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $299355.14


Training Episodes:  18%|█▊        | 18/100 [03:16<12:56,  9.47s/it]


Episode 18/100 | Country: Japan | Total Reward: 2.75 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1134602.87


Training Episodes:  19%|█▉        | 19/100 [03:26<12:53,  9.56s/it]


Episode 19/100 | Country: Japan | Total Reward: 0.99 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $207120.63


Training Episodes:  20%|██        | 20/100 [03:36<12:50,  9.63s/it]


Episode 20/100 | Country: Japan | Total Reward: 1.79 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $475981.53


Training Episodes:  21%|██        | 21/100 [03:49<14:05, 10.71s/it]


Episode 21/100 | Country: France | Total Reward: 0.39 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $120716.73


Training Episodes:  22%|██▏       | 22/100 [03:58<13:30, 10.39s/it]


Episode 22/100 | Country: Japan | Total Reward: 1.34 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $270148.98


Training Episodes:  23%|██▎       | 23/100 [04:08<13:02, 10.16s/it]


Episode 23/100 | Country: Japan | Total Reward: 1.34 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $263607.12


Training Episodes:  24%|██▍       | 24/100 [04:18<12:41, 10.02s/it]


Episode 24/100 | Country: Japan | Total Reward: 1.94 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $483627.27


Training Episodes:  25%|██▌       | 25/100 [04:29<13:05, 10.47s/it]


Episode 25/100 | Country: India | Total Reward: 2.94 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1364454.97


Training Episodes:  26%|██▌       | 26/100 [04:39<12:38, 10.25s/it]


Episode 26/100 | Country: Japan | Total Reward: 1.07 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $205059.05


Training Episodes:  27%|██▋       | 27/100 [04:45<11:02,  9.08s/it]


Episode 27/100 | Country: USA | Total Reward: 1.54 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $411761.10


Training Episodes:  28%|██▊       | 28/100 [05:01<13:22, 11.15s/it]


Episode 28/100 | Country: UK | Total Reward: 4.41 | Avg Loss: 0.0016 | Epsilon: 0.0100 | Final Portfolio: $654373.38


Training Episodes:  29%|██▉       | 29/100 [05:17<14:55, 12.62s/it]


Episode 29/100 | Country: UK | Total Reward: 6.17 | Avg Loss: 0.0020 | Epsilon: 0.0100 | Final Portfolio: $595231.21


Training Episodes:  30%|███       | 30/100 [05:33<15:54, 13.63s/it]


Episode 30/100 | Country: UK | Total Reward: 2.49 | Avg Loss: 0.0017 | Epsilon: 0.0100 | Final Portfolio: $90491.22


Training Episodes:  31%|███       | 31/100 [05:49<16:31, 14.36s/it]


Episode 31/100 | Country: UK | Total Reward: 7.56 | Avg Loss: 0.0090 | Epsilon: 0.0100 | Final Portfolio: $35776.79


Training Episodes:  32%|███▏      | 32/100 [05:56<13:33, 11.96s/it]


Episode 32/100 | Country: USA | Total Reward: 1.38 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $361452.69


Training Episodes:  33%|███▎      | 33/100 [06:02<11:29, 10.29s/it]


Episode 33/100 | Country: USA | Total Reward: 0.80 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $199786.86


Training Episodes:  34%|███▍      | 34/100 [06:12<11:08, 10.12s/it]


Episode 34/100 | Country: Japan | Total Reward: 1.32 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $299316.39


Training Episodes:  35%|███▌      | 35/100 [06:28<12:51, 11.87s/it]


Episode 35/100 | Country: UK | Total Reward: 13.87 | Avg Loss: 0.0315 | Epsilon: 0.0100 | Final Portfolio: $139529.17


Training Episodes:  36%|███▌      | 36/100 [06:39<12:32, 11.76s/it]


Episode 36/100 | Country: India | Total Reward: 4.06 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $3958281.53


Training Episodes:  37%|███▋      | 37/100 [06:46<10:37, 10.12s/it]


Episode 37/100 | Country: USA | Total Reward: 0.98 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $240455.24


Training Episodes:  38%|███▊      | 38/100 [06:55<10:19, 10.00s/it]


Episode 38/100 | Country: Japan | Total Reward: 1.11 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $224887.13


Training Episodes:  39%|███▉      | 39/100 [07:02<09:03,  8.91s/it]


Episode 39/100 | Country: USA | Total Reward: 0.21 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $115929.59


Training Episodes:  40%|████      | 40/100 [07:18<11:12, 11.21s/it]


Episode 40/100 | Country: UK | Total Reward: 4.78 | Avg Loss: 0.0066 | Epsilon: 0.0100 | Final Portfolio: $1947.90


Training Episodes:  41%|████      | 41/100 [07:30<11:05, 11.28s/it]


Episode 41/100 | Country: India | Total Reward: 2.87 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1272589.06


Training Episodes:  42%|████▏     | 42/100 [07:36<09:28,  9.80s/it]


Episode 42/100 | Country: USA | Total Reward: 1.46 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $387416.79


Training Episodes:  43%|████▎     | 43/100 [07:52<11:04, 11.65s/it]


Episode 43/100 | Country: UK | Total Reward: 17.41 | Avg Loss: 0.0252 | Epsilon: 0.0100 | Final Portfolio: $538869.89


Training Episodes:  44%|████▍     | 44/100 [08:04<10:49, 11.60s/it]


Episode 44/100 | Country: India | Total Reward: 3.04 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1579560.78


Training Episodes:  45%|████▌     | 45/100 [08:10<09:11, 10.02s/it]


Episode 45/100 | Country: USA | Total Reward: 1.34 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $338380.95


Training Episodes:  46%|████▌     | 46/100 [08:21<09:25, 10.47s/it]


Episode 46/100 | Country: India | Total Reward: 3.20 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1821697.03


Training Episodes:  47%|████▋     | 47/100 [08:28<08:08,  9.22s/it]


Episode 47/100 | Country: USA | Total Reward: 1.64 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $451890.50


Training Episodes:  48%|████▊     | 48/100 [08:41<09:01, 10.42s/it]


Episode 48/100 | Country: France | Total Reward: -0.00 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $74485.51


Training Episodes:  49%|████▉     | 49/100 [08:51<08:41, 10.23s/it]


Episode 49/100 | Country: Japan | Total Reward: 0.73 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $170499.84


Training Episodes:  50%|█████     | 50/100 [09:04<09:17, 11.15s/it]


Episode 50/100 | Country: France | Total Reward: 0.95 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $195796.85


Training Episodes:  51%|█████     | 51/100 [09:21<10:24, 12.75s/it]


Episode 51/100 | Country: UK | Total Reward: 0.67 | Avg Loss: 0.0017 | Epsilon: 0.0100 | Final Portfolio: $4019.76


Training Episodes:  52%|█████▏    | 52/100 [09:34<10:20, 12.92s/it]


Episode 52/100 | Country: France | Total Reward: 0.17 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $91482.21


Training Episodes:  53%|█████▎    | 53/100 [09:45<09:48, 12.51s/it]


Episode 53/100 | Country: India | Total Reward: 3.31 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1956957.22


Training Episodes:  54%|█████▍    | 54/100 [09:59<09:46, 12.76s/it]


Episode 54/100 | Country: India | Total Reward: 3.99 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $3732938.77


Training Episodes:  55%|█████▌    | 55/100 [10:26<12:55, 17.22s/it]


Episode 55/100 | Country: UK | Total Reward: 2.89 | Avg Loss: 0.0017 | Epsilon: 0.0100 | Final Portfolio: $205508.98


Training Episodes:  56%|█████▌    | 56/100 [10:37<11:13, 15.30s/it]


Episode 56/100 | Country: USA | Total Reward: 1.23 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $311850.69


Training Episodes:  57%|█████▋    | 57/100 [11:00<12:35, 17.56s/it]


Episode 57/100 | Country: France | Total Reward: 0.67 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $140545.50


Training Episodes:  58%|█████▊    | 58/100 [11:22<13:18, 19.00s/it]


Episode 58/100 | Country: France | Total Reward: 1.06 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $217137.06


Training Episodes:  59%|█████▉    | 59/100 [11:39<12:27, 18.22s/it]


Episode 59/100 | Country: Japan | Total Reward: 1.61 | Avg Loss: 0.0002 | Epsilon: 0.0100 | Final Portfolio: $352276.93


Training Episodes:  60%|██████    | 60/100 [11:50<10:44, 16.11s/it]


Episode 60/100 | Country: USA | Total Reward: 0.45 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $144536.38


Training Episodes:  61%|██████    | 61/100 [12:13<11:49, 18.19s/it]


Episode 61/100 | Country: France | Total Reward: 0.46 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $123274.69


Training Episodes:  62%|██████▏   | 62/100 [12:32<11:44, 18.54s/it]


Episode 62/100 | Country: India | Total Reward: 3.64 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $2759494.11


Training Episodes:  63%|██████▎   | 63/100 [12:49<11:08, 18.06s/it]


Episode 63/100 | Country: Japan | Total Reward: 0.86 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $194320.52


Training Episodes:  64%|██████▍   | 64/100 [13:11<11:27, 19.10s/it]


Episode 64/100 | Country: France | Total Reward: 0.62 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $149981.29


Training Episodes:  65%|██████▌   | 65/100 [13:27<10:32, 18.08s/it]


Episode 65/100 | Country: Japan | Total Reward: 1.68 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $389981.63


Training Episodes:  66%|██████▌   | 66/100 [13:54<11:45, 20.75s/it]


Episode 66/100 | Country: UK | Total Reward: 3.03 | Avg Loss: 0.0074 | Epsilon: 0.0100 | Final Portfolio: $198.23


Training Episodes:  67%|██████▋   | 67/100 [14:04<09:45, 17.75s/it]


Episode 67/100 | Country: USA | Total Reward: 1.03 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $250638.70


Training Episodes:  68%|██████▊   | 68/100 [14:15<08:18, 15.59s/it]


Episode 68/100 | Country: USA | Total Reward: 1.25 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $310266.13


Training Episodes:  69%|██████▉   | 69/100 [14:35<08:43, 16.88s/it]


Episode 69/100 | Country: India | Total Reward: 2.62 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1044152.71


Training Episodes:  70%|███████   | 70/100 [14:58<09:22, 18.74s/it]


Episode 70/100 | Country: France | Total Reward: 0.01 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $77544.58


Training Episodes:  71%|███████   | 71/100 [15:17<09:05, 18.81s/it]


Episode 71/100 | Country: India | Total Reward: 3.36 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $2217949.12


Training Episodes:  72%|███████▏  | 72/100 [15:36<08:46, 18.81s/it]


Episode 72/100 | Country: India | Total Reward: 2.78 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1178094.84


Training Episodes:  73%|███████▎  | 73/100 [15:46<07:19, 16.29s/it]


Episode 73/100 | Country: USA | Total Reward: 1.18 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $283691.80


Training Episodes:  74%|███████▍  | 74/100 [16:08<07:47, 18.00s/it]


Episode 74/100 | Country: France | Total Reward: 0.92 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $198908.62


Training Episodes:  75%|███████▌  | 75/100 [16:27<07:37, 18.31s/it]


Episode 75/100 | Country: India | Total Reward: 1.10 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $258097.63


Training Episodes:  76%|███████▌  | 76/100 [16:43<07:05, 17.72s/it]


Episode 76/100 | Country: Japan | Total Reward: 1.21 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $250977.90


Training Episodes:  77%|███████▋  | 77/100 [16:54<05:58, 15.57s/it]


Episode 77/100 | Country: USA | Total Reward: 0.85 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $217679.65


Training Episodes:  78%|███████▊  | 78/100 [17:23<07:12, 19.66s/it]


Episode 78/100 | Country: UK | Total Reward: 7.37 | Avg Loss: 0.0087 | Epsilon: 0.0100 | Final Portfolio: $4518.49


Training Episodes:  79%|███████▉  | 79/100 [17:34<05:56, 16.99s/it]


Episode 79/100 | Country: USA | Total Reward: 1.30 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $328998.90


Training Episodes:  80%|████████  | 80/100 [17:45<05:02, 15.11s/it]


Episode 80/100 | Country: USA | Total Reward: 1.03 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $257005.53


Training Episodes:  81%|████████  | 81/100 [18:12<05:56, 18.78s/it]


Episode 81/100 | Country: UK | Total Reward: 17.40 | Avg Loss: 0.0234 | Epsilon: 0.0100 | Final Portfolio: $637433.21


Training Episodes:  82%|████████▏ | 82/100 [18:31<05:39, 18.88s/it]


Episode 82/100 | Country: India | Total Reward: 2.65 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1045470.83


Training Episodes:  83%|████████▎ | 83/100 [18:51<05:25, 19.15s/it]


Episode 83/100 | Country: India | Total Reward: 1.27 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $303910.02


Training Episodes:  84%|████████▍ | 84/100 [19:01<04:24, 16.53s/it]


Episode 84/100 | Country: USA | Total Reward: 0.63 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $172613.40


Training Episodes:  85%|████████▌ | 85/100 [19:18<04:09, 16.61s/it]


Episode 85/100 | Country: Japan | Total Reward: 1.38 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $309542.49


Training Episodes:  86%|████████▌ | 86/100 [19:35<03:53, 16.70s/it]


Episode 86/100 | Country: Japan | Total Reward: 1.44 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $316011.93


Training Episodes:  87%|████████▋ | 87/100 [19:52<03:36, 16.69s/it]


Episode 87/100 | Country: Japan | Total Reward: 1.22 | Avg Loss: 0.0002 | Epsilon: 0.0100 | Final Portfolio: $242255.91


Training Episodes:  88%|████████▊ | 88/100 [20:02<02:58, 14.88s/it]


Episode 88/100 | Country: USA | Total Reward: 0.87 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $219507.89


Training Episodes:  89%|████████▉ | 89/100 [20:25<03:08, 17.17s/it]


Episode 89/100 | Country: France | Total Reward: -0.17 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $68479.86


Training Episodes:  90%|█████████ | 90/100 [20:52<03:22, 20.26s/it]


Episode 90/100 | Country: UK | Total Reward: 15.27 | Avg Loss: 0.0274 | Epsilon: 0.0100 | Final Portfolio: $291311.35


Training Episodes:  91%|█████████ | 91/100 [21:12<03:00, 20.01s/it]


Episode 91/100 | Country: India | Total Reward: 1.29 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $286472.28


Training Episodes:  92%|█████████▏| 92/100 [21:39<02:57, 22.14s/it]


Episode 92/100 | Country: UK | Total Reward: -3.56 | Avg Loss: 0.0014 | Epsilon: 0.0100 | Final Portfolio: $31.23


Training Episodes:  93%|█████████▎| 93/100 [21:52<02:15, 19.42s/it]


Episode 93/100 | Country: Japan | Total Reward: 2.35 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $738575.29


Training Episodes:  94%|█████████▍| 94/100 [22:04<01:43, 17.19s/it]


Episode 94/100 | Country: India | Total Reward: 3.21 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1870586.17


Training Episodes:  95%|█████████▌| 95/100 [22:16<01:17, 15.55s/it]


Episode 95/100 | Country: India | Total Reward: 2.98 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1455290.44


Training Episodes:  96%|█████████▌| 96/100 [22:27<00:57, 14.33s/it]


Episode 96/100 | Country: India | Total Reward: 3.18 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $1803250.95


Training Episodes:  97%|█████████▋| 97/100 [22:33<00:35, 11.94s/it]


Episode 97/100 | Country: USA | Total Reward: 0.91 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $221797.29


Training Episodes:  98%|█████████▊| 98/100 [22:45<00:23, 11.70s/it]


Episode 98/100 | Country: Japan | Total Reward: 0.98 | Avg Loss: 0.0001 | Epsilon: 0.0100 | Final Portfolio: $215342.60


Training Episodes:  99%|█████████▉| 99/100 [23:03<00:13, 13.67s/it]


Episode 99/100 | Country: UK | Total Reward: 3.56 | Avg Loss: 0.0063 | Epsilon: 0.0100 | Final Portfolio: $13554.52


Training Episodes: 100%|██████████| 100/100 [23:10<00:00, 13.90s/it]


Episode 100/100 | Country: USA | Total Reward: 0.92 | Avg Loss: 0.0000 | Epsilon: 0.0100 | Final Portfolio: $236717.10

Training Completed. Best model saved as final_model.pth





# Testing the Saved Model (Returns and Porfolio Visualization)

In [10]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')

# Set the device for computation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# -------------------------
# DataHandler: Downloads and Prepares Test Data
# -------------------------
class DataHandler:
    def __init__(self):
        from sklearn.preprocessing import MinMaxScaler
        self.scaler = MinMaxScaler()
        
    def fetch_data(self, symbol, start_date, end_date):
        print(f"\nFetching data for {symbol} from {start_date} to {end_date}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)
    
    def prepare_data(self, df):
        df = df.copy()
        # Preserve original closing price for portfolio calculations
        df['CloseOrig'] = df['Close'].astype(float)
        
        # Convert to one-dimensional float64 numpy arrays
        close_prices = np.array(df['Close'], dtype=np.float64).flatten()
        volume = np.array(df['Volume'], dtype=np.float64).flatten()
        
        # Compute technical indicators
        df['Returns'] = df['Close'].pct_change()
        df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
        df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
        df['RSI'] = talib.RSI(close_prices, timeperiod=14)
        macd, signal, _ = talib.MACD(close_prices)
        df['MACD'] = macd
        df['MACD_signal'] = signal
        bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
        df['BB_upper'] = bb_upper
        df['BB_middle'] = bb_middle
        df['BB_lower'] = bb_lower
        df['OBV'] = talib.OBV(close_prices, volume)
        df['MOM'] = talib.MOM(close_prices, timeperiod=14)
        
        # Define state representation features (do not scale CloseOrig)
        features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 
                    'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                    'BB_lower', 'OBV', 'MOM']
        # Fill missing values and normalize
        df[features] = df[features].fillna(method='ffill').fillna(method='bfill')
        df[features] = self.scaler.fit_transform(df[features])
        df = df.dropna()
        print(f"Data shape after preparation: {df.shape}")
        return df

# -------------------------
# Trading Environment for Testing
# -------------------------
class TradingEnvironment:
    def __init__(self, data, initial_balance=10000):
        if data.empty:
            raise ValueError("Data cannot be empty")
        # Reset index for sequential simulation
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        # Must match training state features
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 
                         'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                         'BB_lower', 'OBV', 'MOM']
        if 'CloseOrig' not in self.data.columns:
            raise ValueError("Missing 'CloseOrig' column.")
        self.reset()
    
    def reset(self):
        self.balance = self.initial_balance
        self.position = 0.0  # Number of shares held
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()
    
    def _get_state(self):
        return self.data.iloc[self.current_step][self.features].values
    
    def step(self, action):
        # Retrieve the unscaled current price for trading decisions.
        current_price = float(self.data.iloc[self.current_step]['CloseOrig'])
        prev_value = self.portfolio_value_history[-1]
        
        # Action mapping (5 discrete actions):
        # 0: Sell all, 1: Sell half, 2: Hold
        # 3: Buy with 50% available cash, 4: Buy with full available cash.
        if current_price > 0:
            if action == 0 and self.position > 0:
                self.balance += current_price * self.position
                self.position = 0.0
            elif action == 1 and self.position > 0:
                shares_to_sell = self.position * 0.5
                self.balance += current_price * shares_to_sell
                self.position -= shares_to_sell
            elif action == 3 and self.balance > 0:
                cash_to_use = self.balance * 0.5
                shares_to_buy = cash_to_use / current_price
                self.position += shares_to_buy
                self.balance -= cash_to_use
            elif action == 4 and self.balance > 0:
                shares_to_buy = self.balance / current_price
                self.position += shares_to_buy
                self.balance = 0
                
        new_value = self.balance + (self.position * current_price)
        self.portfolio_value_history.append(new_value)
        
        reward = (new_value - prev_value) / prev_value if prev_value > 0 else 0
        
        self.current_step += 1
        done = (self.current_step >= len(self.data) - 1)
        next_state = self._get_state() if not done else np.zeros(len(self.features))
        return next_state, reward, done

# -------------------------
# DQNAgent: Defines the Network & Action Selection (Greedy for Testing)
# -------------------------
class DQNAgent(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQNAgent, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        # Set epsilon=0 for testing (greedy policy)
        self.epsilon = 0.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.to(device)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
    
    def act(self, state):
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            action_values = self(state_tensor)
            return torch.argmax(action_values).item()

# -------------------------
# Test Simulation Function: Returns Portfolio History, Actions, and Signals
# -------------------------
def test_agent(agent, env):
    state = env.reset()
    done = False
    actions_taken = []
    signals = []  # record "buy", "sell", or "hold" for each step.
    while not done:
        action = agent.act(state)
        actions_taken.append(action)
        # Define buy signals for actions 3 & 4, sell signals for actions 0 & 1.
        if action in [3, 4]:
            signals.append("buy")
        elif action in [0, 1]:
            signals.append("sell")
        else:
            signals.append("hold")
        state, reward, done = env.step(action)
    return env.portfolio_value_history, actions_taken, signals

# -------------------------
# Plotting Functions
# -------------------------
def plot_portfolio(portfolio_values, signals, title):
    # Prepare markers for buy and sell signals along the portfolio evolution
    buy_indices = [i for i, s in enumerate(signals) if s == "buy"]
    sell_indices = [i for i, s in enumerate(signals) if s == "sell"]
    buy_values = [portfolio_values[i] for i in buy_indices]
    sell_values = [portfolio_values[i] for i in sell_indices]
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=list(range(len(portfolio_values))),
        y=portfolio_values,
        mode='lines+markers',
        name='Portfolio Value'
    ))
    fig.add_trace(go.Scatter(
        x=buy_indices,
        y=buy_values,
        mode='markers',
        name='Buy Signal',
        marker=dict(color='green', size=10, symbol='triangle-up')
    ))
    fig.add_trace(go.Scatter(
        x=sell_indices,
        y=sell_values,
        mode='markers',
        name='Sell Signal',
        marker=dict(color='red', size=10, symbol='triangle-down')
    ))
    fig.update_layout(title=title,
                      xaxis_title='Trading Steps',
                      yaxis_title='Portfolio Value ($)')
    fig.show()

def plot_price_signals(test_data, signals, title):
    # Plot price (CloseOrig) with buy and sell markers.
    # Use the environment's reset data (i.e. a sequential index)
    prices = test_data['CloseOrig'].values
    steps = list(range(len(prices)))
    
    buy_indices = [i for i, s in enumerate(signals) if s == "buy"]
    sell_indices = [i for i, s in enumerate(signals) if s == "sell"]
    buy_prices = [prices[i] for i in buy_indices]
    sell_prices = [prices[i] for i in sell_indices]
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=steps,
        y=prices,
        mode='lines',
        name='Price'
    ))
    fig.add_trace(go.Scatter(
        x=buy_indices,
        y=buy_prices,
        mode='markers',
        name='Buy Signal',
        marker=dict(color='green', size=10, symbol='triangle-up')
    ))
    fig.add_trace(go.Scatter(
        x=sell_indices,
        y=sell_prices,
        mode='markers',
        name='Sell Signal',
        marker=dict(color='red', size=10, symbol='triangle-down')
    ))
    fig.update_layout(title=title,
                      xaxis_title='Trading Steps',
                      yaxis_title='Price ($)')
    fig.show()

# -------------------------
# Main Testing Routine
# -------------------------
if __name__ == '__main__':
    try:
        # Dictionary of indexes to test: name -> ticker symbol
        indexes = {
            "USA_S&P500": "^GSPC",        # S&P 500 Index
            "India_Nifty50": "^NSEI",      # Nifty 50 Index
            "Japan_Nikkei225": "^N225",    # Nikkei 225 Index
            "UK_FTSE100": "^FTSE",         # FTSE 100 Index
            "France_CAC40": "^FCHI"        # CAC 40 Index
        }
        # Testing period of one year:
        TEST_START_DATE = '2024-01-01'
        TEST_END_DATE   = '2025-01-01'
        TEST_INITIAL_BALANCE = 10000
        
        # Load the pre-trained model.
        state_size = 12  # Must match state dimensions used in training.
        action_size = 5  # Five discrete actions.
        agent = DQNAgent(state_size, action_size)
        model_path = 'final_model.pth'
        checkpoint = torch.load(model_path, map_location=device)
        agent.load_state_dict(checkpoint)  # Adjust if checkpoint is nested.
        agent.eval()
        print("Pre-trained model loaded successfully.")
        
        # Initialize the DataHandler.
        data_handler = DataHandler()
        
        # Test the model on each index.
        for index_name, ticker in indexes.items():
            try:
                print("\n============================================")
                print(f"Testing on {index_name} ({ticker})")
                test_data = data_handler.fetch_data(ticker, TEST_START_DATE, TEST_END_DATE)
                # Initialize the TradingEnvironment using the prepared data.
                env = TradingEnvironment(test_data, initial_balance=TEST_INITIAL_BALANCE)
                portfolio_history, actions_taken, signals = test_agent(agent, env)
                final_value = portfolio_history[-1]
                return_pct = ((final_value / TEST_INITIAL_BALANCE) - 1) * 100
                print(f"Initial Balance: ${TEST_INITIAL_BALANCE:.2f}")
                print(f"Final Portfolio Value: ${final_value:.2f}")
                print(f"Cumulative Return: {return_pct:.2f}%")
                
                title_portfolio = f"{index_name} Portfolio Evolution (Return: {return_pct:.2f}%)"
                plot_portfolio(portfolio_history, signals, title_portfolio)
                
                title_price = f"{index_name} Price with Buy/Sell Signals"
                # Use the original test_data (not reset) for price plotting with the same sequential order.
                test_data_reset = test_data.reset_index(drop=True)
                plot_price_signals(test_data_reset, signals, title_price)
            except Exception as inner_ex:
                print(f"Error testing on {ticker}: {inner_ex}")
        
    except Exception as e:
        print(f"Error during testing: {e}")
        raise


[*********************100%***********************]  1 of 1 completed

Using device: cuda
Pre-trained model loaded successfully.

Testing on USA_S&P500 (^GSPC)

Fetching data for ^GSPC from 2024-01-01 to 2025-01-01...
Data shape after preparation: (252, 17)
Initial Balance: $10000.00
Final Portfolio Value: $12916.94
Cumulative Return: 29.17%





[*********************100%***********************]  1 of 1 completed


Testing on India_Nifty50 (^NSEI)

Fetching data for ^NSEI from 2024-01-01 to 2025-01-01...
Data shape after preparation: (246, 17)
Initial Balance: $10000.00
Final Portfolio Value: $10871.45
Cumulative Return: 8.71%





[*********************100%***********************]  1 of 1 completed


Testing on Japan_Nikkei225 (^N225)

Fetching data for ^N225 from 2024-01-01 to 2025-01-01...
Data shape after preparation: (245, 17)
Initial Balance: $10000.00
Final Portfolio Value: $13740.45
Cumulative Return: 37.40%





[*********************100%***********************]  1 of 1 completed


Testing on UK_FTSE100 (^FTSE)

Fetching data for ^FTSE from 2024-01-01 to 2025-01-01...
Data shape after preparation: (254, 17)
Initial Balance: $10000.00
Final Portfolio Value: $10602.08
Cumulative Return: 6.02%





[*********************100%***********************]  1 of 1 completed


Testing on France_CAC40 (^FCHI)

Fetching data for ^FCHI from 2024-01-01 to 2025-01-01...
Data shape after preparation: (256, 17)
Initial Balance: $10000.00
Final Portfolio Value: $10109.86
Cumulative Return: 1.10%



