### Importing Necessary Libraries

In [5]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

### Using CUDA/GPU

In [None]:
# Set device for training: use CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch.cuda.get_device_name() only accepts CUDA devices, so calling it with the
# CPU device raises. Report the GPU's name on CUDA and the plain device otherwise.
if device.type == 'cuda':
    print(f"Using device: {torch.cuda.get_device_name(device)}")
else:
    print(f"Using device: {device}")


### DataHandler Class

In [None]:
# ---------------------------
# DataHandler Class
# ---------------------------
class DataHandler:
    """Download price history and derive a min-max scaled indicator table.

    The scaler is refit on every ``prepare_data`` call, so each downloaded
    frame is normalized against its own history only.
    """

    # Columns that are gap-filled and scaled. 'CloseOrig' is intentionally not
    # listed so that unscaled prices stay available for portfolio simulation.
    FEATURES = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                'BB_lower', 'OBV', 'MOM']

    def __init__(self):
        # Function-scope import: scikit-learn is only needed once an instance
        # is actually created.
        from sklearn.preprocessing import MinMaxScaler
        self.scaler = MinMaxScaler()

    @staticmethod
    def _flatten_columns(df):
        """Return *df* with single-level column labels when that is unambiguous.

        If the columns are a MultiIndex whose first level is unique (for
        example ``('Close', 'AAPL')``), the first level becomes the label.
        Frames with flat columns, or with a first level that repeats, are
        returned unchanged.
        NOTE(review): this assumes the first level holds the price field names,
        as in yfinance's column-grouped layout - confirm against the installed
        yfinance version.
        """
        if isinstance(df.columns, pd.MultiIndex):
            first_level = df.columns.get_level_values(0)
            if first_level.is_unique:
                return df.set_axis(first_level, axis=1)
        return df

    def fetch_data(self, symbol, start_date, end_date):
        """Download *symbol* between the two dates and return the prepared frame.

        Raises:
            ValueError: if the download returned nothing.
        """
        print(f"Fetching data for {symbol} from {start_date} to {end_date}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df is None or df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)

    def prepare_data(self, df):
        """Add technical indicators to *df* and scale the feature columns.

        The input is copied, never modified. The returned frame keeps every
        input column, adds the indicator columns plus 'CloseOrig' (the
        unscaled close), and has the FEATURES columns scaled by the
        instance's MinMaxScaler.
        """
        df = self._flatten_columns(df.copy())
        # Preserve the original closing price for portfolio simulation
        df['CloseOrig'] = df['Close'].astype(float)

        # TA-Lib needs one-dimensional float64 arrays.
        close_prices = np.array(df['Close'], dtype=np.float64).flatten()
        volume = np.array(df['Volume'], dtype=np.float64).flatten()

        # Compute technical indicators
        df['Returns'] = df['Close'].pct_change()
        df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
        df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
        df['RSI'] = talib.RSI(close_prices, timeperiod=14)
        macd, signal, _ = talib.MACD(close_prices)
        df['MACD'] = macd
        df['MACD_signal'] = signal
        bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
        df['BB_upper'] = bb_upper
        df['BB_middle'] = bb_middle
        df['BB_lower'] = bb_lower
        df['OBV'] = talib.OBV(close_prices, volume)
        df['MOM'] = talib.MOM(close_prices, timeperiod=14)

        features = self.FEATURES
        # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
        # The back-fill covers the warm-up rows at the start of each indicator.
        df[features] = df[features].ffill().bfill()
        df[features] = self.scaler.fit_transform(df[features])
        df = df.dropna()
        # Report the real row count (the previous message printed the last
        # index label in its place).
        print(f"Prepared data: {len(df)} rows and {len(df.columns)} columns.")
        return df

    def fetch_multiple_data(self, symbols, start_date, end_date):
        """Fetch every symbol, skipping (and logging) the ones that fail.

        Returns:
            dict mapping each successfully fetched symbol to its prepared frame.
        """
        data_dict = {}
        for symbol in symbols:
            try:
                data_dict[symbol] = self.fetch_data(symbol, start_date, end_date)
            except Exception as e:
                # Best effort by design: one bad ticker must not abort the batch.
                print(f"Error fetching {symbol}: {e}")
        return data_dict


### Trading Environment Class

In [None]:
# ---------------------------
# Trading Environment Class
# ---------------------------
class TradingEnvironment:
    """Single-asset trading simulation over a prepared feature table.

    Each row of ``data`` is one time step. The state is the row's feature
    columns; trades execute at that row's 'CloseOrig' price. There are no
    transaction costs and fractional shares are allowed.
    """

    def __init__(self, data, initial_balance=100000):
        """Validate *data*, store a sequentially indexed copy and reset.

        Raises:
            ValueError: if *data* is None/empty, lacks 'CloseOrig', or lacks
                any of the state feature columns.
        """
        if data is None or data.empty:
            raise ValueError("Data cannot be empty")
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        # Features used as state (these are the normalized technical indicators)
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                         'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                         'BB_lower', 'OBV', 'MOM']
        # "CloseOrig" is used for actual portfolio value calculation.
        if 'CloseOrig' not in self.data.columns:
            raise ValueError("Missing 'CloseOrig' column in data.")
        # Fail early with a readable message instead of a KeyError from reset().
        missing = [name for name in self.features if name not in self.data.columns]
        if missing:
            raise ValueError(f"Missing feature columns in data: {missing}")
        self.reset()

    def reset(self):
        """Restore cash, position and step counter; return the first state."""
        self.balance = self.initial_balance
        self.position = 0.0  # shares held
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()

    def _get_state(self):
        """Return the current row's feature values as a float64 array."""
        row = self.data.iloc[self.current_step]
        # An explicit dtype keeps the state numeric even if the frame carries
        # non-numeric columns (a mixed row would otherwise be object-typed).
        return row[self.features].to_numpy(dtype=np.float64)

    def step(self, action):
        """Apply *action* at the current price and advance one time step.

        Action space (5 discrete actions):
        0: Sell all, 1: Sell half, 2: Hold, 3: Buy with 50% cash,
        4: Buy with full cash. Any other value is treated as Hold.

        Returns:
            (next_state, reward, done). ``reward`` is the fractional change in
            portfolio value from the execution price to the next row's price.
            ``next_state`` is all zeros once ``done`` is True.
        """
        last_index = len(self.data) - 1
        # Retrieve actual price from the unscaled "CloseOrig"
        current_price = float(self.data.iloc[self.current_step]['CloseOrig'])
        prev_value = self.portfolio_value_history[-1]
        if current_price > 0:
            if action == 0 and self.position > 0:
                # Sell all
                self.balance += current_price * self.position
                self.position = 0.0
            elif action == 1 and self.position > 0:
                # Sell half
                shares_to_sell = self.position * 0.5
                self.balance += current_price * shares_to_sell
                self.position -= shares_to_sell
            elif action == 3 and self.balance > 0:
                # Buy with 50% cash
                cash_to_use = self.balance * 0.5
                shares_to_buy = cash_to_use / current_price
                self.position += shares_to_buy
                self.balance -= cash_to_use
            elif action == 4 and self.balance > 0:
                # Buy with full cash
                shares_to_buy = self.balance / current_price
                self.position += shares_to_buy
                self.balance = 0
            # Action 2 (Hold) does nothing.

        self.current_step += 1
        done = (self.current_step >= last_index)

        # Mark to market at the NEXT row's price. A trade at current_price does
        # not change total value, so valuing at current_price (as before) gave
        # a reward that was the same for every action and only reflected the
        # position chosen one step earlier.
        mark_index = min(self.current_step, last_index)
        mark_price = float(self.data.iloc[mark_index]['CloseOrig'])
        new_value = self.balance + (self.position * mark_price)
        self.portfolio_value_history.append(new_value)
        reward = (new_value - prev_value) / prev_value if prev_value > 0 else 0

        next_state = self._get_state() if not done else np.zeros(len(self.features))
        return next_state, reward, done


### DQN Agent Class

In [None]:
# ---------------------------
# DQNAgent Class
# ---------------------------
class DQNAgent(nn.Module):
    """Epsilon-greedy DQN agent with a three-layer MLP and a replay buffer.

    The same network provides both the current and the bootstrap Q-values
    (there is no separate target network). The module is moved to the
    file-level ``device`` on construction.
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95
        self.epsilon = 1.0  # initial exploration rate
        self.epsilon_min = 0.01
        # NOTE(review): epsilon is decayed on every replay() update, not per
        # episode, so it reaches epsilon_min after roughly 920 updates -
        # confirm that this schedule is intended.
        self.epsilon_decay = 0.995
        self.batch_size = 32

        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.to(device)

    def forward(self, x):
        """Return one Q-value per action for each state in the batch *x*."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    @staticmethod
    def _as_float_tensor(values):
        """Convert an array-like (or sequence of arrays) to a float32 tensor.

        Going through a single contiguous NumPy array avoids PyTorch's slow
        path for lists of ndarrays and also accepts object-dtype rows.
        """
        return torch.as_tensor(np.asarray(values, dtype=np.float32), device=device)

    def act(self, state):
        """Pick an action: random with probability epsilon, otherwise greedy."""
        if random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        with torch.no_grad():
            state_tensor = self._as_float_tensor(state).unsqueeze(0)
            action_values = self(state_tensor)
            return torch.argmax(action_values).item()

    def replay(self):
        """Run one gradient step on a random minibatch from the buffer.

        Returns:
            0 when the buffer holds fewer than ``batch_size`` transitions,
            otherwise the minibatch loss as a float. Epsilon is decayed after
            each actual update until it reaches ``epsilon_min``.
        """
        if len(self.memory) < self.batch_size:
            return 0
        minibatch = random.sample(self.memory, self.batch_size)
        batch_states, batch_actions, batch_rewards, batch_next, batch_done = zip(*minibatch)
        states = self._as_float_tensor(batch_states)
        actions = torch.as_tensor(np.asarray(batch_actions, dtype=np.int64), device=device)
        rewards = self._as_float_tensor(batch_rewards)
        next_states = self._as_float_tensor(batch_next)
        dones = self._as_float_tensor(batch_done)
        # squeeze(1) removes only the gathered column. A bare squeeze() would
        # also drop the batch axis when batch_size == 1.
        current_q = self(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        with torch.no_grad():
            next_q = self(next_states).max(1)[0]
            # Terminal transitions (done == 1) get no bootstrap term.
            target_q = rewards + (1 - dones) * self.gamma * next_q
        loss = self.criterion(current_q, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        return loss.item()


### Main Training Script

In [None]:
# ---------------------------
# Aggregate Data Function
# ---------------------------
def aggregate_data(data_dict):
    """Average per-symbol frames column by column over their shared index.

    Args:
        data_dict: mapping of symbol -> DataFrame. Every frame is restricted
            to the index labels present in ALL frames before averaging.

    Returns:
        DataFrame indexed by the common labels with one column per distinct
        column name, holding the mean across symbols. This applies to every
        column, including 'CloseOrig'.

    Raises:
        ValueError: if *data_dict* is empty.
    """
    if not data_dict:
        raise ValueError("aggregate_data needs at least one DataFrame")

    frames = list(data_dict.values())
    common_index = frames[0].index
    for frame in frames[1:]:
        common_index = common_index.intersection(frame.index)

    aligned = [frame.loc[common_index] for frame in frames]
    concatenated = pd.concat(aligned, axis=1, keys=list(data_dict.keys()))
    # Column level 0 is the symbol and level 1 the column name. Grouping the
    # transposed frame by level 1 replaces the deprecated
    # groupby(axis=1, level=1) and yields the same per-column means.
    aggregated = concatenated.T.groupby(level=1).mean().T
    return aggregated

# ---------------------------
# Training Function
# ---------------------------
def train_agent(agent, training_datasets, episodes):
    """Train *agent* for *episodes* full passes over randomly chosen datasets.

    Each episode picks one entry of *training_datasets* (country -> frame) at
    random, runs a fresh TradingEnvironment to the end, and calls
    ``agent.replay()`` after every step. Whenever an episode's summed reward
    beats the best so far, the agent's weights are written to
    'final_model.pth'.

    Returns:
        list with the mean replay loss of each episode (0 for an episode in
        which no loss was recorded).
    """
    loss_per_episode = []
    top_reward = float('-inf')
    candidates = list(training_datasets.items())

    for episode in tqdm(range(episodes), desc="Training Episodes"):
        # One randomly selected country per episode.
        country, data = random.choice(candidates)
        env = TradingEnvironment(data, initial_balance=100000)
        state = env.reset()
        total_reward = 0
        step_losses = []

        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            loss = agent.replay()
            # replay() returns 0 while the buffer is still filling; skip those.
            if loss:
                step_losses.append(loss)
            state = next_state
            total_reward += reward

        avg_loss = np.mean(step_losses) if step_losses else 0
        loss_per_episode.append(avg_loss)
        if total_reward > top_reward:
            top_reward = total_reward
            torch.save(agent.state_dict(), 'final_model.pth')
        print(f"\nEpisode {episode+1}/{episodes} | Country: {country} | Total Reward: {total_reward:.2f} | Avg Loss: {avg_loss:.4f} | Epsilon: {agent.epsilon:.4f} | Final Portfolio: ${env.portfolio_value_history[-1]:.2f}")

    return loss_per_episode

# ---------------------------
# Main Routine for Training on Multiple Indexes
# ---------------------------
if __name__ == '__main__':
    try:
        # Training window and number of episodes (adjust EPISODES as needed).
        TRAIN_START_DATE = '1995-01-01'
        TRAIN_END_DATE   = '2024-01-01'
        EPISODES = 100

        # Ten large constituents per market, keyed by country.
        index_companies = {
            "USA": ['AAPL', 'MSFT', 'AMZN', 'NVDA', 'GOOGL', 'META', 'TSLA', 'BRK-B', 'UNH', 'XOM'],
            "India": ['RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS', 'KOTAKBANK.NS', 'LT.NS', 'AXISBANK.NS', 'ITC.NS', 'HINDUNILVR.NS'],
            "Japan": ['7203.T', '6758.T', '9984.T', '8306.T', '6902.T', '9432.T', '7267.T', '7974.T', '6501.T', '8801.T'],
            "UK": ['HSBA.L', 'BP.L', 'VOD.L', 'GSK.L', 'RIO.L', 'BT-A.L', 'ULVR.L', 'DGE.L', 'AZN.L', 'BATS.L'],
            "France": ['OR.PA', 'MC.PA', 'SAN.PA', 'AI.PA', 'BNP.PA', 'DG.PA', 'EN.PA', 'RI.PA', 'KER.PA', 'SU.PA']
        }

        data_handler = DataHandler()
        training_datasets = {}

        # Build one aggregated dataset per country from its tickers.
        for country, tickers in index_companies.items():
            print(f"\nProcessing {country} data:")
            company_data = data_handler.fetch_multiple_data(tickers, TRAIN_START_DATE, TRAIN_END_DATE)
            if not company_data:
                print(f"No valid data fetched for {country}.")
                continue
            agg_data = aggregate_data(company_data)
            training_datasets[country] = agg_data

        if len(training_datasets) == 0:
            raise ValueError("No aggregated training data available from any index!")

        # Each aggregated frame is expected to carry the 12 state features
        # ('Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI', 'MACD', 'MACD_signal',
        # 'BB_upper', 'BB_middle', 'BB_lower', 'OBV', 'MOM') plus 'CloseOrig'.
        # The environment itself is created inside the episode loop.
        state_size = 12  # length of the feature vector (CloseOrig excluded)
        action_size = 5  # five discrete actions

        agent = DQNAgent(state_size, action_size)

        print("\nStarting training on multiple indexes...")
        training_losses = train_agent(agent, training_datasets, EPISODES)

        print("\nTraining Completed. Best model saved as final_model.pth")

    except Exception as e:
        # Report, then re-raise so the failure is not hidden.
        print(f"Error during training: {e}")
        raise


# Testing Phase

### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')

# Choose the computation device: CUDA when PyTorch reports one, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# -------------------------
# DataHandler: Downloads and Prepares Test Data
# -------------------------
class DataHandler:
    """Download test-period price history and build the scaled feature table.

    The scaler is refit on every ``prepare_data`` call, so each downloaded
    frame is normalized against its own history only.
    """

    # Columns that are gap-filled and scaled. 'CloseOrig' is intentionally not
    # listed so that unscaled prices stay available for portfolio calculations.
    FEATURES = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                'BB_lower', 'OBV', 'MOM']

    def __init__(self):
        # Function-scope import: scikit-learn is only needed once an instance
        # is actually created.
        from sklearn.preprocessing import MinMaxScaler
        self.scaler = MinMaxScaler()

    @staticmethod
    def _flatten_columns(df):
        """Return *df* with single-level column labels when that is unambiguous.

        If the columns are a MultiIndex whose first level is unique (for
        example ``('Close', '^GSPC')``), the first level becomes the label.
        Frames with flat columns, or with a first level that repeats, are
        returned unchanged.
        NOTE(review): this assumes the first level holds the price field names,
        as in yfinance's column-grouped layout - confirm against the installed
        yfinance version.
        """
        if isinstance(df.columns, pd.MultiIndex):
            first_level = df.columns.get_level_values(0)
            if first_level.is_unique:
                return df.set_axis(first_level, axis=1)
        return df

    def fetch_data(self, symbol, start_date, end_date):
        """Download *symbol* between the two dates and return the prepared frame.

        Raises:
            ValueError: if the download returned nothing.
        """
        print(f"\nFetching data for {symbol} from {start_date} to {end_date}...")
        df = yf.download(symbol, start=start_date, end=end_date)
        if df is None or df.empty:
            raise ValueError(f"No data found for {symbol}")
        return self.prepare_data(df)

    def prepare_data(self, df):
        """Add technical indicators to *df* and scale the feature columns.

        The input is copied, never modified. The returned frame keeps every
        input column, adds the indicator columns plus 'CloseOrig' (the
        unscaled close), and has the FEATURES columns scaled by the
        instance's MinMaxScaler.
        """
        df = self._flatten_columns(df.copy())
        # Preserve original closing price for portfolio calculations
        df['CloseOrig'] = df['Close'].astype(float)

        # TA-Lib needs one-dimensional float64 arrays.
        close_prices = np.array(df['Close'], dtype=np.float64).flatten()
        volume = np.array(df['Volume'], dtype=np.float64).flatten()

        # Compute technical indicators
        df['Returns'] = df['Close'].pct_change()
        df['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
        df['EMA_20'] = talib.EMA(close_prices, timeperiod=20)
        df['RSI'] = talib.RSI(close_prices, timeperiod=14)
        macd, signal, _ = talib.MACD(close_prices)
        df['MACD'] = macd
        df['MACD_signal'] = signal
        bb_upper, bb_middle, bb_lower = talib.BBANDS(close_prices)
        df['BB_upper'] = bb_upper
        df['BB_middle'] = bb_middle
        df['BB_lower'] = bb_lower
        df['OBV'] = talib.OBV(close_prices, volume)
        df['MOM'] = talib.MOM(close_prices, timeperiod=14)

        features = self.FEATURES
        # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
        # The back-fill covers the warm-up rows at the start of each indicator.
        df[features] = df[features].ffill().bfill()
        df[features] = self.scaler.fit_transform(df[features])
        df = df.dropna()
        print(f"Data shape after preparation: {df.shape}")
        return df

# -------------------------
# Trading Environment for Testing
# -------------------------
class TradingEnvironment:
    """Single-asset trading simulation used to evaluate a trained agent.

    Each row of ``data`` is one time step. The state is the row's feature
    columns; trades execute at that row's 'CloseOrig' price. There are no
    transaction costs and fractional shares are allowed.
    """

    def __init__(self, data, initial_balance=10000):
        """Validate *data*, store a sequentially indexed copy and reset.

        Raises:
            ValueError: if *data* is None/empty, lacks 'CloseOrig', or lacks
                any of the state feature columns.
        """
        if data is None or data.empty:
            raise ValueError("Data cannot be empty")
        # Reset index for sequential simulation
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        # Must match training state features
        self.features = ['Close', 'Returns', 'SMA_20', 'EMA_20', 'RSI',
                         'MACD', 'MACD_signal', 'BB_upper', 'BB_middle',
                         'BB_lower', 'OBV', 'MOM']
        if 'CloseOrig' not in self.data.columns:
            raise ValueError("Missing 'CloseOrig' column.")
        # Fail early with a readable message instead of a KeyError from reset().
        missing = [name for name in self.features if name not in self.data.columns]
        if missing:
            raise ValueError(f"Missing feature columns in data: {missing}")
        self.reset()

    def reset(self):
        """Restore cash, position and step counter; return the first state."""
        self.balance = self.initial_balance
        self.position = 0.0  # Number of shares held
        self.current_step = 0
        self.portfolio_value_history = [self.initial_balance]
        return self._get_state()

    def _get_state(self):
        """Return the current row's feature values as a float64 array."""
        row = self.data.iloc[self.current_step]
        # An explicit dtype keeps the state numeric even if the frame carries
        # non-numeric columns (a mixed row would otherwise be object-typed).
        return row[self.features].to_numpy(dtype=np.float64)

    def step(self, action):
        """Apply *action* at the current price and advance one time step.

        Action mapping (5 discrete actions):
        0: Sell all, 1: Sell half, 2: Hold,
        3: Buy with 50% available cash, 4: Buy with full available cash.
        Any other value is treated as Hold.

        Returns:
            (next_state, reward, done). ``reward`` is the fractional change in
            portfolio value from the execution price to the next row's price.
            ``next_state`` is all zeros once ``done`` is True.
        """
        last_index = len(self.data) - 1
        # Retrieve the unscaled current price for trading decisions.
        current_price = float(self.data.iloc[self.current_step]['CloseOrig'])
        prev_value = self.portfolio_value_history[-1]

        if current_price > 0:
            if action == 0 and self.position > 0:
                self.balance += current_price * self.position
                self.position = 0.0
            elif action == 1 and self.position > 0:
                shares_to_sell = self.position * 0.5
                self.balance += current_price * shares_to_sell
                self.position -= shares_to_sell
            elif action == 3 and self.balance > 0:
                cash_to_use = self.balance * 0.5
                shares_to_buy = cash_to_use / current_price
                self.position += shares_to_buy
                self.balance -= cash_to_use
            elif action == 4 and self.balance > 0:
                shares_to_buy = self.balance / current_price
                self.position += shares_to_buy
                self.balance = 0

        self.current_step += 1
        done = (self.current_step >= last_index)

        # Mark to market at the NEXT row's price. A trade at current_price does
        # not change total value, so valuing at current_price (as before) made
        # the recorded value ignore the action just taken and left the final
        # row's price move out of the reported result.
        mark_index = min(self.current_step, last_index)
        mark_price = float(self.data.iloc[mark_index]['CloseOrig'])
        new_value = self.balance + (self.position * mark_price)
        self.portfolio_value_history.append(new_value)

        reward = (new_value - prev_value) / prev_value if prev_value > 0 else 0

        next_state = self._get_state() if not done else np.zeros(len(self.features))
        return next_state, reward, done

# -------------------------
# DQNAgent: Defines the Network & Action Selection (Greedy for Testing)
# -------------------------
class DQNAgent(nn.Module):
    """Inference-time copy of the DQN network with a purely greedy policy.

    The layer names (fc1, fc2, fc3) and sizes mirror the training agent so a
    saved state dict loads directly. The training-only attributes are kept
    but ``act`` never explores.
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        self.state_size, self.action_size = state_size, action_size

        # Replay and exploration settings; epsilon is 0 for testing (greedy).
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 0.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 32

        # state -> 64 -> 32 -> one Q-value per action
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, action_size)

        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.to(device)

    def forward(self, x):
        """Return one Q-value per action for each state in the batch *x*."""
        for hidden in (self.fc1, self.fc2):
            x = torch.relu(hidden(x))
        return self.fc3(x)

    def act(self, state):
        """Return the index of the action with the highest predicted Q-value."""
        with torch.no_grad():
            batch = torch.FloatTensor(state).unsqueeze(0)
            q_values = self(batch.to(device))
            best = torch.argmax(q_values)
        return best.item()

# -------------------------
# Test Simulation Function: Returns Portfolio History, Actions, and Signals
# -------------------------
def test_agent(agent, env):
    state = env.reset()
    done = False
    actions_taken = []
    signals = []  # record "buy", "sell", or "hold" for each step.
    while not done:
        action = agent.act(state)
        actions_taken.append(action)
        # Define buy signals for actions 3 & 4, sell signals for actions 0 & 1.
        if action in [3, 4]:
            signals.append("buy")
        elif action in [0, 1]:
            signals.append("sell")
        else:
            signals.append("hold")
        state, reward, done = env.step(action)
    return env.portfolio_value_history, actions_taken, signals

# -------------------------
# Plotting Functions
# -------------------------
def plot_portfolio(portfolio_values, signals, title):
    """Show the portfolio value per step with buy/sell markers on the curve.

    ``signals[i]`` is drawn at x = i using ``portfolio_values[i]`` as height.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=list(range(len(portfolio_values))),
        y=portfolio_values,
        mode='lines+markers',
        name='Portfolio Value'
    ))

    # (signal text, legend label, marker colour, marker symbol)
    marker_specs = (
        ("buy", 'Buy Signal', 'green', 'triangle-up'),
        ("sell", 'Sell Signal', 'red', 'triangle-down'),
    )
    for wanted, label, colour, shape in marker_specs:
        hit_steps = [i for i, s in enumerate(signals) if s == wanted]
        hit_values = [portfolio_values[i] for i in hit_steps]
        fig.add_trace(go.Scatter(
            x=hit_steps,
            y=hit_values,
            mode='markers',
            name=label,
            marker=dict(color=colour, size=10, symbol=shape)
        ))

    fig.update_layout(title=title,
                      xaxis_title='Trading Steps',
                      yaxis_title='Portfolio Value ($)')
    fig.show()

def plot_price_signals(test_data, signals, title):
    """Show the 'CloseOrig' price line with buy/sell markers.

    Positions are plain step numbers, so ``signals[i]`` is drawn at x = i on
    the i-th price of ``test_data``.
    """
    prices = test_data['CloseOrig'].values

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=list(range(len(prices))),
        y=prices,
        mode='lines',
        name='Price'
    ))

    # (signal text, legend label, marker colour, marker symbol)
    marker_specs = (
        ("buy", 'Buy Signal', 'green', 'triangle-up'),
        ("sell", 'Sell Signal', 'red', 'triangle-down'),
    )
    for wanted, label, colour, shape in marker_specs:
        hit_steps = [i for i, s in enumerate(signals) if s == wanted]
        hit_prices = [prices[i] for i in hit_steps]
        fig.add_trace(go.Scatter(
            x=hit_steps,
            y=hit_prices,
            mode='markers',
            name=label,
            marker=dict(color=colour, size=10, symbol=shape)
        ))

    fig.update_layout(title=title,
                      xaxis_title='Trading Steps',
                      yaxis_title='Price ($)')
    fig.show()

# -------------------------
# Main Testing Routine
# -------------------------
if __name__ == '__main__':
    try:
        # Market indexes to evaluate on: display name -> ticker symbol.
        indexes = {
            "USA_S&P500": "^GSPC",        # S&P 500 Index
            "India_Nifty50": "^NSEI",      # Nifty 50 Index
            "Japan_Nikkei225": "^N225",    # Nikkei 225 Index
            "UK_FTSE100": "^FTSE",         # FTSE 100 Index
            "France_CAC40": "^FCHI"        # CAC 40 Index
        }
        # One-year evaluation window and starting cash.
        TEST_START_DATE = '2024-01-01'
        TEST_END_DATE   = '2025-01-01'
        TEST_INITIAL_BALANCE = 10000

        # Rebuild the network with the training dimensions and load its weights.
        state_size = 12  # must equal the training state dimension
        action_size = 5  # five discrete actions
        model_path = 'final_model.pth'
        agent = DQNAgent(state_size, action_size)
        checkpoint = torch.load(model_path, map_location=device)
        agent.load_state_dict(checkpoint)  # Adjust if checkpoint is nested.
        agent.eval()
        print("Pre-trained model loaded successfully.")

        data_handler = DataHandler()

        # Evaluate each index independently; a failure on one index is
        # reported and does not stop the remaining ones.
        for index_name, ticker in indexes.items():
            try:
                print("\n============================================")
                print(f"Testing on {index_name} ({ticker})")
                test_data = data_handler.fetch_data(ticker, TEST_START_DATE, TEST_END_DATE)
                env = TradingEnvironment(test_data, initial_balance=TEST_INITIAL_BALANCE)
                portfolio_history, actions_taken, signals = test_agent(agent, env)

                final_value = portfolio_history[-1]
                return_pct = ((final_value / TEST_INITIAL_BALANCE) - 1) * 100
                print(f"Initial Balance: ${TEST_INITIAL_BALANCE:.2f}")
                print(f"Final Portfolio Value: ${final_value:.2f}")
                print(f"Cumulative Return: {return_pct:.2f}%")

                title_portfolio = f"{index_name} Portfolio Evolution (Return: {return_pct:.2f}%)"
                title_price = f"{index_name} Price with Buy/Sell Signals"

                plot_portfolio(portfolio_history, signals, title_portfolio)

                # Re-index the prices sequentially so they line up with the
                # step numbers used for the signals.
                test_data_reset = test_data.reset_index(drop=True)
                plot_price_signals(test_data_reset, signals, title_price)
            except Exception as inner_ex:
                print(f"Error testing on {ticker}: {inner_ex}")

    except Exception as e:
        # Report, then re-raise so the failure is not hidden.
        print(f"Error during testing: {e}")
        raise
