In [68]:
## required libraries ##

In [10]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from datetime import datetime, timedelta
import os
import logging
from entmax import sparsemax


In [11]:
# Set device
# Set up logging

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logging.basicConfig(filename='outliers.log', level=logging.INFO,
                    format='%(asctime)s - %(message)s')

In [13]:
# Data Preprocessing

In [14]:
def load_and_preprocess_data(file_paths, seq_len=10, date_column='Date', feature_columns=['Open', 'Close', 'High', 'Low']):
    dfs = []
    for file in file_paths:
        df = pd.read_excel(file)
        df.columns = df.columns.str.strip().str.lower()
        required_columns = [date_column.lower()] + [col.lower() for col in feature_columns]

        def convert_date(date):
            if isinstance(date, (int, float)):
                base_date = datetime(1899, 12, 30)
                return pd.to_datetime(base_date + timedelta(days=date))
            elif isinstance(date, pd.Timestamp):
                return date
            else:
                return pd.to_datetime(date, errors='coerce')

        df['date'] = df['date'].apply(convert_date)
        df = df.set_index('date')

        for col in feature_columns:
            df[col.lower()] = df[col.lower()].round(2)

        if 'volume' in df.columns:
            df = df[df['volume'] != 0]
            df = df.drop(columns=['volume'])

        df = df[required_columns[1:]].dropna()
        dfs.append(df)

    common_dates = pd.concat([df[['close']] for df in dfs], axis=1).dropna().index
    dfs = [df.loc[common_dates] for df in dfs]

    data = np.stack([df[[col.lower() for col in feature_columns]].values for df in dfs], axis=1)
    sequences = [data[i:i + seq_len] for i in range(len(data) - seq_len + 1)]
    sequences = torch.tensor(np.array(sequences), dtype=torch.float32).to(device)
    print(f"Final sequence tensor shape: {sequences.shape}")
    return sequences

In [15]:
# Dilated Causal Convolution Layer

In [16]:
class DCC(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super(DCC, self).__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,
                              padding=(kernel_size - 1) * dilation, dilation=dilation)
        self.kernel_size = kernel_size
        self.dilation = dilation

    def forward(self, x):
        x = self.conv(x)
        padding = (self.kernel_size - 1) * self.dilation
        x = x[:, :, :-padding] if padding > 0 else x
        return F.relu(x)

In [17]:
# Graph Attention Layer

In [18]:
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features, dropout=0.1):
        super(GATLayer, self).__init__()
        self.Wg = nn.Linear(in_features, out_features, bias=False)
        self.Wa = nn.Linear(2 * in_features, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, node_features, edge_info):
        batch_size, num_nodes, in_features = node_features.size()
        h = self.Wg(node_features)

        attention = torch.zeros(batch_size, num_nodes, num_nodes, device=node_features.device)
        for i in range(num_nodes):
            for j in range(num_nodes):
                concat_features = torch.cat([node_features[:, i, :], node_features[:, j, :]], dim=-1)
                gate = torch.tanh(self.Wa(concat_features))
                attention[:, i, j] = edge_info[:, i, j] * gate.squeeze()

        attention = F.softmax(attention, dim=-1)
        attention = self.dropout(attention)

        h_prime = torch.bmm(attention, h)
        return F.elu(h_prime)


In [19]:
# Portfolio Optimization Model

In [20]:
class PortfolioOptimization(nn.Module):
    def __init__(self, num_assets, num_features=4, hidden_dim=64, input_seq_len=9):
        super(PortfolioOptimization, self).__init__()
        self.num_assets = num_assets
        self.hidden_dim = hidden_dim
        self.input_seq_len = input_seq_len
        
        self.dcc1 = DCC(num_features, hidden_dim, kernel_size=3, dilation=1)
        self.dcc2 = DCC(hidden_dim, hidden_dim, kernel_size=3, dilation=2)
        self.dcc3 = DCC(hidden_dim, hidden_dim, kernel_size=3, dilation=4)

        self.Wq = nn.Linear(num_features * input_seq_len, hidden_dim)
        self.Wk = nn.Linear(num_features * input_seq_len, hidden_dim)
        self.Wv = nn.Linear(num_features * input_seq_len, hidden_dim)

        self.gat = GATLayer(hidden_dim, hidden_dim)

        self.Wr = nn.Linear(hidden_dim, hidden_dim)
        self.We = nn.Linear(hidden_dim, hidden_dim)

        self.dcc_pred = DCC(hidden_dim * 3, num_features, kernel_size=3, dilation=1)

        self.Wf = nn.Linear(hidden_dim * 3, hidden_dim)
        self.conv_policy = nn.Conv1d(hidden_dim, hidden_dim, kernel_size=1)
        self.Wt = nn.Linear(hidden_dim + num_features, hidden_dim)
        self.Ww = nn.Linear(hidden_dim, num_assets)


        self.temperature = nn.Parameter(torch.tensor(.5))  # Learnable scalar


    def forward(self, x, prev_weights=None):
        if x.dim() == 3:
            x = x.unsqueeze(0)
        batch_size, seq_len, num_assets, num_features = x.size()

        x_reshaped = x.permute(0, 2, 3, 1).reshape(batch_size * num_assets, num_features, seq_len)
        fe = self.dcc1(x_reshaped)
        fe = self.dcc2(fe)
        fe = self.dcc3(fe)
        fe = fe[:, :, -1].reshape(batch_size, num_assets, self.hidden_dim)

        patches_flat = x.reshape(batch_size, num_assets, -1)
        q = self.Wq(patches_flat)
        k = self.Wk(patches_flat)
        v = self.Wv(patches_flat)
        attention = torch.bmm(q, k.transpose(1, 2)) / (self.hidden_dim ** 0.5)
        attention = F.softmax(attention, dim=-1)

        fr = self.gat(fe, attention)

        fm = self.Wr(fr) + self.We(fe)
        fm = fm.mean(dim=1, keepdim=True).expand(-1, num_assets, -1)

        f = torch.cat([fe, fr, fm], dim=-1)
        f_reshaped = f.permute(0, 2, 1)
        x_pred = self.dcc_pred(f_reshaped)
        x_pred = x_pred.permute(0, 2, 1)

        f_policy = F.relu(self.conv_policy(self.Wf(f).permute(0, 2, 1))).permute(0, 2, 1)
        f_policy = self.Wt(torch.cat([f_policy, x_pred], dim=-1))
        
        if prev_weights is not None:
            f_policy = f_policy + prev_weights.unsqueeze(-1)
        
        # Select last time step for weight generation
        raw_weights = self.Ww(f_policy[:, -1]) / self.temperature
        raw_scores = self.Ww(f_policy[:, -1])  # [batch_size, num_assets]

        # weights = F.softmax(raw_weights, dim=-1)
        # weights = 0.05 + 0.80 * weights  # 5% min, 95% max per asset
        weights = F.softmax(raw_weights / (self.temperature + 1e-8), dim=-1)  # Add temperature
        weights = 0.10 + 0.60 * weights  # New: 10% min, 70% max (tighter bounds)
        weights = weights / torch.sum(weights, dim=-1, keepdim=True)  # Renormalize

        
        return x_pred, weights


# ------------------------
# Loss Functions
# ------------------------
def prediction_loss(pred, target):
    return F.mse_loss(pred, target)

def transaction_cost(prev_weights, new_weights, cost_rate=0.009):
    if prev_weights is None:
        return torch.zeros(new_weights.size(0), device=new_weights.device)
    return cost_rate * torch.sum(torch.abs(new_weights - prev_weights), dim=-1)
def entropy_loss(weights):
    return -torch.sum((weights+ 1e-6) * torch.log(weights + 1e-8), dim=-1)  # per sample


In [21]:
# Training Loop

In [22]:
def train_model(model, sequences, num_epochs=15, lr=0.001):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()
    prev_weights = None
    for epoch in range(num_epochs):
        total_pred_loss = 0
        total_reward = 0
        for batch in sequences:
            x_input = batch[:-1].unsqueeze(0)
            target = batch[1:].unsqueeze(0)[:, -1]

            x_pred, weights = model(x_input, prev_weights)
            pred_loss = prediction_loss(x_pred, target)

            relative_prices = target[:, :, 1] / batch[-2, :, 1]
            portfolio_return = torch.sum(relative_prices * weights.squeeze(1), dim=-1)
            transaction_costs = transaction_cost(prev_weights, weights.squeeze(1), cost_rate=0.0003)
            u_t = 1 - transaction_costs
            log_return = torch.log(u_t * portfolio_return + 1e-8)
            entropy = entropy_loss(weights.squeeze(1))


            lambda_entropy = 100 # tune this value
            portfolio_variance = torch.var(relative_prices * weights.squeeze(1))
            lambda_variance = 0.6  # Risk-return tradeoff

            hhi_penalty = torch.sum(weights**2)  # Measures concentration
            lambda_hhi = 1.0  # Strength of HHI penalty

            rl_loss = (-torch.mean(log_return) + lambda_entropy * torch.mean(entropy) 
                       + lambda_variance * portfolio_variance+ lambda_hhi * hhi_penalty)  # New penalty
            #rl_loss = -torch.mean(log_return) + lambda_entropy * torch.mean(entropy) + lambda_variance * portfolio_variance            
            
            lambda_pred = 0.1  # Prediction loss weight
            loss = lambda_pred * pred_loss + rl_loss  # Focus on returns/risk

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_pred_loss += pred_loss.item()
            total_reward += log_return.mean().item()
            prev_weights = weights.detach().squeeze(1)

        print(f"Epoch {epoch + 1}, Pred Loss: {total_pred_loss / len(sequences):.4f}, Avg Return: {total_reward / len(sequences):.4f}")


In [23]:
# Main Execution

In [24]:
def main():
    file_paths = [
        r'training/bpcl1100d.xlsx',
        r'training/hindpetro1100d.xlsx',
        r'training/nmdc1100d.xlsx',
        r'training/ongc1100d.xlsx',
        r'training/irfc1100d.xlsx',
        r'training/ioc1100d.xlsx',

    ]
    seq_len = 10
    sequences = load_and_preprocess_data(file_paths, seq_len=seq_len)

    num_assets = len(file_paths)
    model = PortfolioOptimization(num_assets=num_assets, num_features=4, hidden_dim=128, input_seq_len=seq_len - 1).to(device)
    train_model(model, sequences, num_epochs=5)

    model.eval()
    with torch.no_grad():
        test_input = sequences[0:1, :-1]
        x_pred, weights = model(test_input)
        print("Predicted Prices (Open, Close, High, Low):", x_pred.cpu().numpy())
        print("Portfolio Weights:", weights[0].cpu().numpy())
    torch.save(model.state_dict(), 'rl_portfolio_optimization.pth')
    print("Model saved to rl_portfolio_optimization.pth")
if __name__ == '__main__':
    main()

Final sequence tensor shape: torch.Size([831, 10, 6, 4])
Epoch 1, Pred Loss: 7828.3176, Avg Return: 0.0018
Epoch 2, Pred Loss: 7738.6075, Avg Return: 0.0018
Epoch 3, Pred Loss: 7733.0211, Avg Return: 0.0018
Epoch 4, Pred Loss: 7733.7358, Avg Return: 0.0018
Epoch 5, Pred Loss: 7729.0688, Avg Return: 0.0018
Predicted Prices (Open, Close, High, Low): [[[165.38034    0.       168.20473  164.30557 ]
  [179.40688    0.       183.73047  178.32265 ]
  [ 36.527283   0.        39.276367  37.80002 ]
  [116.40097    0.       119.67396  116.74182 ]
  [ 26.487701   0.        28.22962   27.306604]
  [ 66.38871    0.        68.580635  67.022   ]]]
Portfolio Weights: [0.08333332 0.08333332 0.08333332 0.08333332 0.5833333  0.08333332]
Model saved to rl_portfolio_optimization.pth


In [5]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from datetime import datetime, timedelta
import os
import logging
from entmax import sparsemax  # Make sure you have installed: pip install entmax
import matplotlib.pyplot as plt


import seaborn as sns
sns.set_theme()  # This will set default Seaborn style

plt.style.use('seaborn-v0_8')  # or 'ggplot', 'bmh', etc.
plt.rcParams['figure.figsize'] = (12, 8)

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logging.basicConfig(filename='portfolio_optimization.log', level=logging.INFO,
                    format='%(asctime)s - %(message)s')

# Data Preprocessing
def load_and_preprocess_data(file_paths, seq_len=10, date_column='Date', feature_columns=['Open', 'Close', 'High', 'Low']):
    dfs = []
    for file in file_paths:
        df = pd.read_excel(file)
        df.columns = df.columns.str.strip().str.lower()
        required_columns = [date_column.lower()] + [col.lower() for col in feature_columns]

        def convert_date(date):
            if isinstance(date, (int, float)):
                base_date = datetime(1899, 12, 30)
                return pd.to_datetime(base_date + timedelta(days=date))
            elif isinstance(date, pd.Timestamp):
                return date
            else:
                return pd.to_datetime(date, errors='coerce')

        df['date'] = df['date'].apply(convert_date)
        df = df.set_index('date')

        for col in feature_columns:
            df[col.lower()] = df[col.lower()].round(2)

        if 'volume' in df.columns:
            df = df[df['volume'] != 0]
            df = df.drop(columns=['volume'])

        df = df[required_columns[1:]].dropna()
        dfs.append(df)

    common_dates = pd.concat([df[['close']] for df in dfs], axis=1).dropna().index
    dfs = [df.loc[common_dates] for df in dfs]

    data = np.stack([df[[col.lower() for col in feature_columns]].values for df in dfs], axis=1)
    sequences = [data[i:i + seq_len] for i in range(len(data) - seq_len + 1)]
    sequences = torch.tensor(np.array(sequences), dtype=torch.float32).to(device)
    print(f"Final sequence tensor shape: {sequences.shape}")
    return sequences

# Dilated Causal Convolution Layer
class DCC(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super(DCC, self).__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,
                              padding=(kernel_size - 1) * dilation, dilation=dilation)
        self.kernel_size = kernel_size
        self.dilation = dilation

    def forward(self, x):
        x = self.conv(x)
        padding = (self.kernel_size - 1) * self.dilation
        x = x[:, :, :-padding] if padding > 0 else x
        return F.relu(x)

# Graph Attention Layer
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features, dropout=0.1):
        super(GATLayer, self).__init__()
        self.Wg = nn.Linear(in_features, out_features, bias=False)
        self.Wa = nn.Linear(2 * in_features, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, node_features, edge_info):
        batch_size, num_nodes, in_features = node_features.size()
        h = self.Wg(node_features)

        attention = torch.zeros(batch_size, num_nodes, num_nodes, device=node_features.device)
        for i in range(num_nodes):
            for j in range(num_nodes):
                concat_features = torch.cat([node_features[:, i, :], node_features[:, j, :]], dim=-1)
                gate = torch.tanh(self.Wa(concat_features))
                attention[:, i, j] = edge_info[:, i, j] * gate.squeeze()

        attention = F.softmax(attention, dim=-1)
        attention = self.dropout(attention)

        h_prime = torch.bmm(attention, h)
        return F.elu(h_prime)

# Portfolio Optimization Model
class PortfolioOptimization(nn.Module):
    def __init__(self, num_assets, num_features=4, hidden_dim=64, input_seq_len=9):
        super(PortfolioOptimization, self).__init__()
        self.num_assets = num_assets
        self.hidden_dim = hidden_dim
        self.input_seq_len = input_seq_len
        
        # Feature extraction
        self.dcc1 = DCC(num_features, hidden_dim, kernel_size=3, dilation=1)
        self.dcc2 = DCC(hidden_dim, hidden_dim, kernel_size=3, dilation=2)
        self.dcc3 = DCC(hidden_dim, hidden_dim, kernel_size=3, dilation=4)

        # Attention mechanism
        self.Wq = nn.Linear(num_features * input_seq_len, hidden_dim)
        self.Wk = nn.Linear(num_features * input_seq_len, hidden_dim)
        self.Wv = nn.Linear(num_features * input_seq_len, hidden_dim)

        # Graph attention
        self.gat = GATLayer(hidden_dim, hidden_dim)

        # Feature combination
        self.Wr = nn.Linear(hidden_dim, hidden_dim)
        self.We = nn.Linear(hidden_dim, hidden_dim)

        # Prediction head
        self.dcc_pred = DCC(hidden_dim * 3, num_features, kernel_size=3, dilation=1)

        # Policy head
        self.Wf = nn.Linear(hidden_dim * 3, hidden_dim)
        self.conv_policy = nn.Conv1d(hidden_dim, hidden_dim, kernel_size=1)
        self.Wt = nn.Linear(hidden_dim + num_features, hidden_dim)
        self.Ww = nn.Linear(hidden_dim, num_assets)

        # Allocation parameters
        self.min_weight = 0.05
        self.rank_power = 1.0
        self.temperature = nn.Parameter(torch.tensor(1.0))  # Learnable temperature
        self.price_scale = nn.Parameter(torch.ones(num_features))
        self.price_bias = nn.Parameter(torch.zeros(num_features))

    def forward(self, x, prev_weights=None):
       
        if x.dim() == 3:
            x = x.unsqueeze(0)
        batch_size, seq_len, num_assets, num_features = x.size()
    
        # Feature extraction
        x_reshaped = x.permute(0, 2, 3, 1).reshape(batch_size * num_assets, num_features, seq_len)
        fe = self.dcc1(x_reshaped)
        fe = self.dcc2(fe)
        fe = self.dcc3(fe)
        fe = fe[:, :, -1].reshape(batch_size, num_assets, self.hidden_dim)
    
        # Cross-asset attention
        patches_flat = x.reshape(batch_size, num_assets, -1)
        q = self.Wq(patches_flat)
        k = self.Wk(patches_flat)
        v = self.Wv(patches_flat)
        attention = torch.bmm(q, k.transpose(1, 2)) / (self.hidden_dim ** 0.5)
        attention = F.softmax(attention, dim=-1)
    
        # Graph attention
        fr = self.gat(fe, attention)
    
        # Market context
        fm = self.Wr(fr) + self.We(fe)
        fm = fm.mean(dim=1, keepdim=True).expand(-1, num_assets, -1)
    
        # Combined features
        f = torch.cat([fe, fr, fm], dim=-1)
        f_reshaped = f.permute(0, 2, 1)
        x_pred = self.dcc_pred(f_reshaped)
        x_pred = x_pred.permute(0, 2, 1)
        
        # Ensure positive predictions using modified softplus
        x_pred = F.softplus(x_pred) * 1.5  # Scale to prevent predictions from being too small
    
        # Policy features
        f_policy = F.relu(self.conv_policy(self.Wf(f).permute(0, 2, 1))).permute(0, 2, 1)
        f_policy = self.Wt(torch.cat([f_policy, x_pred], dim=-1))
        
        if prev_weights is not None:
            f_policy = f_policy + prev_weights.unsqueeze(-1)
    
        # Raw score for allocation
        raw_scores = self.Ww(f_policy[:, -1])  # shape: [B, A]
        
        # Get predicted returns from the Close prices (index 1 in features)
        predicted_returns = x_pred[:, :, 1]  # shape: [B, A]
        
        # Combine raw scores with predicted returns for ranking
        combined_scores = raw_scores + predicted_returns.detach()  # Detach to prevent double counting
        
        # Create differentiable rank weights
        ranks = torch.argsort(torch.argsort(combined_scores, dim=1, descending=True))
        rank_weights = 1.0 / (ranks.float() + 1)  # 1/rank weighting
        
        # Normalize rank weights
        rank_weights = rank_weights / rank_weights.sum(dim=1, keepdim=True)
        
        # Apply temperature-scaled sparsemax to combined scores
        scaled_scores = combined_scores / (self.temperature + 1e-8)
        sparse_weights = sparsemax(scaled_scores, dim=-1)
        
        # Blend sparse weights with rank weights (adjust ratio as needed)
        weights = 0.7 * sparse_weights + 0.3 * rank_weights
        
        # Apply min allocation constraint and normalize
        weights = self.min_weight + (1.0 - self.min_weight * num_assets) * weights
        weights = weights / weights.sum(dim=1, keepdim=True)
        
        return x_pred, weights
# Training Loop
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# Add this after the imports
plt.style.use('seaborn')
plt.rcParams['figure.figsize'] = (12, 8)

# Modified Training Loop with Visualization
def train_model(model, sequences, num_epochs=15, lr=0.001):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()
    prev_weights = None
    
    # Initialize tracking variables
    train_losses = []
    train_returns = []
    pred_losses = []
    weight_entropies = []
    max_weights = []
    min_weights = []

    for epoch in range(num_epochs):
        epoch_loss = 0
        epoch_return = 0
        epoch_pred_loss = 0
        epoch_entropy = 0
        epoch_max_weight = 0
        epoch_min_weight = 0

        for batch in sequences:
            optimizer.zero_grad()
            
            x_input = batch[:-1].unsqueeze(0)
            target = batch[1:].unsqueeze(0)[:, -1]

            x_pred, weights = model(x_input, prev_weights)

            # Calculate metrics
            with torch.no_grad():
                # Price prediction loss
                pred_loss = F.mse_loss(x_pred, target)
                
                # Portfolio metrics
                price_ratios = target[:, :, 1] / batch[-2, :, 1]  # Close-to-close returns
                portfolio_return = torch.sum(price_ratios * weights, dim=-1)
                
                # Transaction costs
                if prev_weights is not None:
                    turnover = torch.sum(torch.abs(weights - prev_weights), dim=-1)
                    transaction_costs = 0.0003 * turnover
                    net_return = portfolio_return - transaction_costs
                else:
                    net_return = portfolio_return
                
                # Weight statistics
                entropy = -torch.sum(weights * torch.log(weights + 1e-8), dim=-1).mean()
                max_weight = weights.max(dim=-1)[0].mean()
                min_weight = weights.min(dim=-1)[0].mean()

            # Combined loss
            return_loss = -torch.mean(torch.log(net_return + 1e-6))
            loss = return_loss + 0.1 * pred_loss
            
            loss.backward()
            optimizer.step()

            # Accumulate metrics
            epoch_loss += loss.item()
            epoch_return += torch.mean(net_return).item()
            epoch_pred_loss += pred_loss.item()
            epoch_entropy += entropy.item()
            epoch_max_weight += max_weight.item()
            epoch_min_weight += min_weight.item()
            
            prev_weights = weights.detach()

        # Store epoch metrics
        n_batches = len(sequences)
        train_losses.append(epoch_loss / n_batches)
        train_returns.append(epoch_return / n_batches)
        pred_losses.append(epoch_pred_loss / n_batches)
        weight_entropies.append(epoch_entropy / n_batches)
        max_weights.append(epoch_max_weight / n_batches)
        min_weights.append(epoch_min_weight / n_batches)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"  Total Loss: {train_losses[-1]:.4f}")
        print(f"  Avg Return: {train_returns[-1]:.4f}")
        print(f"  Pred Loss: {pred_losses[-1]:.4f}")
        print(f"  Weight Entropy: {weight_entropies[-1]:.4f}")
        print(f"  Max Weight: {max_weights[-1]:.4f}, Min Weight: {min_weights[-1]:.4f}")

    # Plotting functions
    def plot_metrics():
        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 15))
        
        # Loss curves
        ax1.plot(train_losses, label='Total Loss', color='tab:blue')
        ax1.plot(pred_losses, label='Prediction Loss', color='tab:orange', linestyle='--')
        ax1.set_title('Training Losses')
        ax1.set_ylabel('Loss')
        ax1.legend()
        ax1.grid(True)
        ax1.xaxis.set_major_locator(MaxNLocator(integer=True))
        
        # Returns
        ax2.plot(train_returns, label='Portfolio Return', color='tab:green')
        ax2.set_title('Portfolio Returns')
        ax2.set_ylabel('Return')
        ax2.grid(True)
        ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
        
        # Weight statistics
        ax3.plot(max_weights, label='Max Weight', color='tab:red')
        ax3.plot(min_weights, label='Min Weight', color='tab:purple')
        ax3.plot(weight_entropies, label='Weight Entropy', color='tab:brown')
        ax3.set_title('Weight Distribution Metrics')
        ax3.set_xlabel('Epoch')
        ax3.set_ylabel('Value')
        ax3.legend()
        ax3.grid(True)
        ax3.xaxis.set_major_locator(MaxNLocator(integer=True))
        
        plt.tight_layout()
        plt.savefig('training_metrics.png')
        plt.show()
    
    # Plot final weights for last batch
    def plot_final_weights(weights):
        assets = ['HINDPETRO', 'NMDC', 'ONGC', 'IRFC', 'IOC']
        weights = weights[0].cpu().numpy()
        
        plt.figure(figsize=(10, 6))
        bars = plt.bar(assets, weights, color='skyblue')
        plt.title('Final Portfolio Weights')
        plt.ylabel('Allocation Percentage')
        plt.ylim(0, 1)
        
        # Add value labels
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:.2%}',
                    ha='center', va='bottom')
        
        plt.grid(axis='y', linestyle='--')
        plt.savefig('final_weights.png')
        plt.show()
    
    # Generate plots
    plot_metrics()
    
    # Plot weights from last batch
    with torch.no_grad():
        test_input = sequences[0:1, :-1]
        _, final_weights = model(test_input)
        plot_final_weights(final_weights)
    
    return {
        'losses': train_losses,
        'returns': train_returns,
        'pred_losses': pred_losses,
        'entropies': weight_entropies,
        'max_weights': max_weights,
        'min_weights': min_weights
    }

# Modified Main Execution
def main():
    file_paths = [
        r'training/hindpetro1100d.xlsx',
        r'training/nmdc1100d.xlsx',
        r'training/ongc1100d.xlsx',
        r'training/irfc1100d.xlsx',
        r'training/ioc1100d.xlsx'
    ]

    seq_len = 10
    sequences = load_and_preprocess_data(file_paths, seq_len=seq_len)
    num_assets = len(file_paths)

    model = PortfolioOptimization(num_assets=num_assets, num_features=4, hidden_dim=128, input_seq_len=seq_len-1).to(device)
    
    # Train and get metrics
    metrics = train_model(model, sequences, num_epochs=5)  # Increased epochs for better plots
    
    model.eval()
    with torch.no_grad():
        test_input = sequences[0:1, :-1]
        x_pred, weights = model(test_input)
        print("\nFinal Predictions and Weights:")
        print("Predicted Prices:", x_pred[0].cpu().numpy())
        print("Portfolio Weights:", weights[0].cpu().numpy())

    torch.save(model.state_dict(), 'rl_portfolio_optimization.pth')
    print("\nModel saved to rl_portfolio_optimization.pth")

    # Plot predicted prices
    def plot_predicted_prices(preds):
        assets = ['HINDPETRO', 'NMDC', 'ONGC', 'IRFC', 'IOC']
        features = ['Open', 'Close', 'High', 'Low']
        preds = preds[0].cpu().numpy()
        
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        axes = axes.flatten()
        
        for i, (ax, feature) in enumerate(zip(axes, features)):
            ax.bar(assets, preds[:, i], color=plt.cm.tab20(i))
            ax.set_title(f'Predicted {feature} Prices')
            ax.set_ylabel('Price')
            ax.grid(True)
            
            # Add value labels
            for j, asset in enumerate(assets):
                ax.text(j, preds[j, i], f'{preds[j, i]:.2f}',
                       ha='center', va='bottom')
        
        plt.tight_layout()
        plt.savefig('predicted_prices.png')
        plt.show()
    
    plot_predicted_prices(x_pred)

if __name__ == '__main__':
    main()

OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)