In [68]:
## required libraries ##

In [10]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from datetime import datetime, timedelta
import os
import logging
from entmax import sparsemax


In [11]:
# Set device
# Set up logging

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logging.basicConfig(filename='outliers.log', level=logging.INFO,
                    format='%(asctime)s - %(message)s')

In [13]:
# Data Preprocessing

In [14]:
def load_and_preprocess_data(file_paths, seq_len=10, date_column='Date', feature_columns=['Open', 'Close', 'High', 'Low']):
    dfs = []
    for file in file_paths:
        df = pd.read_excel(file)
        df.columns = df.columns.str.strip().str.lower()
        required_columns = [date_column.lower()] + [col.lower() for col in feature_columns]

        def convert_date(date):
            if isinstance(date, (int, float)):
                base_date = datetime(1899, 12, 30)
                return pd.to_datetime(base_date + timedelta(days=date))
            elif isinstance(date, pd.Timestamp):
                return date
            else:
                return pd.to_datetime(date, errors='coerce')

        df['date'] = df['date'].apply(convert_date)
        df = df.set_index('date')

        for col in feature_columns:
            df[col.lower()] = df[col.lower()].round(2)

        if 'volume' in df.columns:
            df = df[df['volume'] != 0]
            df = df.drop(columns=['volume'])

        df = df[required_columns[1:]].dropna()
        dfs.append(df)

    common_dates = pd.concat([df[['close']] for df in dfs], axis=1).dropna().index
    dfs = [df.loc[common_dates] for df in dfs]

    data = np.stack([df[[col.lower() for col in feature_columns]].values for df in dfs], axis=1)
    sequences = [data[i:i + seq_len] for i in range(len(data) - seq_len + 1)]
    sequences = torch.tensor(np.array(sequences), dtype=torch.float32).to(device)
    print(f"Final sequence tensor shape: {sequences.shape}")
    return sequences

In [15]:
# Dilated Causal Convolution Layer

In [16]:
class DCC(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super(DCC, self).__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,
                              padding=(kernel_size - 1) * dilation, dilation=dilation)
        self.kernel_size = kernel_size
        self.dilation = dilation

    def forward(self, x):
        x = self.conv(x)
        padding = (self.kernel_size - 1) * self.dilation
        x = x[:, :, :-padding] if padding > 0 else x
        return F.relu(x)

In [17]:
# Graph Attention Layer

In [18]:
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features, dropout=0.1):
        super(GATLayer, self).__init__()
        self.Wg = nn.Linear(in_features, out_features, bias=False)
        self.Wa = nn.Linear(2 * in_features, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, node_features, edge_info):
        batch_size, num_nodes, in_features = node_features.size()
        h = self.Wg(node_features)

        attention = torch.zeros(batch_size, num_nodes, num_nodes, device=node_features.device)
        for i in range(num_nodes):
            for j in range(num_nodes):
                concat_features = torch.cat([node_features[:, i, :], node_features[:, j, :]], dim=-1)
                gate = torch.tanh(self.Wa(concat_features))
                attention[:, i, j] = edge_info[:, i, j] * gate.squeeze()

        attention = F.softmax(attention, dim=-1)
        attention = self.dropout(attention)

        h_prime = torch.bmm(attention, h)
        return F.elu(h_prime)


In [19]:
# Portfolio Optimization Model

In [20]:
class PortfolioOptimization(nn.Module):
    def __init__(self, num_assets, num_features=4, hidden_dim=64, input_seq_len=9):
        super(PortfolioOptimization, self).__init__()
        self.num_assets = num_assets
        self.hidden_dim = hidden_dim
        self.input_seq_len = input_seq_len
        
        self.dcc1 = DCC(num_features, hidden_dim, kernel_size=3, dilation=1)
        self.dcc2 = DCC(hidden_dim, hidden_dim, kernel_size=3, dilation=2)
        self.dcc3 = DCC(hidden_dim, hidden_dim, kernel_size=3, dilation=4)

        self.Wq = nn.Linear(num_features * input_seq_len, hidden_dim)
        self.Wk = nn.Linear(num_features * input_seq_len, hidden_dim)
        self.Wv = nn.Linear(num_features * input_seq_len, hidden_dim)

        self.gat = GATLayer(hidden_dim, hidden_dim)

        self.Wr = nn.Linear(hidden_dim, hidden_dim)
        self.We = nn.Linear(hidden_dim, hidden_dim)

        self.dcc_pred = DCC(hidden_dim * 3, num_features, kernel_size=3, dilation=1)

        self.Wf = nn.Linear(hidden_dim * 3, hidden_dim)
        self.conv_policy = nn.Conv1d(hidden_dim, hidden_dim, kernel_size=1)
        self.Wt = nn.Linear(hidden_dim + num_features, hidden_dim)
        self.Ww = nn.Linear(hidden_dim, num_assets)


        self.temperature = nn.Parameter(torch.tensor(.5))  # Learnable scalar


    def forward(self, x, prev_weights=None):
        if x.dim() == 3:
            x = x.unsqueeze(0)
        batch_size, seq_len, num_assets, num_features = x.size()

        x_reshaped = x.permute(0, 2, 3, 1).reshape(batch_size * num_assets, num_features, seq_len)
        fe = self.dcc1(x_reshaped)
        fe = self.dcc2(fe)
        fe = self.dcc3(fe)
        fe = fe[:, :, -1].reshape(batch_size, num_assets, self.hidden_dim)

        patches_flat = x.reshape(batch_size, num_assets, -1)
        q = self.Wq(patches_flat)
        k = self.Wk(patches_flat)
        v = self.Wv(patches_flat)
        attention = torch.bmm(q, k.transpose(1, 2)) / (self.hidden_dim ** 0.5)
        attention = F.softmax(attention, dim=-1)

        fr = self.gat(fe, attention)

        fm = self.Wr(fr) + self.We(fe)
        fm = fm.mean(dim=1, keepdim=True).expand(-1, num_assets, -1)

        f = torch.cat([fe, fr, fm], dim=-1)
        f_reshaped = f.permute(0, 2, 1)
        x_pred = self.dcc_pred(f_reshaped)
        x_pred = x_pred.permute(0, 2, 1)

        f_policy = F.relu(self.conv_policy(self.Wf(f).permute(0, 2, 1))).permute(0, 2, 1)
        f_policy = self.Wt(torch.cat([f_policy, x_pred], dim=-1))
        
        if prev_weights is not None:
            f_policy = f_policy + prev_weights.unsqueeze(-1)
        
        # Select last time step for weight generation
        raw_weights = self.Ww(f_policy[:, -1]) / self.temperature
        raw_scores = self.Ww(f_policy[:, -1])  # [batch_size, num_assets]

        # weights = F.softmax(raw_weights, dim=-1)
        # weights = 0.05 + 0.80 * weights  # 5% min, 95% max per asset
        weights = F.softmax(raw_weights / (self.temperature + 1e-8), dim=-1)  # Add temperature
        weights = 0.10 + 0.60 * weights  # New: 10% min, 70% max (tighter bounds)
        weights = weights / torch.sum(weights, dim=-1, keepdim=True)  # Renormalize

        
        return x_pred, weights


# ------------------------
# Loss Functions
# ------------------------
def prediction_loss(pred, target):
    return F.mse_loss(pred, target)

def transaction_cost(prev_weights, new_weights, cost_rate=0.009):
    if prev_weights is None:
        return torch.zeros(new_weights.size(0), device=new_weights.device)
    return cost_rate * torch.sum(torch.abs(new_weights - prev_weights), dim=-1)
def entropy_loss(weights):
    return -torch.sum((weights+ 1e-6) * torch.log(weights + 1e-8), dim=-1)  # per sample


In [21]:
# Training Loop

In [22]:
def train_model(model, sequences, num_epochs=15, lr=0.001):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()
    prev_weights = None
    for epoch in range(num_epochs):
        total_pred_loss = 0
        total_reward = 0
        for batch in sequences:
            x_input = batch[:-1].unsqueeze(0)
            target = batch[1:].unsqueeze(0)[:, -1]

            x_pred, weights = model(x_input, prev_weights)
            pred_loss = prediction_loss(x_pred, target)

            relative_prices = target[:, :, 1] / batch[-2, :, 1]
            portfolio_return = torch.sum(relative_prices * weights.squeeze(1), dim=-1)
            transaction_costs = transaction_cost(prev_weights, weights.squeeze(1), cost_rate=0.0003)
            u_t = 1 - transaction_costs
            log_return = torch.log(u_t * portfolio_return + 1e-8)
            entropy = entropy_loss(weights.squeeze(1))


            lambda_entropy = 100 # tune this value
            portfolio_variance = torch.var(relative_prices * weights.squeeze(1))
            lambda_variance = 0.6  # Risk-return tradeoff

            hhi_penalty = torch.sum(weights**2)  # Measures concentration
            lambda_hhi = 1.0  # Strength of HHI penalty

            rl_loss = (-torch.mean(log_return) + lambda_entropy * torch.mean(entropy) 
                       + lambda_variance * portfolio_variance+ lambda_hhi * hhi_penalty)  # New penalty
            #rl_loss = -torch.mean(log_return) + lambda_entropy * torch.mean(entropy) + lambda_variance * portfolio_variance            
            
            lambda_pred = 0.1  # Prediction loss weight
            loss = lambda_pred * pred_loss + rl_loss  # Focus on returns/risk

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_pred_loss += pred_loss.item()
            total_reward += log_return.mean().item()
            prev_weights = weights.detach().squeeze(1)

        print(f"Epoch {epoch + 1}, Pred Loss: {total_pred_loss / len(sequences):.4f}, Avg Return: {total_reward / len(sequences):.4f}")


In [23]:
# Main Execution

In [24]:
def main():
    file_paths = [
        r'training/bpcl1100d.xlsx',
        r'training/hindpetro1100d.xlsx',
        r'training/nmdc1100d.xlsx',
        r'training/ongc1100d.xlsx',
        r'training/irfc1100d.xlsx',
        r'training/ioc1100d.xlsx',

    ]
    seq_len = 10
    sequences = load_and_preprocess_data(file_paths, seq_len=seq_len)

    num_assets = len(file_paths)
    model = PortfolioOptimization(num_assets=num_assets, num_features=4, hidden_dim=128, input_seq_len=seq_len - 1).to(device)
    train_model(model, sequences, num_epochs=5)

    model.eval()
    with torch.no_grad():
        test_input = sequences[0:1, :-1]
        x_pred, weights = model(test_input)
        print("Predicted Prices (Open, Close, High, Low):", x_pred.cpu().numpy())
        print("Portfolio Weights:", weights[0].cpu().numpy())
    torch.save(model.state_dict(), 'rl_portfolio_optimization.pth')
    print("Model saved to rl_portfolio_optimization.pth")
if __name__ == '__main__':
    main()

Final sequence tensor shape: torch.Size([831, 10, 6, 4])
Epoch 1, Pred Loss: 7828.3176, Avg Return: 0.0018
Epoch 2, Pred Loss: 7738.6075, Avg Return: 0.0018
Epoch 3, Pred Loss: 7733.0211, Avg Return: 0.0018
Epoch 4, Pred Loss: 7733.7358, Avg Return: 0.0018
Epoch 5, Pred Loss: 7729.0688, Avg Return: 0.0018
Predicted Prices (Open, Close, High, Low): [[[165.38034    0.       168.20473  164.30557 ]
  [179.40688    0.       183.73047  178.32265 ]
  [ 36.527283   0.        39.276367  37.80002 ]
  [116.40097    0.       119.67396  116.74182 ]
  [ 26.487701   0.        28.22962   27.306604]
  [ 66.38871    0.        68.580635  67.022   ]]]
Portfolio Weights: [0.08333332 0.08333332 0.08333332 0.08333332 0.5833333  0.08333332]
Model saved to rl_portfolio_optimization.pth


Final sequence tensor shape: torch.Size([831, 10, 5, 4])
Epoch 1, Loss: 20.0268, Return: 1.0012, Pred Loss: 200.2791
Epoch 2, Loss: 8.5020, Return: 1.0012, Pred Loss: 85.0305
Epoch 3, Loss: 7.9447, Return: 1.0013, Pred Loss: 79.4588
Epoch 4, Loss: 8.2472, Return: 1.0013, Pred Loss: 82.4830
Epoch 5, Loss: 8.3282, Return: 1.0013, Pred Loss: 83.2936
Predicted Prices: [[188.58778  189.73128  191.99306  187.52914 ]
 [ 45.407887  45.283485  45.406166  45.41267 ]
 [126.42849  126.88731  128.71248  125.734604]
 [ 34.13533   34.83085   35.84842   33.88716 ]
 [ 76.29765   76.8075    78.13028   75.876015]]
Portfolio Weights: [0.6735402  0.07463504 0.06970803 0.08284672 0.09927008]
Model saved to rl_portfolio_optimization.pth
