In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load data
# Each row of the CSV is treated as one sequence and each column as one step.
# Alternative dataset: "./CSV/general_langevin_discrete_10000_75_training.csv"
data = pd.read_csv("./CSV/GLEtt2_10000_100_training.csv").to_numpy()
print(f"Data shape: {data.shape}")

# Define parameters
# assumes the entries are integer bin indices starting at 0 — TODO confirm
number_of_bins = int(np.max(data)) + 1
total_number_steps = data.shape[1]
previous_steps = 2

# Prepare features and labels
# One (context, target) sample is built per sequence and per step: the target
# is the value at `step`, the context is the `previous_steps` values before it.
# The id `number_of_bins` (one past the largest bin) is used as padding.
padded_context = np.full(
    (data.shape[0], previous_steps), number_of_bins, dtype=np.int64
)

X = []
Y = []
for step in range(total_number_steps):
    first = step - previous_steps
    # NOTE(review): whenever the window would start before the sequence, the
    # whole context is padding, even if part of the history is already known
    # (e.g. step 1 with previous_steps = 2). Confirm this is intended.
    X.append(padded_context if first < 0 else data[:, first:step])
    Y.append(data[:, step])

# Stack along axis 1 so samples are grouped by sequence, then flatten to
# (n_sequences * total_number_steps, previous_steps) and (n_samples,).
X = np.stack(X, axis=1).reshape(-1, previous_steps)
Y = np.stack(Y, axis=1).reshape(-1)

# Split dataset (10% held out for validation, fixed seed for reproducibility)
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.1, random_state=42)

print(data.shape, number_of_bins)
print(f"X shape: {X.shape}, X_train: {X_train.shape}, X_val: {X_val.shape}")
print(f"Y shape: {Y.shape}, Y_train: {Y_train.shape}, Y_val: {Y_val.shape}")

# Convert to PyTorch tensors (int64: embedding indices and class targets)
X_train = torch.from_numpy(X_train).long()
Y_train = torch.from_numpy(Y_train).long()
X_val = torch.from_numpy(X_val).long()
Y_val = torch.from_numpy(Y_val).long()

class NGramModel(nn.Module):
    """Feedforward n-gram model with one tanh hidden layer.

    Note: a class with the same name is defined again further down in this
    file; once the whole file has run, that later definition is the one bound
    to the name ``NGramModel``.
    """

    def __init__(self, vocab_size, embed_dim, hidden_neurons, context_size):
        """Create the embedding table and the two linear layers.

        Args:
            vocab_size: Number of token ids (embedding rows and output logits).
            embed_dim: Width of each token embedding.
            hidden_neurons: Width of the hidden layer.
            context_size: Number of context tokens per sample.
        """
        super().__init__()
        flat_width = context_size * embed_dim
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fc1 = nn.Linear(flat_width, hidden_neurons)
        self.fc2 = nn.Linear(hidden_neurons, vocab_size)

    def forward(self, x):
        """Map token ids (batch_size, context_size) to logits (batch_size, vocab_size)."""
        embedded = self.embedding(x)  # (batch_size, context_size, embed_dim)
        # Concatenate the context embeddings of each sample into one vector.
        flat = embedded.view(embedded.shape[0], -1)
        activated = torch.tanh(self.fc1(flat))
        return self.fc2(activated)

# Hyperparameters
batch_size = 200  # number of samples drawn for each training minibatch
embed_dim = 10  # width of each token embedding vector
# NOTE(review): no code visible in this file reads `hidden_neurons`; the model
# is built with its own literal list of hidden layer sizes.
hidden_neurons = [30, 30, 100]
class NGramModel(nn.Module):
    """Feedforward n-gram model: token embeddings followed by a stack of tanh layers."""

    def __init__(self, vocab_size, embed_dim, hidden_layers, context_size, dropout=0.0):
        """Build the embedding table and the fully connected stack.

        Args:
            vocab_size: Number of token ids (embedding rows and output logits).
            embed_dim: Width of each token embedding.
            hidden_layers: Iterable of hidden layer widths, e.g. [200, 100]
                for two hidden layers; an empty list gives a single linear
                map from the flattened embeddings to the logits.
            context_size: Number of previous tokens fed to the model.
            dropout: Dropout probability applied after each hidden
                activation; values <= 0 add no dropout modules.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)

        # widths[k] is the input width of layer k and widths[k + 1] its output
        # width; the first entry is the flattened context embedding.
        widths = [context_size * embed_dim, *hidden_layers]

        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.Tanh()]
            if dropout > 0:
                stack.append(nn.Dropout(dropout))

        # Final projection onto one logit per vocabulary entry.
        stack.append(nn.Linear(widths[-1], vocab_size))

        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Map token ids (batch_size, context_size) to logits (batch_size, vocab_size)."""
        embedded = self.embedding(x)  # (batch_size, context_size, embed_dim)
        # Concatenate the context embeddings of each sample into one vector.
        flat = embedded.view(embedded.shape[0], -1)
        return self.network(flat)

torch.manual_seed(69)

model = NGramModel(
    vocab_size=number_of_bins + 1,  # one extra id for the padding token
    embed_dim=embed_dim,
    hidden_layers=[100],  # a single hidden layer of 100 units
    context_size=previous_steps,
    dropout=0,  # 0 disables dropout
)

# model = model.double()  # Use float64 to match TensorFlow

# Initialize with normal distribution (mean=0, std=1)
# This overwrites PyTorch's default initialisation of every parameter.
with torch.no_grad():
    for param in model.parameters():
        param.normal_(mean=0.0, std=1.0)

# Count parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {total_params}")

# Define loss and optimizers
criterion = nn.CrossEntropyLoss()
learning_rates = [0.1, 0.01, 0.001, 0.0001]
# NOTE(review): zip() below stops at the shorter list, so only the first
# len(epochs_list) learning rates are used. Provide one entry per learning
# rate to run every stage.
epochs_list = [1]

losses = []  # training loss of every minibatch step
val_losses = []  # validation losses, recorded every 10th step of a stage
val_steps = []  # index into `losses` at which each validation loss was taken

# Training loop: one stage per (learning rate, number of steps) pair.
# Each "epoch" here is a single optimizer step on one random minibatch.
for lr, num_epochs in zip(learning_rates, epochs_list):
    optimizer = torch.optim.Adamax(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        # Training
        model.train()
        batch_idx = torch.randint(0, X_train.size(0), (batch_size,))
        X_batch = X_train[batch_idx]
        Y_batch = Y_train[batch_idx]

        # Forward pass
        logits = model(X_batch)
        # Call the criterion *instance* on (logits, targets); passing tensors
        # to the nn.CrossEntropyLoss constructor instead raises an error.
        loss = criterion(logits, Y_batch)
        losses.append(loss.item())

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Validation loss (compute every 10 epochs to save time)
        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                logits_val = model(X_val)
                loss_val = criterion(logits_val, Y_val)
                val_losses.append(loss_val.item())
                # Remember the global step so the curves stay aligned even
                # when several stages are run.
                val_steps.append(len(losses) - 1)

# Plot learning curves
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 5))

# Training loss (validation overlaid for comparison)
ax1.plot(range(len(losses)), np.log(losses), label="training")
ax1.plot(val_steps, np.log(val_losses), color='orange', label="validation")
ax1.set_title("Log-Loss for minibatches (Training)")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Log-Loss")
ax1.grid(True)
ax1.legend()

# Validation loss
ax2.plot(val_steps, np.log(val_losses), color='orange')
ax2.set_title("Log-Loss (Validation)")
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Log-Loss")
ax2.grid(True)

plt.tight_layout()
plt.savefig("training_validation_loss.png")
print("Training and validation loss plots saved")

# Evaluate validation loss
model.eval()  # make sure dropout (if any) is disabled for the final evaluation
with torch.no_grad():
    logits_val = model(X_val)
    loss_val = criterion(logits_val, Y_val)
    print(f"Validation data loss: {loss_val.item()}")


Data shape: (10000, 75)
(10000, 75) 96
X shape: (750000, 2), X_train: (675000, 2), X_val: (75000, 2)
Y shape: (750000,), Y_train: (675000,), X_val: (75000,)
Trainable parameters: 12867
aaa
torch.Size([200, 2]) torch.Size([200])


RuntimeError: Boolean value of Tensor with more than one value is ambiguous