In [1]:
import torch

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomLSTMModel(nn.Module):
    """Two chained LSTMs with an input skip connection and a linear classifier head.

    The first LSTM reads the raw input sequence. Its per-timestep output is
    concatenated with the raw input along the feature axis and fed to the
    second LSTM. The second LSTM's output at the last timestep is mapped to
    class scores by a fully connected layer.

    Args:
        input_size: Number of features per timestep of the input.
        hidden_size: Hidden size of the second LSTM (and input width of the
            classifier head).
        num_layers: Only validated (must be >= 2); the architecture always
            consists of exactly two LSTM layers.
        num_classes: Number of output classes.
        first_hidden_size: Hidden size of the first LSTM. Defaults to 1024,
            the value that was previously hard-coded.

    Raises:
        AssertionError: If ``num_layers`` is smaller than 2.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, first_hidden_size=1024):
        super().__init__()
        assert num_layers >= 2, "num_layers must be at least 2 for this architecture"

        # Hidden sizes of the two LSTM layers
        self.hidden_size1 = first_hidden_size
        self.hidden_size2 = hidden_size

        # The second LSTM sees the raw input concatenated with the first LSTM's output,
        # hence its input width is input_size + hidden_size1.
        self.lstm1 = nn.LSTM(input_size, self.hidden_size1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size + self.hidden_size1, self.hidden_size2, batch_first=True)

        # Fully connected layer mapping the last LSTM output to class scores
        self.fc = nn.Linear(self.hidden_size2, num_classes)

    def forward(self, x):
        """Compute unnormalized class scores (logits) for a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); both LSTMs are
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, num_classes) holding raw logits. No softmax
            is applied here: ``nn.CrossEntropyLoss`` applies log-softmax
            internally, so feeding it probabilities would normalize twice.
            Call ``torch.softmax(logits, dim=1)`` on the result when
            probabilities are needed; top-k rankings are the same either way.
        """
        # nn.LSTM uses zero initial hidden/cell states when none are passed,
        # so they do not need to be built by hand.
        out1, _ = self.lstm1(x)

        # Skip connection: concatenate raw features with the first layer's output
        combined_input = torch.cat((x, out1), dim=2)

        out2, _ = self.lstm2(combined_input)

        # Classify from the output at the final timestep only
        return self.fc(out2[:, -1, :])

# Hyperparameters for the network
num_classes = 100   # one output class per song
num_layers = 2      # only validated by the model constructor (must be >= 2)
hidden_size = 1024  # hidden width of the second LSTM layer
input_size = 443    # features per timestep of the input

# Build the network, then move its parameters onto the selected device
model = CustomLSTMModel(input_size, hidden_size, num_layers, num_classes)
model = model.to(device)

# Show the layer layout
print(model)


CustomLSTMModel(
  (lstm1): LSTM(443, 1024, batch_first=True)
  (lstm2): LSTM(1467, 1024, batch_first=True)
  (fc): Linear(in_features=1024, out_features=100, bias=True)
)


In [None]:
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

# This cell relies on names it does not define itself: `model`, `device`,
# `train_dataloader` and `valid_dataloader`.

# Loss function. nn.CrossEntropyLoss applies log-softmax internally, so it
# expects the model to output unnormalized logits.
criterion = nn.CrossEntropyLoss()

# Optimizer
learning_rate = 0.0001
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
num_epochs = 300

# Per-epoch average (over batches) of the training and validation loss
train_losses = []
val_losses = []

for epoch in range(num_epochs):  # Loop over the dataset multiple times
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    for inputs, labels in train_dataloader:
        # Transfer inputs and labels to the selected device
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average of the per-batch losses for this epoch
    avg_train_loss = running_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)

    # ---- Validation phase (no gradient tracking, no parameter updates) ----
    model.eval()
    with torch.no_grad():
        running_val_loss = 0.0
        for inputs, labels in valid_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item()

        avg_val_loss = running_val_loss / len(valid_dataloader)
        val_losses.append(avg_val_loss)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

# Plotting the learning curve.
# The x-axis is derived from the number of recorded epochs so it always
# matches the length of the loss lists.
epochs_run = np.arange(1, len(train_losses) + 1)
plt.figure(figsize=(10, 5))
plt.plot(epochs_run, train_losses, label='Train Loss')
plt.plot(epochs_run, val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
# Title is built from the actual settings so it cannot drift out of sync with them
plt.title(f'Learning Curve (Epoch = {num_epochs}, Learning Rate = {learning_rate}, 100 dataset, 1024)')
plt.legend()
plt.show()


In [None]:
import numpy as np
import torch

# Evaluate top-k accuracy and Mean Reciprocal Rank (MRR) of `model`.
# Relies on `model`, `device` and the dataloaders defined outside this cell.
model.eval()
model.to(device)

# Number of top-ranked predictions considered by both metrics
k = 10

# Dataloader to evaluate on. This scores the TRAINING split; assign
# `valid_dataloader` instead to score the validation split.
eval_dataloader = train_dataloader

# One bool per sample: is the true label among the top-k predictions?
is_relevant_in_top_k = []

# One float per sample: 1 / rank of the true label within the top k, or 0 if absent
reciprocal_ranks = []

with torch.no_grad():
    for inputs, labels in eval_dataloader:
        # Forward pass to get the model outputs
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        # Indices of the k highest-scoring classes per sample, best first
        _, top_k_predictions = outputs.topk(k, dim=1, largest=True, sorted=True)

        # matches[i, j] is True when the j-th ranked prediction equals the true label
        matches = top_k_predictions == labels.view(-1, 1)
        is_relevant_in_top_k.extend(matches.any(dim=1).tolist())

        # topk returns distinct class indices, so each row has at most one match;
        # summing match / rank therefore yields 1/rank for a hit and 0 for a miss.
        ranks = torch.arange(1, k + 1, device=matches.device, dtype=torch.float64)
        batch_rr = (matches.to(torch.float64) / ranks).sum(dim=1)
        reciprocal_ranks.extend(batch_rr.tolist())

# Top-k accuracy: share of samples whose true label is within the top k
accuracy_at_k = np.mean(is_relevant_in_top_k) * 100
print(f'Accuracy at {k}: {accuracy_at_k:.2f}%')

# Mean Reciprocal Rank (restricted to the top k; misses count as 0)
mrr = np.mean(reciprocal_ranks)
print(f'Mean Reciprocal Rank (MRR): {mrr:.4f}')


In [None]:
# `os` is not imported by any earlier cell, so import it here to keep this
# cell runnable on a fresh kernel.
import os

# Directory the trained weights are written to.
# A raw string already keeps backslashes literal, so they must not be doubled.
save_dir = r'E:\new_training'
os.makedirs(save_dir, exist_ok=True)  # Create the directory if it doesn't exist

# Save only the model's state dictionary (parameters and buffers), not the full module
model_path = os.path.join(save_dir, 'model_state.pth')
torch.save(model.state_dict(), model_path)

print(f'Model saved to {model_path}')