# Data Preprocessing

In [9]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import random
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



def load_and_shuffle_mnist():
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5, ))])
    mnist_train = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    mnist_test = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

    train_data = list(mnist_train)
    test_data = list(mnist_test)

    random.shuffle(train_data)
    random.shuffle(test_data)

    return train_data, test_data

In [10]:
def create_sequences(mnist_data, window_size):
    sequences = []
    labels = []

    for i in range(len(mnist_data) - window_size + 1):
        window = mnist_data[i:i + window_size]
        sequence_images = torch.stack([img for img, _ in window])

        # Default label is 'null' (using a specific number to represent 'null', e.g., 10)
        label = 10  # Assuming 10 represents 'null'

        # Check for the condition in the last window_size elements
        last_digit = window[-1][1]
        for _, prev_label in window[:-1]:
            if prev_label == last_digit:
                label = last_digit  # Label is the digit itself if condition is met
                break

        sequences.append(sequence_images)
        labels.append(label)

    return sequences, labels

In [11]:
class MNISTSequenceDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels  # Labels are already numerical, no need for a map

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        # Ensure label is a tensor of dtype torch.long
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        return sequence, label_tensor

# LSMT over CNN Definition

In [12]:
class LSTMOverCNN(nn.Module):

    """A CNN is used to process each image in a sequence and the extracted features are passed to the LSTM,
    which, therefore, processes sequences of CNN-extracted features."""

    def __init__(self, hidden_size, num_classes):
        super(LSTMOverCNN, self).__init__()

        # The CNN architecture is a standard one consisting of two
        # convolutional layers, each followed by ReLU and max pooling.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Compute the output size after the CNN. This is necessary to set the input size of the LSTM correctly.
        cnn_output_size = self._get_cnn_output_size()

        # LSTM part
        self.lstm = nn.LSTM(cnn_output_size, hidden_size, batch_first=True)

        # Fully connected layer for classification
        self.fc = nn.Linear(hidden_size, num_classes)

        # Dropout layer
        self.dropout = nn.Dropout(0.2)

    def _get_cnn_output_size(self):
        # Dummy pass to compute CNN output size
        with torch.no_grad():
            dummy_input = torch.zeros(1, 1, 28, 28)
            dummy_output = self.cnn(dummy_input)
            return dummy_output.view(1, -1).size(1)

    def forward(self, x):
        # x shape: (batch_size, sequence_length, channels, height, width)
        batch_size, sequence_length, _, _, _ = x.size()

        # Process each frame with the CNN
        cnn_features = []
        for t in range(sequence_length):
            # Extract features for each image in the sequence
            img_features = self.cnn(x[:, t, :, :, :])
            img_features = img_features.view(batch_size, -1)
            cnn_features.append(img_features)
        cnn_features = torch.stack(cnn_features, dim=1)

        # LSTM output
        lstm_out, _ = self.lstm(cnn_features)
        lstm_out = self.dropout(lstm_out[:, -1, :])  # Apply dropout to the output of the last time step

        # Classifier
        out = self.fc(lstm_out)

        return out




In [13]:
def balanced_sampling(mnist_data, size, num_classes=11):
    print('balanced_sampling')
    # Group data by labels
    grouped_data = {label: [] for label in range(num_classes)}
    for img, label in mnist_data:
        grouped_data[label].append((img, label))

    # Calculate the number of samples per class
    samples_per_class = size // num_classes

    # Initialize list for sampled data
    sampled_data = []

    for label, data in grouped_data.items():
        data_len = len(data)
        if data_len == 0:
            print(f"Warning: No data for class {label}. Skipping this class.")
            continue
        if data_len >= samples_per_class:
            sampled_data.extend(random.sample(data, samples_per_class))
        else:
            # Efficiently replicate data to meet the required number of samples
            repeats = samples_per_class // data_len
            remainder = samples_per_class % data_len
            sampled_data.extend(data * repeats + random.sample(data, remainder))

    # Shuffle the final dataset using Python's random.shuffle for compatibility
    random.shuffle(sampled_data)
    print('Done sampling.')
    return sampled_data


# Train and Testing without batches

In [14]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import random
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report, f1_score


def train_model(model, train_data, criterion, optimizer, num_epochs, test_data, eval_interval=100):
    logger = {'time': {}, 'loss': {}, 'F1': {}, 'Accuracy': {}}

    for epoch in range(num_epochs):
        epoch_start_time = time.time()

        for i, (sequence, label) in enumerate(train_data):
            # Convert sequence and label to tensors if they are not already
            if not isinstance(sequence, torch.Tensor):
                sequence = torch.tensor(sequence, dtype=torch.float32)
            if not isinstance(label, torch.Tensor):
                label = torch.tensor(label, dtype=torch.long)

            # Forward pass
            outputs = model(sequence.unsqueeze(0))
            loss = criterion(outputs, label.unsqueeze(0))

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            if i % 1000 == 0:
                logger['time'][i] = time.time() - epoch_start_time
                logger['loss'][i] = loss.item()

                # Evaluate model performance on test data
                f1_score_value, accuracy = evaluate_model(model, test_data)
                logger['F1'][i] = f1_score_value
                logger['Accuracy'][i] = accuracy

                print(f"Epoch {epoch + 1}, Step {i}, Loss: {loss.item()}, F1: {f1_score_value}, Accuracy: {accuracy}, Time Elapsed: {time.time() - epoch_start_time} seconds")

        epoch_end_time = time.time()
        print(f"Epoch {epoch + 1} completed. Time: {epoch_end_time - epoch_start_time}")

        # Print detailed logger information
        print("Logger", logger)
        
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import numpy as np

def evaluate_model(model, test_data):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for sequence, label in test_data:
            # Ensure label is a tensor
            if not isinstance(label, torch.Tensor):
                label = torch.tensor(label, dtype=torch.long)

            outputs = model(sequence.unsqueeze(0))
            _, predicted = torch.max(outputs.data, 1)
            
            all_predictions.append(predicted.item())
            all_labels.append(label.item())

    # Calculate confusion matrix and metrics
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    accuracy = np.trace(conf_matrix) / np.sum(conf_matrix)
    f1_score_value = f1_score(all_labels, all_predictions, average='macro')

    # Print classification report
    print(classification_report(all_labels, all_predictions))

    return f1_score_value, accuracy

# Load and shuffle MNIST dataset
train_data, test_data = load_and_shuffle_mnist()

# Set LSTM parameters
input_size = 784  # 28x28
hidden_size = 128
num_classes = 11  # Digits 0-9 and 'null'
num_epochs = 5
batch_size = 100
learning_rate = 0.001
window_size = 10  # Example window size

# Create model
# model = SimpleLSTM(input_size, hidden_size, num_classes)
model = LSTMOverCNN(hidden_size, num_classes)

model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Example of training with a single data point
for size in [32000]:
    print(f"Training with dataset size: {size}")
    sampled_train_data = balanced_sampling(train_data, size)
    sampled_test_data = balanced_sampling(test_data, size // 2)

    train_sequences, train_labels = create_sequences(sampled_train_data, window_size)
    test_sequences, test_labels = create_sequences(sampled_test_data, window_size)

    # Convert sequences and labels into a list of tuples for easier iteration
    train_data_tuples = list(zip(train_sequences, train_labels))
    test_data_tuples = list(zip(test_sequences, test_labels))

    train_model(model, train_data_tuples, criterion, optimizer, num_epochs, test_data_tuples)


Training with dataset size: 32000
balanced_sampling
Done sampling.
balanced_sampling
Done sampling.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       877
           1       0.00      0.00      0.00       880
           2       0.00      0.00      0.00       901
           3       0.00      0.00      0.00       871
           4       0.00      0.00      0.00       897
           5       0.00      0.00      0.00       879
           6       0.00      0.00      0.00       892
           7       0.06      1.00      0.12       897
           8       0.00      0.00      0.00       885
           9       0.00      0.00      0.00       902
          10       0.00      0.00      0.00      5650

    accuracy                           0.06     14531
   macro avg       0.01      0.09      0.01     14531
weighted avg       0.00      0.06      0.01     14531

Epoch 1, Step 0, Loss: 2.280029296875, F1: 0.010571098592877177, Accuracy: 0.06173009428119194, Time Elapsed: 172.87274074554443 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.56      0.92      0.70       877
           1       0.60      0.96      0.74       880
           2       0.61      0.38      0.47       901
           3       0.60      0.31      0.41       871
           4       0.58      0.90      0.70       897
           5       0.57      0.57      0.57       879
           6       0.62      0.04      0.07       892
           7       0.60      0.74      0.66       897
           8       0.80      0.01      0.02       885
           9       0.00      0.00      0.00       902
          10       0.38      0.50      0.43      5650

    accuracy                           0.49     14531
   macro avg       0.54      0.48      0.43     14531
weighted avg       0.49      0.49      0.43     14531

Epoch 1, Step 1000, Loss: 1.410060167312622, F1: 0.4341099744373069, Accuracy: 0.4863395499277407, Time Elapsed: 400.2401030063629 seconds
              precision    recall  f1-score   su