In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
from train_basic import normalize_and_init_dataset
from data_loader import get_training_dataloader

In [118]:
class SimpleMLP(nn.Module):
    def __init__(self, seq_len, d_model):
        super().__init__()

        self.mlp = nn.Sequential(
            nn.Linear(seq_len * d_model, 200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
        )

    def forward(self, x, targets=None):
        logits = self.mlp(torch.flatten(x, start_dim=1, end_dim=2))

        return logits


In [3]:
train_dataset_merged_file = f"v1_merged_training_users_data_tw10ms.pickle"
val_dataset_merged_file = f"v1_merged_validation_users_data_tw10ms.pickle"
test_dataset_merged_file = f"v1_merged_test_users_data_tw10ms.pickle"


# Loading the preprocessed merged objects
with open(test_dataset_merged_file, "rb") as infile:
    train_dataset = pickle.load(infile)

# with open(val_dataset_merged_file, "rb") as infile:
#     val_dataset = pickle.load(infile)

# with open(test_dataset_merged_file, "rb") as infile:
#     test_dataset = pickle.load(infile)


print("Starting Normalization 1")
# Normalizing the datasets
train_sequences, train_user_ids, train_user_to_indices = normalize_and_init_dataset(train_dataset, screen_dim_x=0, screen_dim_y=0, split="train")
# val_sequences, val_user_ids, val_user_to_indices = normalize_and_init_dataset(val_dataset, screen_dim_x=0, screen_dim_y=0, split="val")
# test_sequences, test_user_ids, test_user_to_indices = normalize_and_init_dataset(test_dataset, screen_dim_x=0, screen_dim_y=0, split="val")
print("Normalization Complete")

print("Train sequences", len(train_user_ids))

Starting Normalization 1
Normalization Complete
Train sequences 36634


In [4]:
# Identifying Device
device = "cpu"
if torch.cuda.is_available(): # GPU
    device="cuda"
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): # Apple Silicon
    device="mps"
print("Device: ", device)

Device:  cuda


In [7]:
import numpy as np

X = train_sequences
y = list(train_user_ids)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2
)

In [140]:
len(X), len(X_train), len(X_test), set(y_test)

(36634, 29307, 7327, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

In [129]:
from data_loader import get_validation_dataloader

training_dataloader = get_validation_dataloader(X_train, y_train, 32, 200, 0)
test_dataloader = get_validation_dataloader(X_test, y_test, 32, 200, 0)

In [130]:
model = SimpleMLP(200, 46)
model.to(device)
num_of_parameters = sum(p.numel() for p in model.parameters())
print("Num. of parameters", num_of_parameters)

Num. of parameters 1861310


In [131]:
lr = 1e-3
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

for _ in range(200):
    for step, batch in enumerate(training_dataloader):
        optimizer.zero_grad() # Zeroing the gradients
        sequences = batch['sequences'].to(device) # (batch_size (B), sequence_length (T), embedding size (C))
        labels = batch['user_ids'].to(device) # User IDs (batch_size (B))

        # print("Sequences", sequences)
        # print("Labels", labels)
        logits = model(x=sequences, targets=labels)

        loss = F.cross_entropy(logits, target=labels)
        # print("Loss", loss)

        # Backprop
        loss.backward()

        # Updating the weights
        # determine and set the learning rate for this iteration
        for param_group in optimizer.param_groups: # Setting the learning rate in the optimizer
            param_group['lr'] = lr
        optimizer.step() # Updating the weights

        print(f"step {step} | lr: {lr} | loss: {loss.item():.6f} | ")
        print("--")



step 0 | lr: 0.001 | loss: 2.264046 | 
--
step 1 | lr: 0.001 | loss: 2.251728 | 
--
step 2 | lr: 0.001 | loss: 2.126432 | 
--
step 3 | lr: 0.001 | loss: 2.223337 | 
--
step 4 | lr: 0.001 | loss: 2.017024 | 
--
step 5 | lr: 0.001 | loss: 2.219315 | 
--
step 6 | lr: 0.001 | loss: 2.021766 | 
--
step 7 | lr: 0.001 | loss: 2.230006 | 
--
step 8 | lr: 0.001 | loss: 1.967548 | 
--
step 9 | lr: 0.001 | loss: 1.951293 | 
--
step 10 | lr: 0.001 | loss: 1.842406 | 
--
step 11 | lr: 0.001 | loss: 1.847314 | 
--
step 12 | lr: 0.001 | loss: 1.716030 | 
--
step 13 | lr: 0.001 | loss: 1.650763 | 
--
step 14 | lr: 0.001 | loss: 1.822456 | 
--
step 15 | lr: 0.001 | loss: 1.887107 | 
--
step 16 | lr: 0.001 | loss: 1.865128 | 
--
step 17 | lr: 0.001 | loss: 1.813435 | 
--
step 18 | lr: 0.001 | loss: 1.520197 | 
--
step 19 | lr: 0.001 | loss: 1.548204 | 
--
step 20 | lr: 0.001 | loss: 1.508279 | 
--
step 21 | lr: 0.001 | loss: 1.422719 | 
--
step 22 | lr: 0.001 | loss: 1.892697 | 
--
step 23 | lr: 0.001 |

KeyboardInterrupt: 

In [132]:
with torch.no_grad():
    model.eval()
    correct = 0
    total = 0
    for step, batch in enumerate(training_dataloader):
        sequences = batch['sequences'].to(device) # (batch_size (B), sequence_length (T), embedding size (C))
        labels = batch['user_ids'].to(device) # User IDs (batch_size (B))

        # print("Sequences", sequences)
        # print("Labels", labels)
        logits = model(x=sequences, targets=labels)

        probs = F.softmax(logits, dim=1)
        
        preds = probs.argmax(dim=1)

        correct += (preds == labels).sum().item()
        total += labels.size(0)

    print(f"Train Accuracy: {(correct/total):.4f}")
    model.train()

Train Accuracy: 1.0000


In [133]:
with torch.no_grad():
    model.eval()
    correct = 0
    total = 0
    for step, batch in enumerate(test_dataloader):
        sequences = batch['sequences'].to(device) # (batch_size (B), sequence_length (T), embedding size (C))
        labels = batch['user_ids'].to(device) # User IDs (batch_size (B))

        # print("Sequences", sequences)
        # print("Labels", labels)
        logits = model(x=sequences, targets=labels)

        probs = F.softmax(logits, dim=1)
        
        preds = probs.argmax(dim=1)

        correct += (preds == labels).sum().item()
        total += labels.size(0)

    print(f"Test Accuracy: {(correct/total):.4f}")
    model.train()

Test Accuracy: 0.9394


In [21]:
losses = []
with torch.no_grad():
    for step, batch in enumerate(training_dataloader):
        optimizer.zero_grad() # Zeroing the gradients
        sequences = batch['sequences'].to(device) # (batch_size (B), sequence_length (T), embedding size (C))
        labels = batch['user_ids'].to(device) # User IDs (batch_size (B))
        modality_mask = batch['modality_mask'].to(device) # (B,T, 2)

        emb, loss = model(x=sequences, targets=labels)

        losses.append(loss.detach().cpu())


print("Validation Loss: ", torch.mean(torch.stack(losses), dim=0).item())

Validation Loss:  2.002769947052002


In [None]:
len(train_user_ids), len(train_sequences), len(set(train_user_ids))

(431356, 431356, 99, list)

In [8]:
import numpy as np

X = [a.flatten() for a in train_sequences]
y = list(train_user_ids)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2
)


In [9]:
len(X_train), len(y_train),  len(X_test), len(y_test), len(set(y_test))

(345084, 345084, 86272, 86272, 99)

In [15]:
type(X_train), type(X_train[0]), X_train[0].dtype

(list, numpy.ndarray, dtype('float32'))

In [10]:


from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', max_iter=1000)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))


MemoryError: Unable to allocate 23.7 GiB for an array with shape (345084, 9200) and data type float64