In [3]:
!pip install transformers tqdm scikit-learn

import gzip
import random
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from transformers import DistilBertTokenizer, DistilBertModel, AdamW
from tqdm import tqdm
import warnings





In [4]:
import pandas as pd

def load_checkins_txt(file_path):
    """
    Load Gowalla check-in data from a plain text file.
    """
    # Column names based on Gowalla data description
    columns = ["user_id", "check_in_time", "latitude", "longitude", "location_id"]
    data = pd.read_csv(file_path, sep="\t", names=columns)
    # Convert timestamp to datetime for easier manipulation
    data["check_in_time"] = pd.to_datetime(data["check_in_time"])
    return data

checkins_file = "/kaggle/input/raw-gowalla-checkins/Gowalla_totalCheckins.txt"  
gowalla_data = load_checkins_txt(checkins_file)
print(gowalla_data.head())


   user_id             check_in_time   latitude  longitude  location_id
0        0 2010-10-19 23:55:27+00:00  30.235909 -97.795140        22847
1        0 2010-10-18 22:17:43+00:00  30.269103 -97.749395       420315
2        0 2010-10-17 23:42:03+00:00  30.255731 -97.763386       316637
3        0 2010-10-17 19:26:05+00:00  30.263418 -97.757597        16516
4        0 2010-10-16 18:50:42+00:00  30.274292 -97.740523      5535878


In [5]:
def add_spatio_temporal_features(df, lat_multiplier=10, lon_multiplier=10):
    """
    Given a DataFrame with columns:
      - check_in_time (datetime)
      - latitude, longitude (floats)
    This function adds columns:
      - day_of_week
      - hour_of_day
      - lat_bucket
      - lon_bucket
    """
    df["day_of_week"] = df["check_in_time"].dt.dayofweek
    df["hour_of_day"] = df["check_in_time"].dt.hour
    
    # Example: Multiply lat/lon by 10, then floor to convert to integer buckets
    df["lat_bucket"] = (df["latitude"] * lat_multiplier).astype(int)
    df["lon_bucket"] = (df["longitude"] * lon_multiplier).astype(int)
    
    return df

gowalla_data = add_spatio_temporal_features(gowalla_data)
print(gowalla_data.head())


   user_id             check_in_time   latitude  longitude  location_id  \
0        0 2010-10-19 23:55:27+00:00  30.235909 -97.795140        22847   
1        0 2010-10-18 22:17:43+00:00  30.269103 -97.749395       420315   
2        0 2010-10-17 23:42:03+00:00  30.255731 -97.763386       316637   
3        0 2010-10-17 19:26:05+00:00  30.263418 -97.757597        16516   
4        0 2010-10-16 18:50:42+00:00  30.274292 -97.740523      5535878   

   day_of_week  hour_of_day  lat_bucket  lon_bucket  
0            1           23         302        -977  
1            0           22         302        -977  
2            6           23         302        -977  
3            6           19         302        -977  
4            5           18         302        -977  


In [6]:
def construct_trajectories(data, time_window="1D"):
    """
    Group check-ins into user trajectories based on a time window.
    time_window is a Pandas time frequency string (e.g., '1D' for daily).
    """
    trajectories = []

    grouped = data.groupby("user_id")
    for user_id, group in grouped:
        group = group.sort_values("check_in_time")
        # Identify new trajectory if gap > time_window
        group["trajectory_id"] = (group["check_in_time"].diff() > pd.Timedelta(time_window)).cumsum()

        for traj_id, traj_group in group.groupby("trajectory_id"):
            trajectory = {
                "user_id": user_id,
                "trajectory_id": traj_id,
                "timestamps": traj_group["check_in_time"].tolist(),
                "locations": traj_group["location_id"].tolist(),
                "day_of_week": traj_group["day_of_week"].tolist(),
                "hour_of_day": traj_group["hour_of_day"].tolist(),
                "lat_bucket": traj_group["lat_bucket"].tolist(),
                "lon_bucket": traj_group["lon_bucket"].tolist()
            }
            trajectories.append(trajectory)

    return pd.DataFrame(trajectories)

trajectories_df = construct_trajectories(gowalla_data, time_window="1D")
trajectories_df.head()


Unnamed: 0,user_id,trajectory_id,timestamps,locations,day_of_week,hour_of_day,lat_bucket,lon_bucket
0,0,0,"[2010-05-22 02:49:04+00:00, 2010-05-22 17:50:5...","[608105, 8977, 18574, 17269, 1161876, 1163401,...","[5, 5, 5, 6, 6, 6, 6]","[2, 17, 19, 16, 17, 22, 23]","[302, 392, 389, 390, 390, 390, 390]","[-977, -947, -946, -945, -945, -945, -945]"
1,0,1,[2010-05-26 14:16:56+00:00],[21714],[2],[14],[302],[-977]
2,0,2,[2010-05-27 22:39:52+00:00],[420315],[3],[22],[302],[-977]
3,0,3,[2010-05-30 06:17:57+00:00],[9073],[6],[6],[377],[-1223]
4,0,4,"[2010-05-31 20:10:37+00:00, 2010-06-01 14:38:2...","[18417, 480992, 15326, 420315]","[0, 1, 2, 2]","[20, 14, 0, 15]","[302, 302, 302, 302]","[-977, -977, -977, -977]"


In [7]:
def encode_pois(trajectories):
    """
    Encode POIs into unique numeric IDs.
    """
    all_locations = set(loc for traj in trajectories["locations"] for loc in traj)
    location_mapping = {loc: idx for idx, loc in enumerate(all_locations)}

    trajectories["encoded_locations"] = trajectories["locations"].apply(
        lambda locs: [location_mapping[loc] for loc in locs]
    )
    return trajectories, location_mapping

trajectories_df, poi_mapping = encode_pois(trajectories_df)
print(trajectories_df.head())

# trajectories_df.to_csv("gowalla_trajectories_spatiotemp.csv", index=False)



   user_id  trajectory_id                                         timestamps  \
0        0              0  [2010-05-22 02:49:04+00:00, 2010-05-22 17:50:5...   
1        0              1                        [2010-05-26 14:16:56+00:00]   
2        0              2                        [2010-05-27 22:39:52+00:00]   
3        0              3                        [2010-05-30 06:17:57+00:00]   
4        0              4  [2010-05-31 20:10:37+00:00, 2010-06-01 14:38:2...   

                                           locations            day_of_week  \
0  [608105, 8977, 18574, 17269, 1161876, 1163401,...  [5, 5, 5, 6, 6, 6, 6]   
1                                            [21714]                    [2]   
2                                           [420315]                    [3]   
3                                             [9073]                    [6]   
4                     [18417, 480992, 15326, 420315]           [0, 1, 2, 2]   

                   hour_of_day              

In [4]:
# loaded saved preprocessed trajectories
trajectories_df = pd.read_csv("/kaggle/input/spatio/gowalla_trajectories_spatiotemp.csv")

In [6]:
print(trajectories_df.head())

   user_id  trajectory_id                                         timestamps  \
0        0              0  [Timestamp('2010-05-22 02:49:04+0000', tz='UTC...   
1        0              1  [Timestamp('2010-05-26 14:16:56+0000', tz='UTC')]   
2        0              2  [Timestamp('2010-05-27 22:39:52+0000', tz='UTC')]   
3        0              3  [Timestamp('2010-05-30 06:17:57+0000', tz='UTC')]   
4        0              4  [Timestamp('2010-05-31 20:10:37+0000', tz='UTC...   

                                           locations            day_of_week  \
0  [608105, 8977, 18574, 17269, 1161876, 1163401,...  [5, 5, 5, 6, 6, 6, 6]   
1                                            [21714]                    [2]   
2                                           [420315]                    [3]   
3                                             [9073]                    [6]   
4                     [18417, 480992, 15326, 420315]           [0, 1, 2, 2]   

                   hour_of_day              

In [7]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 201)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

train_df, val_df = train_test_split(filtered_df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)


Number of selected users: 201


In [8]:
def build_mixed_trajectory(loc_ids, days, hours, lat_buckets, lon_buckets):
    """
    Convert each check-in to a single token that includes 
    (POI, day_of_week, hour_of_day, lat_bucket, lon_bucket).
    e.g., "POI123_DAY2_H14_LAT39_LON116"
    """
    tokens = []
    for loc_id, d, h, latb, lonb in zip(loc_ids, days, hours, lat_buckets, lon_buckets):
        token = f"POI{loc_id}_DAY{d}_H{h}_LAT{latb}_LON{lonb}"
        tokens.append(token)
    return " ".join(tokens)


In [9]:
class SpatioTemporalGowallaDataset(Dataset):
    def __init__(self, df, tokenizer, max_length=128):
        """
        df should contain:
          - encoded_locations (list of location IDs)
          - day_of_week (list of day indices)
          - hour_of_day (list of hour indices)
          - lat_bucket (list of lat bucket ints)
          - lon_bucket (list of lon bucket ints)
          - label (integer-encoded user ID)
        """
        self.df = df
        self.tokenizer = tokenizer
        self.max_length = max_length

        self.labels = df["label"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        loc_ids = row["encoded_locations"]
        days = row["day_of_week"]
        hours = row["hour_of_day"]
        latb = row["lat_bucket"]
        lonb = row["lon_bucket"]

        # Build spatio-temporal token string
        trajectory_str = build_mixed_trajectory(loc_ids, days, hours, latb, lonb)

        # Tokenize
        inputs = self.tokenizer(
            trajectory_str,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors="pt",
        )

        label = self.labels[idx]

        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'label': torch.tensor(label, dtype=torch.long)
        }


In [10]:
# Instantiate the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Create dataset objects
train_dataset = SpatioTemporalGowallaDataset(train_df, tokenizer, max_length=128)
val_dataset = SpatioTemporalGowallaDataset(val_df, tokenizer, max_length=128)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits




tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]



In [11]:
# Initialize model
model = TrajectoryDistilBERT(num_users)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer and loss
optimizer = AdamW(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]



In [13]:
def compute_metrics(outputs, labels, top_ks=[1, 5]):
    metrics = {}
    with torch.no_grad():
        for k in top_ks:
            _, topk_indices = torch.topk(outputs, k=k, dim=1)
            correct_topk = 0
            for i in range(labels.size(0)):
                if labels[i].item() in topk_indices[i]:
                    correct_topk += 1
            metrics[f"ACC@{k}"] = correct_topk / labels.size(0)

    # For F1 and classification report
    _, preds = torch.max(outputs, dim=1)
    metrics["top1_preds"] = preds
    return metrics

from sklearn.metrics import classification_report

def evaluate_model(model, loader, device, loss_fn):
    model.eval()
    total_loss = 0.0
    all_preds = []
    all_labels = []
    acc_metrics = {"ACC@1": 0, "ACC@5": 0}
    total_samples = 0

    with torch.no_grad():
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size

            batch_metrics = compute_metrics(outputs, labels)
            acc_metrics["ACC@1"] += batch_metrics["ACC@1"] * batch_size
            acc_metrics["ACC@5"] += batch_metrics["ACC@5"] * batch_size

            all_preds.extend(batch_metrics["top1_preds"].cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_samples += batch_size

    avg_loss = total_loss / total_samples
    acc_metrics["ACC@1"] /= total_samples
    acc_metrics["ACC@5"] /= total_samples

    classification_metrics = classification_report(all_labels, all_preds, output_dict=True)
    return avg_loss, acc_metrics, classification_metrics


In [14]:
def train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=5):
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        train_iter = tqdm(train_loader, desc=f"Epoch {epoch+1} [Training]", leave=False)
        
        for batch in train_iter:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            train_iter.set_description(f"Epoch {epoch+1} [Training] loss: {loss.item():.4f}")

        # Evaluate on train and val sets
        train_loss, train_acc_metrics, train_classification_metrics = evaluate_model(model, train_loader, device, loss_fn)
        val_loss, val_acc_metrics, val_classification_metrics = evaluate_model(model, val_loader, device, loss_fn)

        # Print epoch summary
        print(f"\nEpoch {epoch+1}:")
        print(f"  Train Loss: {train_loss:.4f}, Train ACC@1: {train_acc_metrics['ACC@1']:.4f}, ACC@5: {train_acc_metrics['ACC@5']:.4f}")
        print(f"  Val   Loss: {val_loss:.4f},   Val ACC@1:   {val_acc_metrics['ACC@1']:.4f}, ACC@5: {val_acc_metrics['ACC@5']:.4f}")
        print(f"  Val Macro F1: {val_classification_metrics['macro avg']['f1-score']:.4f}, Weighted F1: {val_classification_metrics['weighted avg']['f1-score']:.4f}")


In [14]:
# epochs = 1
# train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)


In [21]:
filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].copy()
filtered_df.reset_index(drop=True, inplace=True)

label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])


# Grid search for finetuning

In [16]:
def run_experiment(
    dropout_rate, 
    lr, 
    batch_size, 
    max_length,
    train_df,
    val_df,
    epochs=5,
    device='cuda'
):
    # 1. Recreate datasets/loaders for each batch_size/max_length
    train_dataset = SpatioTemporalGowallaDataset(train_df, tokenizer, max_length=max_length)
    val_dataset = SpatioTemporalGowallaDataset(val_df, tokenizer, max_length=max_length)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # 2. Initialize the model with dropout
    num_users = len(label_encoder.classes_)
    model = TrajectoryDistilBERT(num_users, dropout_rate=dropout_rate)
    model.to(device)

    # 3. Create optimizer and loss
    optimizer = AdamW(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # 4. Train
    train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)

    # 5. Evaluate final metrics
    final_loss, acc_metrics, cls_report = evaluate_model(model, val_loader, device, loss_fn)
    
    # 6. Return  metrics
    return {
        "final_loss": final_loss,
        "ACC@1": acc_metrics["ACC@1"],
        "ACC@5": acc_metrics["ACC@5"],
        "macro_f1": cls_report["macro avg"]["f1-score"]
    }


In [17]:
selected_users_df = gowalla_data[gowalla_data["user_id"].isin(selected_users)].copy()

# Label-encode user_id again in this filtered dataframe
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
selected_users_df["label"] = label_encoder.fit_transform(selected_users_df["user_id"])


In [18]:
import itertools

param_grid = {
    "dropout_rate": [0.0, 0.1],
    "lr": [2e-5, 3e-5],
    "batch_size": [16, 32],
    "max_length": [64, 128]
}

keys = list(param_grid.keys())
values = list(param_grid.values())

results = []
for combo in itertools.product(*values):  # All combos
    params = dict(zip(keys, combo))  
    print(f"\nRunning experiment with {params}")

    metrics = run_experiment(
        dropout_rate=params["dropout_rate"],
        lr=params["lr"],
        batch_size=params["batch_size"],
        max_length=params["max_length"],
        train_df=train_df,  
        val_df=val_df,
        epochs=5,  # or 10
        device=device
    )

    # Store the results
    all_results = {**params, **metrics}
    results.append(all_results)

    print(f"Metrics => {metrics}")



Running experiment with {'dropout_rate': 0.0, 'lr': 2e-05, 'batch_size': 16, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.1250, Train ACC@1: 0.1978, ACC@5: 0.4458
  Val   Loss: 4.2373,   Val ACC@1:   0.1603, ACC@5: 0.4129
  Val Macro F1: 0.0356, Weighted F1: 0.0884


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.4463, Train ACC@1: 0.5603, ACC@5: 0.8157
  Val   Loss: 2.6533,   Val ACC@1:   0.5261, ACC@5: 0.7666
  Val Macro F1: 0.2645, Weighted F1: 0.4264


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.7834, Train ACC@1: 0.6449, ACC@5: 0.8841
  Val   Loss: 2.0427,   Val ACC@1:   0.5993, ACC@5: 0.8380
  Val Macro F1: 0.3647, Weighted F1: 0.5264


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.4565, Train ACC@1: 0.6924, ACC@5: 0.9185
  Val   Loss: 1.7789,   Val ACC@1:   0.6394, ACC@5: 0.8659
  Val Macro F1: 0.4252, Weighted F1: 0.5823


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.2561, Train ACC@1: 0.7194, ACC@5: 0.9333
  Val   Loss: 1.6061,   Val ACC@1:   0.6551, ACC@5: 0.8815
  Val Macro F1: 0.4397, Weighted F1: 0.6013


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.6060781981471524, 'ACC@1': 0.6550522648083623, 'ACC@5': 0.8815331010452961, 'macro_f1': 0.43970781067216286}

Running experiment with {'dropout_rate': 0.0, 'lr': 2e-05, 'batch_size': 16, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.8998, Train ACC@1: 0.3490, ACC@5: 0.5329
  Val   Loss: 4.0305,   Val ACC@1:   0.3084, ACC@5: 0.4686
  Val Macro F1: 0.0791, Weighted F1: 0.1895


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.3189, Train ACC@1: 0.5900, ACC@5: 0.8296
  Val   Loss: 2.5346,   Val ACC@1:   0.5296, ACC@5: 0.7770
  Val Macro F1: 0.2887, Weighted F1: 0.4378


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.7303, Train ACC@1: 0.6462, ACC@5: 0.9007
  Val   Loss: 1.9898,   Val ACC@1:   0.5923, ACC@5: 0.8641
  Val Macro F1: 0.3574, Weighted F1: 0.5172


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.4335, Train ACC@1: 0.6932, ACC@5: 0.9233
  Val   Loss: 1.7342,   Val ACC@1:   0.6307, ACC@5: 0.8815
  Val Macro F1: 0.4155, Weighted F1: 0.5641


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.2277, Train ACC@1: 0.7198, ACC@5: 0.9373
  Val   Loss: 1.5743,   Val ACC@1:   0.6446, ACC@5: 0.8955
  Val Macro F1: 0.4339, Weighted F1: 0.5825


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.5742824110835272, 'ACC@1': 0.6445993031358885, 'ACC@5': 0.8954703832752613, 'macro_f1': 0.43394296843707886}

Running experiment with {'dropout_rate': 0.0, 'lr': 2e-05, 'batch_size': 32, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.4653, Train ACC@1: 0.0954, ACC@5: 0.2932
  Val   Loss: 4.5539,   Val ACC@1:   0.0836, ACC@5: 0.2648
  Val Macro F1: 0.0111, Weighted F1: 0.0357


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 3.2340, Train ACC@1: 0.4026, ACC@5: 0.6815
  Val   Loss: 3.4064,   Val ACC@1:   0.3693, ACC@5: 0.6376
  Val Macro F1: 0.1355, Weighted F1: 0.2748


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 2.3479, Train ACC@1: 0.5447, ACC@5: 0.8131
  Val   Loss: 2.5738,   Val ACC@1:   0.4965, ACC@5: 0.7544
  Val Macro F1: 0.2441, Weighted F1: 0.3899


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.8899, Train ACC@1: 0.6314, ACC@5: 0.8767
  Val   Loss: 2.1450,   Val ACC@1:   0.5662, ACC@5: 0.8293
  Val Macro F1: 0.3363, Weighted F1: 0.4853


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.6051, Train ACC@1: 0.6850, ACC@5: 0.9124
  Val   Loss: 1.8929,   Val ACC@1:   0.6202, ACC@5: 0.8537
  Val Macro F1: 0.3926, Weighted F1: 0.5511


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.8929426869448884, 'ACC@1': 0.6202090592334495, 'ACC@5': 0.8536585365853658, 'macro_f1': 0.39260838204536663}

Running experiment with {'dropout_rate': 0.0, 'lr': 2e-05, 'batch_size': 32, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.3713, Train ACC@1: 0.2183, ACC@5: 0.3939
  Val   Loss: 4.4505,   Val ACC@1:   0.2125, ACC@5: 0.3728
  Val Macro F1: 0.0464, Weighted F1: 0.1272


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 3.0045, Train ACC@1: 0.4553, ACC@5: 0.7399
  Val   Loss: 3.1536,   Val ACC@1:   0.4216, ACC@5: 0.7160
  Val Macro F1: 0.1776, Weighted F1: 0.3327


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 2.2698, Train ACC@1: 0.5856, ACC@5: 0.8401
  Val   Loss: 2.4709,   Val ACC@1:   0.5209, ACC@5: 0.8101
  Val Macro F1: 0.2843, Weighted F1: 0.4336


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.8316, Train ACC@1: 0.6444, ACC@5: 0.8885
  Val   Loss: 2.0898,   Val ACC@1:   0.5662, ACC@5: 0.8467
  Val Macro F1: 0.3393, Weighted F1: 0.4874


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.5671, Train ACC@1: 0.6802, ACC@5: 0.9155
  Val   Loss: 1.8689,   Val ACC@1:   0.6202, ACC@5: 0.8676
  Val Macro F1: 0.4035, Weighted F1: 0.5492


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.868913314068359, 'ACC@1': 0.6202090592334495, 'ACC@5': 0.867595818815331, 'macro_f1': 0.40348346871734725}

Running experiment with {'dropout_rate': 0.0, 'lr': 3e-05, 'batch_size': 16, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.5161, Train ACC@1: 0.3577, ACC@5: 0.5795
  Val   Loss: 3.6741,   Val ACC@1:   0.3345, ACC@5: 0.5418
  Val Macro F1: 0.0956, Weighted F1: 0.2156


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.9919, Train ACC@1: 0.6122, ACC@5: 0.8693
  Val   Loss: 2.2509,   Val ACC@1:   0.5505, ACC@5: 0.8240
  Val Macro F1: 0.3146, Weighted F1: 0.4673


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.4858, Train ACC@1: 0.6728, ACC@5: 0.9168
  Val   Loss: 1.7631,   Val ACC@1:   0.6254, ACC@5: 0.8711
  Val Macro F1: 0.3908, Weighted F1: 0.5544


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.2055, Train ACC@1: 0.7124, ACC@5: 0.9399
  Val   Loss: 1.5715,   Val ACC@1:   0.6446, ACC@5: 0.8798
  Val Macro F1: 0.4590, Weighted F1: 0.5999


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.0142, Train ACC@1: 0.7595, ACC@5: 0.9590
  Val   Loss: 1.4453,   Val ACC@1:   0.6707, ACC@5: 0.8955
  Val Macro F1: 0.4944, Weighted F1: 0.6326


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.4453261135347213, 'ACC@1': 0.6707317073170732, 'ACC@5': 0.8954703832752613, 'macro_f1': 0.49438308696901356}

Running experiment with {'dropout_rate': 0.0, 'lr': 3e-05, 'batch_size': 16, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.3448, Train ACC@1: 0.4131, ACC@5: 0.6641
  Val   Loss: 3.4764,   Val ACC@1:   0.3780, ACC@5: 0.6115
  Val Macro F1: 0.1401, Weighted F1: 0.2651


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.8763, Train ACC@1: 0.6192, ACC@5: 0.8741
  Val   Loss: 2.1129,   Val ACC@1:   0.5662, ACC@5: 0.8380
  Val Macro F1: 0.3305, Weighted F1: 0.4816


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.3971, Train ACC@1: 0.6950, ACC@5: 0.9216
  Val   Loss: 1.6870,   Val ACC@1:   0.6324, ACC@5: 0.8746
  Val Macro F1: 0.4412, Weighted F1: 0.5832


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.1473, Train ACC@1: 0.7438, ACC@5: 0.9442
  Val   Loss: 1.5153,   Val ACC@1:   0.6725, ACC@5: 0.8885
  Val Macro F1: 0.4841, Weighted F1: 0.6322


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 0.9599, Train ACC@1: 0.7725, ACC@5: 0.9630
  Val   Loss: 1.3637,   Val ACC@1:   0.6951, ACC@5: 0.9059
  Val Macro F1: 0.5092, Weighted F1: 0.6522


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.3637361680173707, 'ACC@1': 0.6951219512195121, 'ACC@5': 0.9059233449477352, 'macro_f1': 0.5092049996654855}

Running experiment with {'dropout_rate': 0.0, 'lr': 3e-05, 'batch_size': 32, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.1298, Train ACC@1: 0.2214, ACC@5: 0.4453
  Val   Loss: 4.2054,   Val ACC@1:   0.2038, ACC@5: 0.4233
  Val Macro F1: 0.0510, Weighted F1: 0.1297


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.5214, Train ACC@1: 0.5176, ACC@5: 0.8083
  Val   Loss: 2.7241,   Val ACC@1:   0.4739, ACC@5: 0.7334
  Val Macro F1: 0.2304, Weighted F1: 0.3709


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.8391, Train ACC@1: 0.6449, ACC@5: 0.8723
  Val   Loss: 2.1097,   Val ACC@1:   0.5627, ACC@5: 0.8293
  Val Macro F1: 0.3290, Weighted F1: 0.4801


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.4781, Train ACC@1: 0.6797, ACC@5: 0.9176
  Val   Loss: 1.7964,   Val ACC@1:   0.6132, ACC@5: 0.8641
  Val Macro F1: 0.3947, Weighted F1: 0.5528


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.2721, Train ACC@1: 0.7168, ACC@5: 0.9333
  Val   Loss: 1.6199,   Val ACC@1:   0.6446, ACC@5: 0.8711
  Val Macro F1: 0.4242, Weighted F1: 0.5873


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.6198505931615, 'ACC@1': 0.6445993031358885, 'ACC@5': 0.8710801393728222, 'macro_f1': 0.42421351492242204}

Running experiment with {'dropout_rate': 0.0, 'lr': 3e-05, 'batch_size': 32, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.3371, Train ACC@1: 0.1669, ACC@5: 0.3895
  Val   Loss: 4.4333,   Val ACC@1:   0.1463, ACC@5: 0.3746
  Val Macro F1: 0.0380, Weighted F1: 0.0953


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.6272, Train ACC@1: 0.5098, ACC@5: 0.7939
  Val   Loss: 2.8532,   Val ACC@1:   0.4477, ACC@5: 0.7491
  Val Macro F1: 0.2020, Weighted F1: 0.3436


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.8403, Train ACC@1: 0.6575, ACC@5: 0.8837
  Val   Loss: 2.0779,   Val ACC@1:   0.6167, ACC@5: 0.8415
  Val Macro F1: 0.3962, Weighted F1: 0.5374


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.4710, Train ACC@1: 0.6867, ACC@5: 0.9163
  Val   Loss: 1.7565,   Val ACC@1:   0.6411, ACC@5: 0.8693
  Val Macro F1: 0.4252, Weighted F1: 0.5811


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.2264, Train ACC@1: 0.7285, ACC@5: 0.9355
  Val   Loss: 1.5530,   Val ACC@1:   0.6603, ACC@5: 0.8955
  Val Macro F1: 0.4445, Weighted F1: 0.6079


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.5529568062426737, 'ACC@1': 0.6602787456445993, 'ACC@5': 0.8954703832752613, 'macro_f1': 0.44451867631349523}

Running experiment with {'dropout_rate': 0.1, 'lr': 2e-05, 'batch_size': 16, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.8624, Train ACC@1: 0.3150, ACC@5: 0.5464
  Val   Loss: 3.9807,   Val ACC@1:   0.2770, ACC@5: 0.4878
  Val Macro F1: 0.0664, Weighted F1: 0.1623


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.3712, Train ACC@1: 0.5516, ACC@5: 0.8183
  Val   Loss: 2.5827,   Val ACC@1:   0.4895, ACC@5: 0.7735
  Val Macro F1: 0.2534, Weighted F1: 0.3950


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.8035, Train ACC@1: 0.6353, ACC@5: 0.8819
  Val   Loss: 2.0637,   Val ACC@1:   0.5662, ACC@5: 0.8240
  Val Macro F1: 0.3273, Weighted F1: 0.4816


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.5003, Train ACC@1: 0.6828, ACC@5: 0.9120
  Val   Loss: 1.7909,   Val ACC@1:   0.6289, ACC@5: 0.8502
  Val Macro F1: 0.4100, Weighted F1: 0.5607


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.3144, Train ACC@1: 0.6928, ACC@5: 0.9325
  Val   Loss: 1.6616,   Val ACC@1:   0.6202, ACC@5: 0.8659
  Val Macro F1: 0.4047, Weighted F1: 0.5606


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.661585557336176, 'ACC@1': 0.6202090592334495, 'ACC@5': 0.8658536585365854, 'macro_f1': 0.4046546650760556}

Running experiment with {'dropout_rate': 0.1, 'lr': 2e-05, 'batch_size': 16, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.1012, Train ACC@1: 0.2553, ACC@5: 0.4575
  Val   Loss: 4.1942,   Val ACC@1:   0.2352, ACC@5: 0.4303
  Val Macro F1: 0.0544, Weighted F1: 0.1501


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.5158, Train ACC@1: 0.5381, ACC@5: 0.8013
  Val   Loss: 2.7205,   Val ACC@1:   0.4756, ACC@5: 0.7544
  Val Macro F1: 0.2531, Weighted F1: 0.3825


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.8563, Train ACC@1: 0.6192, ACC@5: 0.8745
  Val   Loss: 2.1038,   Val ACC@1:   0.5714, ACC@5: 0.8415
  Val Macro F1: 0.3373, Weighted F1: 0.4796


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.5067, Train ACC@1: 0.6885, ACC@5: 0.9115
  Val   Loss: 1.7829,   Val ACC@1:   0.6289, ACC@5: 0.8676
  Val Macro F1: 0.4090, Weighted F1: 0.5559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.2978, Train ACC@1: 0.7185, ACC@5: 0.9342
  Val   Loss: 1.6194,   Val ACC@1:   0.6498, ACC@5: 0.8833
  Val Macro F1: 0.4339, Weighted F1: 0.5876


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.6193975762623112, 'ACC@1': 0.6498257839721254, 'ACC@5': 0.8832752613240418, 'macro_f1': 0.43388826220590393}

Running experiment with {'dropout_rate': 0.1, 'lr': 2e-05, 'batch_size': 32, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.6218, Train ACC@1: 0.1303, ACC@5: 0.2754
  Val   Loss: 4.6705,   Val ACC@1:   0.1045, ACC@5: 0.2666
  Val Macro F1: 0.0129, Weighted F1: 0.0393


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 3.2682, Train ACC@1: 0.3908, ACC@5: 0.6135
  Val   Loss: 3.4323,   Val ACC@1:   0.3624, ACC@5: 0.5714
  Val Macro F1: 0.1136, Weighted F1: 0.2514


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 2.3993, Train ACC@1: 0.5438, ACC@5: 0.8309
  Val   Loss: 2.6256,   Val ACC@1:   0.4756, ACC@5: 0.7840
  Val Macro F1: 0.2454, Weighted F1: 0.3853


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.9525, Train ACC@1: 0.6017, ACC@5: 0.8758
  Val   Loss: 2.2056,   Val ACC@1:   0.5331, ACC@5: 0.8345
  Val Macro F1: 0.2995, Weighted F1: 0.4408


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.6569, Train ACC@1: 0.6492, ACC@5: 0.8972
  Val   Loss: 1.9200,   Val ACC@1:   0.5923, ACC@5: 0.8537
  Val Macro F1: 0.3760, Weighted F1: 0.5199


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.920020979871318, 'ACC@1': 0.5923344947735192, 'ACC@5': 0.8536585365853658, 'macro_f1': 0.37595338804240236}

Running experiment with {'dropout_rate': 0.1, 'lr': 2e-05, 'batch_size': 32, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.3532, Train ACC@1: 0.2092, ACC@5: 0.4301
  Val   Loss: 4.4335,   Val ACC@1:   0.1847, ACC@5: 0.3868
  Val Macro F1: 0.0403, Weighted F1: 0.1140


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 3.0146, Train ACC@1: 0.4566, ACC@5: 0.7277
  Val   Loss: 3.1517,   Val ACC@1:   0.4286, ACC@5: 0.6847
  Val Macro F1: 0.1692, Weighted F1: 0.3186


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 2.3005, Train ACC@1: 0.5647, ACC@5: 0.8270
  Val   Loss: 2.4954,   Val ACC@1:   0.5174, ACC@5: 0.7892
  Val Macro F1: 0.2800, Weighted F1: 0.4229


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.9136, Train ACC@1: 0.6083, ACC@5: 0.8745
  Val   Loss: 2.1481,   Val ACC@1:   0.5523, ACC@5: 0.8275
  Val Macro F1: 0.3265, Weighted F1: 0.4694


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.6398, Train ACC@1: 0.6610, ACC@5: 0.9107
  Val   Loss: 1.9021,   Val ACC@1:   0.5958, ACC@5: 0.8676
  Val Macro F1: 0.3713, Weighted F1: 0.5184


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.902134543927289, 'ACC@1': 0.5958188153310104, 'ACC@5': 0.867595818815331, 'macro_f1': 0.37130594043981}

Running experiment with {'dropout_rate': 0.1, 'lr': 3e-05, 'batch_size': 16, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.3926, Train ACC@1: 0.3847, ACC@5: 0.6209
  Val   Loss: 3.5102,   Val ACC@1:   0.3693, ACC@5: 0.5749
  Val Macro F1: 0.1269, Weighted F1: 0.2645


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.1137, Train ACC@1: 0.5756, ACC@5: 0.8410
  Val   Loss: 2.3462,   Val ACC@1:   0.5383, ACC@5: 0.7875
  Val Macro F1: 0.2839, Weighted F1: 0.4428


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.5962, Train ACC@1: 0.6501, ACC@5: 0.9085
  Val   Loss: 1.8793,   Val ACC@1:   0.6028, ACC@5: 0.8519
  Val Macro F1: 0.3553, Weighted F1: 0.5233


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.3044, Train ACC@1: 0.6959, ACC@5: 0.9316
  Val   Loss: 1.6357,   Val ACC@1:   0.6324, ACC@5: 0.8798
  Val Macro F1: 0.4210, Weighted F1: 0.5747


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.1085, Train ACC@1: 0.7268, ACC@5: 0.9508
  Val   Loss: 1.5048,   Val ACC@1:   0.6533, ACC@5: 0.8937
  Val Macro F1: 0.4555, Weighted F1: 0.6041


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.5047642524234093, 'ACC@1': 0.6533101045296167, 'ACC@5': 0.8937282229965157, 'macro_f1': 0.45548453599769495}

Running experiment with {'dropout_rate': 0.1, 'lr': 3e-05, 'batch_size': 16, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.3059, Train ACC@1: 0.3791, ACC@5: 0.6174
  Val   Loss: 3.4586,   Val ACC@1:   0.3258, ACC@5: 0.5662
  Val Macro F1: 0.1078, Weighted F1: 0.2175


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.0245, Train ACC@1: 0.6100, ACC@5: 0.8562
  Val   Loss: 2.2565,   Val ACC@1:   0.5575, ACC@5: 0.8084
  Val Macro F1: 0.3135, Weighted F1: 0.4703


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.4858, Train ACC@1: 0.6876, ACC@5: 0.9102
  Val   Loss: 1.7684,   Val ACC@1:   0.6324, ACC@5: 0.8606
  Val Macro F1: 0.4127, Weighted F1: 0.5697


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.2226, Train ACC@1: 0.7137, ACC@5: 0.9403
  Val   Loss: 1.5673,   Val ACC@1:   0.6551, ACC@5: 0.8850
  Val Macro F1: 0.4556, Weighted F1: 0.5993


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.0105, Train ACC@1: 0.7590, ACC@5: 0.9547
  Val   Loss: 1.4078,   Val ACC@1:   0.6794, ACC@5: 0.8972
  Val Macro F1: 0.4689, Weighted F1: 0.6256


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.4077703578962266, 'ACC@1': 0.6794425087108014, 'ACC@5': 0.8972125435540069, 'macro_f1': 0.4689140509541902}

Running experiment with {'dropout_rate': 0.1, 'lr': 3e-05, 'batch_size': 32, 'max_length': 64}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.4313, Train ACC@1: 0.1595, ACC@5: 0.3307
  Val   Loss: 4.4945,   Val ACC@1:   0.1516, ACC@5: 0.3118
  Val Macro F1: 0.0263, Weighted F1: 0.0756


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.8019, Train ACC@1: 0.4680, ACC@5: 0.7564
  Val   Loss: 2.9699,   Val ACC@1:   0.4425, ACC@5: 0.7160
  Val Macro F1: 0.1950, Weighted F1: 0.3365


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.9997, Train ACC@1: 0.6092, ACC@5: 0.8575
  Val   Loss: 2.2166,   Val ACC@1:   0.5470, ACC@5: 0.8240
  Val Macro F1: 0.3180, Weighted F1: 0.4642


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.6096, Train ACC@1: 0.6680, ACC@5: 0.9124
  Val   Loss: 1.8734,   Val ACC@1:   0.6150, ACC@5: 0.8519
  Val Macro F1: 0.3784, Weighted F1: 0.5457


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.3662, Train ACC@1: 0.7054, ACC@5: 0.9281
  Val   Loss: 1.6721,   Val ACC@1:   0.6585, ACC@5: 0.8624
  Val Macro F1: 0.4260, Weighted F1: 0.5980


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Metrics => {'final_loss': 1.6721406381720034, 'ACC@1': 0.6585365853658537, 'ACC@5': 0.8623693379790941, 'macro_f1': 0.4259966161415615}

Running experiment with {'dropout_rate': 0.1, 'lr': 3e-05, 'batch_size': 32, 'max_length': 128}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.7200, Train ACC@1: 0.3281, ACC@5: 0.5813
  Val   Loss: 3.8339,   Val ACC@1:   0.2805, ACC@5: 0.5331
  Val Macro F1: 0.0923, Weighted F1: 0.1863


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 2.3841, Train ACC@1: 0.5603, ACC@5: 0.8292
  Val   Loss: 2.5763,   Val ACC@1:   0.5087, ACC@5: 0.7997
  Val Macro F1: 0.2628, Weighted F1: 0.4097


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.8030, Train ACC@1: 0.6410, ACC@5: 0.8911
  Val   Loss: 2.0229,   Val ACC@1:   0.5923, ACC@5: 0.8537
  Val Macro F1: 0.3591, Weighted F1: 0.5122


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.4715, Train ACC@1: 0.6893, ACC@5: 0.9237
  Val   Loss: 1.7448,   Val ACC@1:   0.6359, ACC@5: 0.8728
  Val Macro F1: 0.4268, Weighted F1: 0.5722


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.2856, Train ACC@1: 0.7120, ACC@5: 0.9359
  Val   Loss: 1.6038,   Val ACC@1:   0.6376, ACC@5: 0.8763
  Val Macro F1: 0.4429, Weighted F1: 0.5875
Metrics => {'final_loss': 1.603756724749708, 'ACC@1': 0.6376306620209059, 'ACC@5': 0.8763066202090593, 'macro_f1': 0.44286839678279816}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
best = max(results, key=lambda x: x["ACC@1"])  # or "macro_f1", etc.
print("Best config by ACC@1:", best)


Best config by ACC@1: {'dropout_rate': 0.0, 'lr': 3e-05, 'batch_size': 16, 'max_length': 128, 'final_loss': 1.3637361680173707, 'ACC@1': 0.6951219512195121, 'ACC@5': 0.9059233449477352, 'macro_f1': 0.5092049996654855}


# Using best parameters for 201 users


In [55]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 201)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

train_df, val_df = train_test_split(filtered_df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)


Number of selected users: 201


In [56]:
# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

train_dataset = SpatioTemporalGowallaDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalGowallaDataset(val_df, tokenizer, max_length=best_max_length)

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)


In [57]:
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # DistilBERT's last_hidden_state: (batch_size, seq_len, hidden_size)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits


In [58]:
# Build the model 
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

TrajectoryDistilBERT(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Li

In [59]:
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()




In [60]:
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.1399, Train ACC@1: 0.3669, ACC@5: 0.6675
  Val   Loss: 3.2669,   Val ACC@1:   0.3519, ACC@5: 0.6394
  Val Macro F1: 0.1179, Weighted F1: 0.2472


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.9786, Train ACC@1: 0.6113, ACC@5: 0.8588
  Val   Loss: 2.2305,   Val ACC@1:   0.5505, ACC@5: 0.8171
  Val Macro F1: 0.3110, Weighted F1: 0.4667


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.5073, Train ACC@1: 0.6601, ACC@5: 0.9224
  Val   Loss: 1.7811,   Val ACC@1:   0.6080, ACC@5: 0.8728
  Val Macro F1: 0.3866, Weighted F1: 0.5314


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.2188, Train ACC@1: 0.7251, ACC@5: 0.9416
  Val   Loss: 1.5463,   Val ACC@1:   0.6638, ACC@5: 0.8798
  Val Macro F1: 0.4534, Weighted F1: 0.6068


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.0352, Train ACC@1: 0.7521, ACC@5: 0.9569
  Val   Loss: 1.4315,   Val ACC@1:   0.6760, ACC@5: 0.8885
  Val Macro F1: 0.4826, Weighted F1: 0.6230


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 0.8736, Train ACC@1: 0.7987, ACC@5: 0.9660
  Val   Loss: 1.3470,   Val ACC@1:   0.6864, ACC@5: 0.8990
  Val Macro F1: 0.5011, Weighted F1: 0.6445


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 0.7680, Train ACC@1: 0.8096, ACC@5: 0.9734
  Val   Loss: 1.3138,   Val ACC@1:   0.6934, ACC@5: 0.9059
  Val Macro F1: 0.5165, Weighted F1: 0.6561


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 0.6725, Train ACC@1: 0.8331, ACC@5: 0.9804
  Val   Loss: 1.2796,   Val ACC@1:   0.6934, ACC@5: 0.9059
  Val Macro F1: 0.5271, Weighted F1: 0.6640


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 0.5543, Train ACC@1: 0.8667, ACC@5: 0.9847
  Val   Loss: 1.2026,   Val ACC@1:   0.7160, ACC@5: 0.9059
  Val Macro F1: 0.5453, Weighted F1: 0.6839


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 0.4668, Train ACC@1: 0.8924, ACC@5: 0.9887
  Val   Loss: 1.2117,   Val ACC@1:   0.7160, ACC@5: 0.9077
  Val Macro F1: 0.5543, Weighted F1: 0.6908


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# WITH TEST SET

In [17]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 291)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

# Split data into training, validation, and test sets
train_df, temp_df = train_test_split(filtered_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Reset indices for all sets
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

# Create datasets
train_dataset = SpatioTemporalGowallaDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalGowallaDataset(val_df, tokenizer, max_length=best_max_length)
test_dataset = SpatioTemporalGowallaDataset(test_df, tokenizer, max_length=best_max_length)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

def evaluate_test_set(model, loader, device, loss_fn):
    model.eval()
    total_loss = 0.0
    all_preds, all_labels = [], []
    acc_metrics = {"ACC@1": 0, "ACC@5": 0}
    total_samples = 0

    with torch.no_grad():
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size

            batch_metrics = compute_metrics(outputs, labels)
            acc_metrics["ACC@1"] += batch_metrics["ACC@1"] * batch_size
            acc_metrics["ACC@5"] += batch_metrics["ACC@5"] * batch_size

            all_preds.extend(batch_metrics["top1_preds"].cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_samples += batch_size

    # Compute averages
    avg_loss = total_loss / total_samples
    acc_metrics["ACC@1"] /= total_samples
    acc_metrics["ACC@5"] /= total_samples

    # Generate classification report
    cls_metrics = classification_report(all_labels, all_preds, output_dict=True)
    
    print(f"Test Loss: {avg_loss:.4f}")
    print(f"ACC@1: {acc_metrics['ACC@1']:.4f}")
    print(f"ACC@5: {acc_metrics['ACC@5']:.4f}")
    
    return avg_loss, acc_metrics, cls_metrics



# Build the model
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
model.to(device)

# Optimizer, loss function, and training
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()

# Train the model
epochs = 10
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)

# Evaluate the test set
print("Evaluating the model on the test set...")
evaluate_test_set(model, test_loader, device, loss_fn)


Number of selected users: 291


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 4.4952, Train ACC@1: 0.1168, ACC@5: 0.3108
  Val   Loss: 4.5678,   Val ACC@1:   0.1179, ACC@5: 0.3134
  Val Macro F1: 0.0197, Weighted F1: 0.0469


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 3.5636, Train ACC@1: 0.2367, ACC@5: 0.5471
  Val   Loss: 3.6956,   Val ACC@1:   0.2213, ACC@5: 0.5331
  Val Macro F1: 0.0534, Weighted F1: 0.1158


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 2.8851, Train ACC@1: 0.3777, ACC@5: 0.7121
  Val   Loss: 3.0626,   Val ACC@1:   0.3586, ACC@5: 0.6656
  Val Macro F1: 0.1485, Weighted F1: 0.2454


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 2.4419, Train ACC@1: 0.4501, ACC@5: 0.7741
  Val   Loss: 2.6815,   Val ACC@1:   0.4023, ACC@5: 0.7383
  Val Macro F1: 0.1844, Weighted F1: 0.2880


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 2.1633, Train ACC@1: 0.4747, ACC@5: 0.8136
  Val   Loss: 2.4669,   Val ACC@1:   0.4459, ACC@5: 0.7674
  Val Macro F1: 0.2498, Weighted F1: 0.3469


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 1.9894, Train ACC@1: 0.4969, ACC@5: 0.8354
  Val   Loss: 2.3324,   Val ACC@1:   0.4459, ACC@5: 0.7868
  Val Macro F1: 0.2641, Weighted F1: 0.3654


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 1.8652, Train ACC@1: 0.5104, ACC@5: 0.8486
  Val   Loss: 2.2291,   Val ACC@1:   0.4733, ACC@5: 0.7916
  Val Macro F1: 0.2851, Weighted F1: 0.3771


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 1.7686, Train ACC@1: 0.5246, ACC@5: 0.8649
  Val   Loss: 2.1597,   Val ACC@1:   0.4863, ACC@5: 0.8013
  Val Macro F1: 0.3030, Weighted F1: 0.4068


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 1.6590, Train ACC@1: 0.5461, ACC@5: 0.8732
  Val   Loss: 2.0984,   Val ACC@1:   0.4847, ACC@5: 0.8094
  Val Macro F1: 0.3109, Weighted F1: 0.4121


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 1.5737, Train ACC@1: 0.5662, ACC@5: 0.8846
  Val   Loss: 2.0550,   Val ACC@1:   0.5024, ACC@5: 0.8126
  Val Macro F1: 0.3187, Weighted F1: 0.4280
Evaluating the model on the test set...
Test Loss: 2.0699
ACC@1: 0.4976
ACC@5: 0.8174


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


(2.0699333462846106,
 {'ACC@1': 0.4975767366720517, 'ACC@5': 0.8174474959612278},
 {'1': {'precision': 0.45,
   'recall': 0.6923076923076923,
   'f1-score': 0.5454545454545455,
   'support': 13},
  '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2},
  '3': {'precision': 0.5,
   'recall': 1.0,
   'f1-score': 0.6666666666666666,
   'support': 3},
  '4': {'precision': 0.25,
   'recall': 0.2,
   'f1-score': 0.22222222222222224,
   'support': 5},
  '5': {'precision': 0.5555555555555556,
   'recall': 0.4166666666666667,
   'f1-score': 0.4761904761904762,
   'support': 12},
  '6': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2},
  '7': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12},
  '9': {'precision': 0.38095238095238093,
   'recall': 0.5714285714285714,
   'f1-score': 0.4571428571428571,
   'support': 14},
  '10': {'precision': 1.0,
   'recall': 0.5,
   'f1-score': 0.6666666666666666,
   'support': 2},
  '11': {'precision': 0.0, 'recal

# Using best parameters for 112 users

In [15]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 112)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

# Split data into training, validation, and test sets
train_df, temp_df = train_test_split(filtered_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Reset indices for all sets
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)



Number of selected users: 112


In [16]:
# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

train_dataset = SpatioTemporalGowallaDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalGowallaDataset(val_df, tokenizer, max_length=best_max_length)
test_dataset = SpatioTemporalGowallaDataset(test_df, tokenizer, max_length=best_max_length)


train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)



In [None]:
def evaluate_test_set(model, test_loader, device):
    model.eval()
    total_loss, total_correct = 0, 0
    all_predictions, all_labels = [], []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs, labels)

            total_loss += loss.item()
            predictions = torch.argmax(outputs, dim=1)
            total_correct += (predictions == labels).sum().item()

            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = total_correct / len(test_loader.dataset)
    print(f"Test Loss: {total_loss / len(test_loader):.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")
    print(classification_report(all_labels, all_predictions))

# Evaluate the test set
evaluate_test_set(model, test_loader, device)


In [51]:
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # DistilBERT's last_hidden_state: (batch_size, seq_len, hidden_size)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits


In [52]:
# Build the model
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

TrajectoryDistilBERT(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Li

In [53]:
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()




In [54]:
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.4679, Train ACC@1: 0.3078, ACC@5: 0.5286
  Val   Loss: 3.5049,   Val ACC@1:   0.3368, ACC@5: 0.5193
  Val Macro F1: 0.0773, Weighted F1: 0.2083


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.8978, Train ACC@1: 0.6183, ACC@5: 0.8742
  Val   Loss: 2.0399,   Val ACC@1:   0.6070, ACC@5: 0.8246
  Val Macro F1: 0.3052, Weighted F1: 0.5294


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.2832, Train ACC@1: 0.7397, ACC@5: 0.9314
  Val   Loss: 1.5189,   Val ACC@1:   0.6772, ACC@5: 0.8807
  Val Macro F1: 0.4217, Weighted F1: 0.6053


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.0326, Train ACC@1: 0.7608, ACC@5: 0.9551
  Val   Loss: 1.3514,   Val ACC@1:   0.6772, ACC@5: 0.9053
  Val Macro F1: 0.4564, Weighted F1: 0.6134


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 0.8804, Train ACC@1: 0.7810, ACC@5: 0.9727
  Val   Loss: 1.2025,   Val ACC@1:   0.7018, ACC@5: 0.9263
  Val Macro F1: 0.4797, Weighted F1: 0.6372


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 0.7603, Train ACC@1: 0.8127, ACC@5: 0.9789
  Val   Loss: 1.1295,   Val ACC@1:   0.7263, ACC@5: 0.9053
  Val Macro F1: 0.5324, Weighted F1: 0.6798


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 0.6796, Train ACC@1: 0.8250, ACC@5: 0.9850
  Val   Loss: 1.1148,   Val ACC@1:   0.7228, ACC@5: 0.9298
  Val Macro F1: 0.5345, Weighted F1: 0.6693


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 0.5807, Train ACC@1: 0.8505, ACC@5: 0.9903
  Val   Loss: 1.0586,   Val ACC@1:   0.7298, ACC@5: 0.9298
  Val Macro F1: 0.5504, Weighted F1: 0.6851


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 0.5440, Train ACC@1: 0.8531, ACC@5: 0.9894
  Val   Loss: 1.0333,   Val ACC@1:   0.7228, ACC@5: 0.9368
  Val Macro F1: 0.5455, Weighted F1: 0.6802


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 0.4714, Train ACC@1: 0.8760, ACC@5: 0.9921
  Val   Loss: 1.0010,   Val ACC@1:   0.7404, ACC@5: 0.9509
  Val Macro F1: 0.5813, Weighted F1: 0.6931


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# WITH TEST SET

In [18]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 112)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

# Split data into training, validation, and test sets
train_df, temp_df = train_test_split(filtered_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Reset indices for all sets
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

# Create datasets
train_dataset = SpatioTemporalGowallaDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalGowallaDataset(val_df, tokenizer, max_length=best_max_length)
test_dataset = SpatioTemporalGowallaDataset(test_df, tokenizer, max_length=best_max_length)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

def evaluate_test_set(model, loader, device, loss_fn):
    model.eval()
    total_loss = 0.0
    all_preds, all_labels = [], []
    acc_metrics = {"ACC@1": 0, "ACC@5": 0}
    total_samples = 0

    with torch.no_grad():
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size

            batch_metrics = compute_metrics(outputs, labels)
            acc_metrics["ACC@1"] += batch_metrics["ACC@1"] * batch_size
            acc_metrics["ACC@5"] += batch_metrics["ACC@5"] * batch_size

            all_preds.extend(batch_metrics["top1_preds"].cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_samples += batch_size

    # Compute averages
    avg_loss = total_loss / total_samples
    acc_metrics["ACC@1"] /= total_samples
    acc_metrics["ACC@5"] /= total_samples

    # Generate classification report
    cls_metrics = classification_report(all_labels, all_preds, output_dict=True)
    
    print(f"Test Loss: {avg_loss:.4f}")
    print(f"ACC@1: {acc_metrics['ACC@1']:.4f}")
    print(f"ACC@5: {acc_metrics['ACC@5']:.4f}")
    
    return avg_loss, acc_metrics, cls_metrics

# Build the model
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
model.to(device)

# Optimizer, loss function, and training
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()

# Train the model
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)

# Evaluate the test set
print("Evaluating the model on the test set...")
evaluate_test_set(model, test_loader, device, loss_fn)


Number of selected users: 112


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.9666, Train ACC@1: 0.1286, ACC@5: 0.3920
  Val   Loss: 4.0299,   Val ACC@1:   0.1080, ACC@5: 0.3662
  Val Macro F1: 0.0089, Weighted F1: 0.0234


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 3.0880, Train ACC@1: 0.2704, ACC@5: 0.6472
  Val   Loss: 3.1399,   Val ACC@1:   0.2488, ACC@5: 0.6479
  Val Macro F1: 0.0697, Weighted F1: 0.1212


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 2.4988, Train ACC@1: 0.3889, ACC@5: 0.7668
  Val   Loss: 2.5924,   Val ACC@1:   0.3662, ACC@5: 0.7230
  Val Macro F1: 0.1356, Weighted F1: 0.2577


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 2.0175, Train ACC@1: 0.5005, ACC@5: 0.8402
  Val   Loss: 2.1495,   Val ACC@1:   0.4648, ACC@5: 0.8357
  Val Macro F1: 0.2292, Weighted F1: 0.3604


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.6897, Train ACC@1: 0.5910, ACC@5: 0.8553
  Val   Loss: 1.8632,   Val ACC@1:   0.5634, ACC@5: 0.8357
  Val Macro F1: 0.3234, Weighted F1: 0.4633


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 1.5097, Train ACC@1: 0.6080, ACC@5: 0.9015
  Val   Loss: 1.7055,   Val ACC@1:   0.5822, ACC@5: 0.8545
  Val Macro F1: 0.3177, Weighted F1: 0.4791


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 1.3473, Train ACC@1: 0.6432, ACC@5: 0.9226
  Val   Loss: 1.5532,   Val ACC@1:   0.5915, ACC@5: 0.8826
  Val Macro F1: 0.3598, Weighted F1: 0.5029


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 1.2155, Train ACC@1: 0.6734, ACC@5: 0.9367
  Val   Loss: 1.4745,   Val ACC@1:   0.6056, ACC@5: 0.8967
  Val Macro F1: 0.3777, Weighted F1: 0.5156


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 1.1317, Train ACC@1: 0.6814, ACC@5: 0.9487
  Val   Loss: 1.4359,   Val ACC@1:   0.6056, ACC@5: 0.8967
  Val Macro F1: 0.3767, Weighted F1: 0.5150


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 1.0609, Train ACC@1: 0.6884, ACC@5: 0.9608
  Val   Loss: 1.3931,   Val ACC@1:   0.5869, ACC@5: 0.9014
  Val Macro F1: 0.3509, Weighted F1: 0.5072
Evaluating the model on the test set...
Test Loss: 1.6244
ACC@1: 0.6262
ACC@5: 0.8411


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


(1.6243614481988353,
 {'ACC@1': 0.6261682242990654, 'ACC@5': 0.8411214953271028},
 {'1': {'precision': 0.8,
   'recall': 1.0,
   'f1-score': 0.888888888888889,
   'support': 8},
  '3': {'precision': 0.6666666666666666,
   'recall': 0.8571428571428571,
   'f1-score': 0.75,
   'support': 7},
  '4': {'precision': 0.125,
   'recall': 0.3333333333333333,
   'f1-score': 0.18181818181818182,
   'support': 3},
  '7': {'precision': 0.6666666666666666,
   'recall': 1.0,
   'f1-score': 0.8,
   'support': 4},
  '8': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1},
  '10': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 1},
  '11': {'precision': 0.6666666666666666,
   'recall': 1.0,
   'f1-score': 0.8,
   'support': 2},
  '12': {'precision': 1.0,
   'recall': 0.75,
   'f1-score': 0.8571428571428571,
   'support': 8},
  '13': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 1},
  '14': {'precision': 0.4,
   'recall': 1.0,
   'f1-score': 0.5714285714285715,