In [1]:
!pip install transformers tqdm scikit-learn  

import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
from transformers import DistilBertTokenizer, DistilBertModel, AdamW
from tqdm import tqdm




In [3]:
checkins_file = "/kaggle/input/4square/dataset_TIST2015_Checkins.txt"
df_tist = pd.read_csv(
    checkins_file,
    sep="\s+",
    header=None,  # no header in the file
    names=[
        "user_id",
        "venue_id",
        "dow",
        "month",
        "day",
        "clock",
        "tz_sign",   # or "year" if your order differs
        "year",
        "offset_minutes",
    ],
)
print(df_tist.head(5))


   user_id                  venue_id  dow month  day     clock  tz_sign  \
0    50756  4f5e3a72e4b053fd6a4313f6  Tue   Apr  3.0  18:00:06      0.0   
1   190571  4b4b87b5f964a5204a9f26e3  Tue   Apr  3.0  18:00:07      0.0   
2   221021  4a85b1b3f964a520eefe1fe3  Tue   Apr  3.0  18:00:08      0.0   
3    66981  4b4606f2f964a520751426e3  Tue   Apr  3.0  18:00:08      0.0   
4    21010  4c2b4e8a9a559c74832f0de2  Tue   Apr  3.0  18:00:09      0.0   

     year  offset_minutes  
0  2012.0           240.0  
1  2012.0           180.0  
2  2012.0          -240.0  
3  2012.0          -300.0  
4  2012.0           240.0  


In [5]:
pois_file = "/kaggle/input/4square/dataset_TIST2015_POIs.txt"
df_pois = pd.read_csv(
    pois_file,
    sep="\t",
    header=None,
    names=["venue_id","latitude","longitude","category","country"]
)
print(df_pois.head(5))


                   venue_id   latitude  longitude           category country
0  3fd66200f964a52000e71ee3  40.733596 -74.003139          Jazz Club      US
1  3fd66200f964a52000e81ee3  40.758102 -73.975734                Gym      US
2  3fd66200f964a52000ea1ee3  40.732456 -74.003755  Indian Restaurant      US
3  3fd66200f964a52000ec1ee3  42.345907 -71.087001  Indian Restaurant      US
4  3fd66200f964a52000ee1ee3  39.933178 -75.159262     Sandwich Place      US


In [6]:
df_merged = pd.merge(df_tist, df_pois, on="venue_id", how="inner")
print(df_merged.head(5))


   user_id                  venue_id  dow month  day     clock  tz_sign  \
0    50756  4f5e3a72e4b053fd6a4313f6  Tue   Apr  3.0  18:00:06      0.0   
1   190571  4b4b87b5f964a5204a9f26e3  Tue   Apr  3.0  18:00:07      0.0   
2   212251  4b4b87b5f964a5204a9f26e3  Wed   Apr  4.0  05:49:28      0.0   
3   227763  4b4b87b5f964a5204a9f26e3  Wed   Apr  4.0  12:10:31      0.0   
4   212251  4b4b87b5f964a5204a9f26e3  Wed   Apr  4.0  12:53:32      0.0   

     year  offset_minutes   latitude  longitude              category country  
0  2012.0           240.0  55.696132  37.557842           Comedy Club      RU  
1  2012.0           180.0  41.029717  28.974420  Other Great Outdoors      TR  
2  2012.0           180.0  41.029717  28.974420  Other Great Outdoors      TR  
3  2012.0           180.0  41.029717  28.974420  Other Great Outdoors      TR  
4  2012.0           180.0  41.029717  28.974420  Other Great Outdoors      TR  


In [None]:
print(df_merged.head(10))

In [None]:
def parse_tist_datetime(row):
    """
    Convert columns (dow, month, day, clock, year, offset_minutes)
    into a single datetime string like:
       "Tue Apr 03 18:00:06 +0400 2012"
    and parse with format="%a %b %d %H:%M:%S %z %Y".
    """
    # 1. Convert offset_minutes -> +HHMM or -HHMM
    offset = row.get("offset_minutes", 0.0)
    if pd.isna(offset):
        offset = 0.0
    offset = int(offset)  # e.g. 240 -> +4 hours
    hours = abs(offset) // 60
    mins = abs(offset) % 60
    sign = "+" if offset >= 0 else "-"
    offset_str = f"{sign}{hours:02d}{mins:02d}"  # e.g. "+0400" or "-0530"

    # 2. Convert day (float) -> int -> string
    day_val = row.get("day", 1.0)
    if pd.isna(day_val):
        day_val = 1.0
    day_val = int(day_val)  # e.g. 3.0 -> 3

    # 3. Convert year (float) -> int -> string
    year_val = row.get("year", 2012.0)
    if pd.isna(year_val):
        year_val = 2012.0
    year_val = int(year_val)  # e.g. 2012.0 -> 2012

    # 4. Build the final string
    #    Example: "Tue Apr 3 18:00:06 +0400 2012"
    dow_str = str(row.get("dow", "Mon"))        # "Tue"
    mon_str = str(row.get("month", "Jan"))      # "Apr"
    clk_str = str(row.get("clock", "00:00:00")) # "18:00:06"

    raw_str = f"{dow_str} {mon_str} {day_val} {clk_str} {offset_str} {year_val}"

    # 5. Parse using the matching format
    dt = pd.to_datetime(raw_str, format="%a %b %d %H:%M:%S %z %Y", errors="coerce")
    return dt


In [None]:
print(df_merged.columns)


In [None]:
def make_lat_lon_buckets(df, lat_multiplier=10, lon_multiplier=10):
    # 1. Convert lat/lon to numeric
    df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce")
    df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce")

    # 2. Drop rows with missing or inf lat/lon
    df = df.dropna(subset=["latitude", "longitude"])
    df = df[~df["latitude"].isin([np.inf, -np.inf])]
    df = df[~df["longitude"].isin([np.inf, -np.inf])]

    # 3. Create lat/lon buckets
    df["lat_bucket"] = (df["latitude"] * lat_multiplier).astype(int)
    df["lon_bucket"] = (df["longitude"] * lon_multiplier).astype(int)
    return df

df_merged = make_lat_lon_buckets(df_merged)


In [None]:
df_merged['check_in_time'] = pd.to_datetime(df_merged['check_in_time'], utc=True)



In [None]:
df_merged['hour_of_day'] = df_merged['check_in_time'].dt.hour
df_merged['check_in_time'] = df_merged['check_in_time'].dt.tz_convert('America/New_York')
df_merged['hour_of_day'] = df_merged['check_in_time'].dt.hour

In [None]:
def fix_and_parse_datetime(df):
    df["check_in_time"] = df.apply(parse_tist_datetime, axis=1)
    df = df.dropna(subset=["check_in_time"]).copy()
    return df

df_merged = fix_and_parse_datetime(df_merged)
print(df_merged[["check_in_time", "dow", "month", "day", "clock", "offset_minutes"]].head(20))


In [None]:
def make_lat_lon_buckets(df, lat_multiplier=10, lon_multiplier=10):
    # 1. Convert lat/lon to numeric
    df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce")
    df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce")

    # 2. Drop rows with missing or inf lat/lon
    df = df.dropna(subset=["latitude", "longitude"])
    df = df[~df["latitude"].isin([np.inf, -np.inf])]
    df = df[~df["longitude"].isin([np.inf, -np.inf])]

    # 3. Create lat/lon buckets
    df["lat_bucket"] = (df["latitude"] * lat_multiplier).astype(int)
    df["lon_bucket"] = (df["longitude"] * lon_multiplier).astype(int)
    return df

df_merged = make_lat_lon_buckets(df_merged)


In [None]:
def construct_trajectories(data, time_window="1D"):
    trajectories = []
    grouped = data.groupby("user_id", group_keys=False)
    for user_id, group in grouped:
        group = group.sort_values("check_in_time").copy()
        group["trajectory_id"] = (group["check_in_time"].diff() > pd.Timedelta(time_window)).cumsum()

        for traj_id, traj_group in group.groupby("trajectory_id"):
            trajectory = {
                "user_id": user_id,
                "trajectory_id": traj_id,
                "timestamps": traj_group["check_in_time"].tolist(),
                "locations": traj_group["venue_id"].tolist(),
                "day_of_week": traj_group["day_of_week"].tolist(),
                "hour_of_day": traj_group["hour_of_day"].tolist(),
                "lat_bucket": traj_group["lat_bucket"].tolist(),
                "lon_bucket": traj_group["lon_bucket"].tolist()
            }
            trajectories.append(trajectory)

    return pd.DataFrame(trajectories)

trajectories_df = construct_trajectories(df_merged, time_window="1D")
print(trajectories_df.head(5))


In [None]:
def encode_pois(trajectories_df):
    all_locations = set(loc for traj in trajectories_df["locations"] for loc in traj)
    location_mapping = {loc: idx for idx, loc in enumerate(all_locations)}

    trajectories_df["encoded_locations"] = trajectories_df["locations"].apply(
        lambda locs: [location_mapping[loc] for loc in locs]
    )
    return trajectories_df, location_mapping

trajectories_df, poi_mapping = encode_pois(trajectories_df)
trajectories_df.head()


In [3]:
# loaded saved preprocessed trajectories
trajectories_df = pd.read_csv("/kaggle/input/4sq-traject/foursquare_trajectories.csv")

In [4]:
print(trajectories_df.head())

   user_id  trajectory_id                                         timestamps  \
0        1              0  [Timestamp('2012-04-03 18:19:17-0400', tz='Ame...   
1        1              1  [Timestamp('2012-04-06 15:09:58-0400', tz='Ame...   
2        1              2  [Timestamp('2012-04-12 09:33:06-0400', tz='Ame...   
3        1              3  [Timestamp('2012-04-13 09:35:55-0400', tz='Ame...   
4        1              4  [Timestamp('2012-04-16 13:24:29-0400', tz='Ame...   

                                           locations  \
0  ['4e753db3c65bb91db4493d78', '4f691503e4b072f1...   
1  ['4beed97de24d20a1f2777314', '4b5c5402f964a520...   
2  ['4e753db3c65bb91db4493d78', '4c597bc36407d13a...   
3  ['4e753db3c65bb91db4493d78', '4bd3283041b9ef3b...   
4  ['4c15395a77cea593a6dcd160', '4bf20f1025afb713...   

                                         day_of_week  \
0                       ['Tue', 'Tue', 'Tue', 'Wed']   
1  ['Fri', 'Fri', 'Fri', 'Sat', 'Sat', 'Sat', 'Sa...   
2             

In [6]:
unique_users = trajectories_df["user_id"].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 270)

filtered_df = trajectories_df[trajectories_df["user_id"].isin(selected_users)].reset_index(drop=True)

label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Selected {num_users} users.")


Selected 270 users.


In [7]:
train_df, val_df = train_test_split(filtered_df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)


In [8]:
def build_mixed_trajectory(loc_ids, days, hours, lat_buckets, lon_buckets):
    tokens = []
    for loc_id, d, h, latb, lonb in zip(loc_ids, days, hours, lat_buckets, lon_buckets):
        token = f"POI{loc_id}_DAY{d}_H{h}_LAT{latb}_LON{lonb}"
        tokens.append(token)
    return " ".join(tokens)

class SpatioTemporalFoursquareDataset(Dataset):
    def __init__(self, df, tokenizer, max_length=128):
        self.df = df
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.labels = df["label"].values  # integer user IDs

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        loc_ids = row["encoded_locations"]
        days = row["day_of_week"]
        hours = row["hour_of_day"]
        latb = row["lat_bucket"]
        lonb = row["lon_bucket"]

        # Build spatio-temporal token string
        trajectory_str = build_mixed_trajectory(loc_ids, days, hours, latb, lonb)

        # Tokenize
        inputs = self.tokenizer(
            trajectory_str,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors="pt",
        )

        label = self.labels[idx]

        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Instantiate tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Create datasets
train_dataset = SpatioTemporalFoursquareDataset(train_df, tokenizer, max_length=128)
val_dataset = SpatioTemporalFoursquareDataset(val_df, tokenizer, max_length=128)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]



In [9]:
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]  # DistilBERT's "CLS" token
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits


In [10]:
def compute_metrics(outputs, labels, top_ks=[1, 5]):
    metrics = {}
    with torch.no_grad():
        for k in top_ks:
            _, topk_indices = torch.topk(outputs, k=k, dim=1)
            correct_topk = sum(labels[i].item() in topk_indices[i] for i in range(labels.size(0)))
            metrics[f"ACC@{k}"] = correct_topk / labels.size(0)

    _, preds = torch.max(outputs, dim=1)
    metrics["top1_preds"] = preds
    return metrics

from sklearn.metrics import classification_report

def evaluate_model(model, loader, device, loss_fn):
    model.eval()
    total_loss = 0.0
    all_preds, all_labels = [], []
    acc_metrics = {"ACC@1": 0, "ACC@5": 0}
    total_samples = 0

    with torch.no_grad():
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size

            batch_metrics = compute_metrics(outputs, labels)
            acc_metrics["ACC@1"] += batch_metrics["ACC@1"] * batch_size
            acc_metrics["ACC@5"] += batch_metrics["ACC@5"] * batch_size

            all_preds.extend(batch_metrics["top1_preds"].cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_samples += batch_size

    avg_loss = total_loss / total_samples
    acc_metrics["ACC@1"] /= total_samples
    acc_metrics["ACC@5"] /= total_samples

    cls_metrics = classification_report(all_labels, all_preds, output_dict=True)
    return avg_loss, acc_metrics, cls_metrics


In [11]:
def train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=5):
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        train_iter = tqdm(train_loader, desc=f"Epoch {epoch+1} [Training]", leave=False)

        for batch in train_iter:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            train_iter.set_description(f"Epoch {epoch+1} [Training] loss: {loss.item():.4f}")

        # Evaluate after each epoch
        train_loss, train_acc_metrics, _ = evaluate_model(model, train_loader, device, loss_fn)
        val_loss, val_acc_metrics, val_cls_report = evaluate_model(model, val_loader, device, loss_fn)

        print(f"\nEpoch {epoch+1}:")
        print(f"  Train Loss: {train_loss:.4f} | ACC@1: {train_acc_metrics['ACC@1']:.4f}, ACC@5: {train_acc_metrics['ACC@5']:.4f}")
        print(f"  Val   Loss: {val_loss:.4f}   | ACC@1: {val_acc_metrics['ACC@1']:.4f}, ACC@5: {val_acc_metrics['ACC@5']:.4f}")
        print(f"  Val Macro F1: {val_cls_report['macro avg']['f1-score']:.4f}, Weighted F1: {val_cls_report['weighted avg']['f1-score']:.4f}")


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = TrajectoryDistilBERT(num_users, dropout_rate=0.0) 
model.to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()

epochs = 1
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 3.3036 | ACC@1: 0.3003, ACC@5: 0.6179
  Val   Loss: 3.3560   | ACC@1: 0.2862, ACC@5: 0.6142
  Val Macro F1: 0.1179, Weighted F1: 0.1788


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Using best parameters for 270 users


In [13]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 270)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

train_df, val_df = train_test_split(filtered_df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)


Number of selected users: 270


In [15]:
# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

train_dataset = SpatioTemporalFoursquareDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalFoursquareDataset(val_df, tokenizer, max_length=best_max_length)

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)


In [16]:
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # DistilBERT's last_hidden_state: (batch_size, seq_len, hidden_size)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits


In [17]:
# Build the model 
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

TrajectoryDistilBERT(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Li

In [18]:
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()



In [20]:
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 2.5334 | ACC@1: 0.4063, ACC@5: 0.7471
  Val   Loss: 2.6008   | ACC@1: 0.3930, ACC@5: 0.7294
  Val Macro F1: 0.2186, Weighted F1: 0.3016


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.8269 | ACC@1: 0.4818, ACC@5: 0.8410
  Val   Loss: 1.9086   | ACC@1: 0.4709, ACC@5: 0.8230
  Val Macro F1: 0.2979, Weighted F1: 0.3828


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.5708 | ACC@1: 0.5192, ACC@5: 0.8736
  Val   Loss: 1.6760   | ACC@1: 0.5098, ACC@5: 0.8555
  Val Macro F1: 0.3479, Weighted F1: 0.4402


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.4513 | ACC@1: 0.5238, ACC@5: 0.8850
  Val   Loss: 1.5904   | ACC@1: 0.4874, ACC@5: 0.8623
  Val Macro F1: 0.3482, Weighted F1: 0.4167


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.3680 | ACC@1: 0.5483, ACC@5: 0.8956
  Val   Loss: 1.5271   | ACC@1: 0.5283, ACC@5: 0.8683
  Val Macro F1: 0.3987, Weighted F1: 0.4699


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 1.3068 | ACC@1: 0.5575, ACC@5: 0.9058
  Val   Loss: 1.5058   | ACC@1: 0.5231, ACC@5: 0.8719
  Val Macro F1: 0.3981, Weighted F1: 0.4683


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 1.2235 | ACC@1: 0.5920, ACC@5: 0.9131
  Val   Loss: 1.4484   | ACC@1: 0.5452, ACC@5: 0.8764
  Val Macro F1: 0.4206, Weighted F1: 0.5041


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 1.1527 | ACC@1: 0.6121, ACC@5: 0.9210
  Val   Loss: 1.4065   | ACC@1: 0.5656, ACC@5: 0.8816
  Val Macro F1: 0.4443, Weighted F1: 0.5237


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 1.0636 | ACC@1: 0.6462, ACC@5: 0.9327
  Val   Loss: 1.3736   | ACC@1: 0.5733, ACC@5: 0.8892
  Val Macro F1: 0.4621, Weighted F1: 0.5415


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 0.9924 | ACC@1: 0.6692, ACC@5: 0.9403
  Val   Loss: 1.3331   | ACC@1: 0.5953, ACC@5: 0.8912
  Val Macro F1: 0.4847, Weighted F1: 0.5683


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# WITH TEST SET

In [13]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 270)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

# Split data into training, validation, and test sets
train_df, temp_df = train_test_split(filtered_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

# Create datasets
train_dataset = SpatioTemporalFoursquareDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalFoursquareDataset(val_df, tokenizer, max_length=best_max_length)
test_dataset = SpatioTemporalFoursquareDataset(test_df, tokenizer, max_length=best_max_length)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

# Define model
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits

# Build the model 
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
model.to(device)

# Optimizer and loss
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()

# Training function
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)

# Evaluation function for test set
from sklearn.metrics import classification_report

def evaluate_test_set(model, test_loader, device):
    model.eval()
    total_loss, total_correct = 0, 0
    all_predictions, all_labels = [], []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs, labels)

            total_loss += loss.item()
            predictions = torch.argmax(outputs, dim=1)
            total_correct += (predictions == labels).sum().item()

            # Collect predictions and labels for classification report
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate overall accuracy
    accuracy = total_correct / len(test_loader.dataset)
    print(f"Test Loss: {total_loss / len(test_loader):.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

    # Classification metrics
    print(classification_report(all_labels, all_predictions, zero_division=0))

# Evaluate the test set
print("Evaluating the model on the test set...")
evaluate_test_set(model, test_loader, device)


Number of selected users: 270


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 2.7982 | ACC@1: 0.3591, ACC@5: 0.7089
  Val   Loss: 2.8647   | ACC@1: 0.3560, ACC@5: 0.6933
  Val Macro F1: 0.1701, Weighted F1: 0.2472


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.9820 | ACC@1: 0.4500, ACC@5: 0.8320
  Val   Loss: 2.0688   | ACC@1: 0.4347, ACC@5: 0.8121
  Val Macro F1: 0.2758, Weighted F1: 0.3456


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.6695 | ACC@1: 0.4901, ACC@5: 0.8660
  Val   Loss: 1.7905   | ACC@1: 0.4647, ACC@5: 0.8458
  Val Macro F1: 0.3193, Weighted F1: 0.3866


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 1.5075 | ACC@1: 0.5209, ACC@5: 0.8834
  Val   Loss: 1.6570   | ACC@1: 0.4946, ACC@5: 0.8587
  Val Macro F1: 0.3598, Weighted F1: 0.4245


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 1.4276 | ACC@1: 0.5361, ACC@5: 0.8838
  Val   Loss: 1.5992   | ACC@1: 0.5134, ACC@5: 0.8571
  Val Macro F1: 0.3921, Weighted F1: 0.4531


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 1.3455 | ACC@1: 0.5441, ACC@5: 0.9006
  Val   Loss: 1.5269   | ACC@1: 0.5278, ACC@5: 0.8667
  Val Macro F1: 0.4069, Weighted F1: 0.4757


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 1.2935 | ACC@1: 0.5687, ACC@5: 0.9067
  Val   Loss: 1.4956   | ACC@1: 0.5332, ACC@5: 0.8796
  Val Macro F1: 0.4102, Weighted F1: 0.4808


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 1.2345 | ACC@1: 0.5825, ACC@5: 0.9150
  Val   Loss: 1.4729   | ACC@1: 0.5428, ACC@5: 0.8790
  Val Macro F1: 0.4275, Weighted F1: 0.4905


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 1.1626 | ACC@1: 0.6021, ACC@5: 0.9228
  Val   Loss: 1.4307   | ACC@1: 0.5407, ACC@5: 0.8881
  Val Macro F1: 0.4327, Weighted F1: 0.4950


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 1.1066 | ACC@1: 0.6300, ACC@5: 0.9258
  Val   Loss: 1.4084   | ACC@1: 0.5696, ACC@5: 0.8849
  Val Macro F1: 0.4663, Weighted F1: 0.5413
Evaluating the model on the test set...
Test Loss: 1.3611
Test Accuracy: 0.5642
              precision    recall  f1-score   support

           0       1.00      0.87      0.93        15
           1       0.82      1.00      0.90        18
           2       0.75      1.00      0.86         3
           3       0.30      0.60      0.40         5
           4       1.00      1.00      1.00         8
           5       0.00      0.00      0.00         1
           6       1.00      1.00      1.00         6
           7       0.71      0.71      0.71         7
           8       0.63      0.77      0.69        22
           9       0.57      0.93      0.70        14
          10       0.22      0.10      0.14        20
          11       1.00      1.00      1.00         3
          13       1.00      0.67      0.80         3
  

# Using best parameters for 109 users

In [21]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 109)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

train_df, val_df = train_test_split(filtered_df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)


Number of selected users: 109


In [22]:
# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

train_dataset = SpatioTemporalFoursquareDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalFoursquareDataset(val_df, tokenizer, max_length=best_max_length)

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)


In [23]:
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # DistilBERT's last_hidden_state: (batch_size, seq_len, hidden_size)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits


In [24]:
# Build the model 
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

TrajectoryDistilBERT(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Li

In [25]:
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()




In [26]:
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 2.1907 | ACC@1: 0.4137, ACC@5: 0.8037
  Val   Loss: 2.2956   | ACC@1: 0.3935, ACC@5: 0.7704
  Val Macro F1: 0.1942, Weighted F1: 0.2877


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.2377 | ACC@1: 0.6297, ACC@5: 0.9440
  Val   Loss: 1.4041   | ACC@1: 0.5878, ACC@5: 0.9166
  Val Macro F1: 0.4457, Weighted F1: 0.5204


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.0197 | ACC@1: 0.6601, ACC@5: 0.9639
  Val   Loss: 1.1884   | ACC@1: 0.6546, ACC@5: 0.9372
  Val Macro F1: 0.5251, Weighted F1: 0.6012


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 0.9246 | ACC@1: 0.6726, ACC@5: 0.9723
  Val   Loss: 1.1066   | ACC@1: 0.6330, ACC@5: 0.9441
  Val Macro F1: 0.5288, Weighted F1: 0.5890


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 0.8519 | ACC@1: 0.6886, ACC@5: 0.9774
  Val   Loss: 1.0433   | ACC@1: 0.6526, ACC@5: 0.9509
  Val Macro F1: 0.5292, Weighted F1: 0.6005


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 0.7699 | ACC@1: 0.7249, ACC@5: 0.9843
  Val   Loss: 0.9778   | ACC@1: 0.6860, ACC@5: 0.9529
  Val Macro F1: 0.5814, Weighted F1: 0.6481


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 0.7103 | ACC@1: 0.7502, ACC@5: 0.9855
  Val   Loss: 0.9747   | ACC@1: 0.6771, ACC@5: 0.9539
  Val Macro F1: 0.5741, Weighted F1: 0.6413


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 0.6540 | ACC@1: 0.7585, ACC@5: 0.9885
  Val   Loss: 0.9364   | ACC@1: 0.7046, ACC@5: 0.9539
  Val Macro F1: 0.6147, Weighted F1: 0.6826


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 0.6013 | ACC@1: 0.7948, ACC@5: 0.9902
  Val   Loss: 0.9178   | ACC@1: 0.7184, ACC@5: 0.9539
  Val Macro F1: 0.6223, Weighted F1: 0.7011


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 0.5591 | ACC@1: 0.8025, ACC@5: 0.9897
  Val   Loss: 0.9191   | ACC@1: 0.6977, ACC@5: 0.9549
  Val Macro F1: 0.6139, Weighted F1: 0.6836


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# WITH TEST SET

In [14]:
# Random subset of users
unique_users = trajectories_df['user_id'].unique()
random.seed(42)
selected_users = random.sample(list(unique_users), 109)

filtered_df = trajectories_df[trajectories_df['user_id'].isin(selected_users)].reset_index(drop=True)

# Label encode user_id
label_encoder = LabelEncoder()
filtered_df["label"] = label_encoder.fit_transform(filtered_df["user_id"])
num_users = len(label_encoder.classes_)
print(f"Number of selected users: {num_users}")

# Split data into training, validation, and test sets
train_df, temp_df = train_test_split(filtered_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Best Hyperparams
best_dropout = 0.0
best_lr = 3e-5
best_batch_size = 16
best_max_length = 128

# Create datasets
train_dataset = SpatioTemporalFoursquareDataset(train_df, tokenizer, max_length=best_max_length)
val_dataset = SpatioTemporalFoursquareDataset(val_df, tokenizer, max_length=best_max_length)
test_dataset = SpatioTemporalFoursquareDataset(test_df, tokenizer, max_length=best_max_length)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

# Define model
class TrajectoryDistilBERT(nn.Module):
    def __init__(self, num_users, dropout_rate=0.1):
        super(TrajectoryDistilBERT, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.dim, num_users)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        cls_output = self.dropout(cls_output)
        logits = self.classifier(cls_output)
        return logits

# Build the model 
model = TrajectoryDistilBERT(num_users, dropout_rate=best_dropout)
model.to(device)

# Optimizer and loss
optimizer = AdamW(model.parameters(), lr=best_lr)
loss_fn = nn.CrossEntropyLoss()

# Training function
epochs = 10  
train_model(model, train_loader, val_loader, device, loss_fn, optimizer, epochs=epochs)

# Evaluation function for test set
from sklearn.metrics import classification_report

def evaluate_test_set(model, test_loader, device):
    model.eval()
    total_loss, total_correct = 0, 0
    all_predictions, all_labels = [], []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs, labels)

            total_loss += loss.item()
            predictions = torch.argmax(outputs, dim=1)
            total_correct += (predictions == labels).sum().item()

            # Collect predictions and labels for classification report
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate overall accuracy
    accuracy = total_correct / len(test_loader.dataset)
    print(f"Test Loss: {total_loss / len(test_loader):.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

    # Classification metrics
    print(classification_report(all_labels, all_predictions, zero_division=0))

# Evaluate the test set
print("Evaluating the model on the test set...")
evaluate_test_set(model, test_loader, device)


Number of selected users: 109


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 1:
  Train Loss: 2.3351 | ACC@1: 0.3871, ACC@5: 0.7877
  Val   Loss: 2.4152   | ACC@1: 0.3652, ACC@5: 0.7670
  Val Macro F1: 0.1667, Weighted F1: 0.2707


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 2:
  Train Loss: 1.4611 | ACC@1: 0.5641, ACC@5: 0.9150
  Val   Loss: 1.5248   | ACC@1: 0.5707, ACC@5: 0.8809
  Val Macro F1: 0.4033, Weighted F1: 0.5031


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 3:
  Train Loss: 1.1545 | ACC@1: 0.6457, ACC@5: 0.9532
  Val   Loss: 1.2654   | ACC@1: 0.6283, ACC@5: 0.9372
  Val Macro F1: 0.4682, Weighted F1: 0.5780


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 4:
  Train Loss: 0.9981 | ACC@1: 0.6628, ACC@5: 0.9644
  Val   Loss: 1.1087   | ACC@1: 0.6531, ACC@5: 0.9372
  Val Macro F1: 0.5315, Weighted F1: 0.6074


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 5:
  Train Loss: 0.9229 | ACC@1: 0.6628, ACC@5: 0.9753
  Val   Loss: 1.0501   | ACC@1: 0.6518, ACC@5: 0.9463
  Val Macro F1: 0.5426, Weighted F1: 0.6048


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 6:
  Train Loss: 0.8728 | ACC@1: 0.6858, ACC@5: 0.9798
  Val   Loss: 1.0194   | ACC@1: 0.6715, ACC@5: 0.9503
  Val Macro F1: 0.5662, Weighted F1: 0.6374


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 7:
  Train Loss: 0.8003 | ACC@1: 0.7150, ACC@5: 0.9826
  Val   Loss: 0.9641   | ACC@1: 0.6767, ACC@5: 0.9516
  Val Macro F1: 0.6033, Weighted F1: 0.6586


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 8:
  Train Loss: 0.7744 | ACC@1: 0.7158, ACC@5: 0.9837
  Val   Loss: 0.9360   | ACC@1: 0.6793, ACC@5: 0.9542
  Val Macro F1: 0.6015, Weighted F1: 0.6592


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 9:
  Train Loss: 0.7140 | ACC@1: 0.7377, ACC@5: 0.9877
  Val   Loss: 0.8953   | ACC@1: 0.7055, ACC@5: 0.9555
  Val Macro F1: 0.6332, Weighted F1: 0.6922


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Epoch 10:
  Train Loss: 0.6890 | ACC@1: 0.7520, ACC@5: 0.9885
  Val   Loss: 0.9105   | ACC@1: 0.7160, ACC@5: 0.9594
  Val Macro F1: 0.6452, Weighted F1: 0.7011
Evaluating the model on the test set...
Test Loss: 0.9355
Test Accuracy: 0.6993
              precision    recall  f1-score   support

           0       1.00      0.77      0.87        13
           1       1.00      1.00      1.00         6
           2       0.60      0.64      0.62        33
           3       1.00      1.00      1.00         4
           4       0.00      0.00      0.00         2
           5       0.94      1.00      0.97        17
           6       0.86      0.67      0.75         9
           7       0.67      0.82      0.74        17
           8       0.95      1.00      0.98        21
          10       1.00      1.00      1.00         8
          11       0.00      0.00      0.00         3
          12       0.92      0.85      0.88        13
          14       0.50      0.50      0.50         8
  