In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from datetime import datetime, timedelta
from typing import List, Tuple, Dict
import warnings
# Silence every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Use the Apple-silicon GPU backend (MPS) when PyTorch reports it, else the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Seed the Python, NumPy and PyTorch RNGs with one value for reproducibility.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# ==================== Data Generation ====================

class POIDataGenerator:
    """Generate synthetic POI (point-of-interest) trajectory data.

    A trajectory is a random walk over POI categories. Each step is drawn
    with the global ``random`` module from the POIs that are both a listed
    transition from the current POI and listed for the current time period.

    Attributes:
        poi_types: POI name -> integer id.
        time_periods: period name -> integer id.
        temporal_patterns: period name -> POI names plausible in that period.
        transition_patterns: POI name -> POI names that may follow it.
        num_pois: number of POI categories.
        num_time_periods: number of time periods.
    """

    def __init__(self):
        # POI categories with temporal patterns
        self.poi_types = {
            'home': 0,
            'office': 1,
            'restaurant': 2,
            'gym': 3,
            'coffee_shop': 4,
            'bar': 5,
            'grocery_store': 6,
            'park': 7,
            'shopping_mall': 8,
            'cinema': 9
        }

        # Time periods (simplified to 4 periods)
        self.time_periods = {
            'morning': 0,      # 6-12
            'afternoon': 1,    # 12-18
            'evening': 2,      # 18-22
            'night': 3         # 22-6
        }

        # Temporal patterns (what POIs are likely at what times)
        self.temporal_patterns = {
            'morning': ['home', 'coffee_shop', 'office', 'gym'],
            'afternoon': ['office', 'restaurant', 'coffee_shop', 'shopping_mall'],
            'evening': ['restaurant', 'bar', 'cinema', 'gym', 'grocery_store'],
            'night': ['bar', 'home', 'cinema']
        }

        # Transition patterns (likely next POIs).
        # NOTE(review): no list below contains 'park' and trajectories always
        # start at 'home', so 'park' never appears in generated data.
        self.transition_patterns = {
            'home': ['coffee_shop', 'office', 'gym', 'grocery_store'],
            'office': ['restaurant', 'coffee_shop', 'home', 'gym'],
            'restaurant': ['office', 'bar', 'home', 'cinema'],
            'gym': ['home', 'office', 'restaurant', 'coffee_shop'],
            'coffee_shop': ['office', 'home', 'shopping_mall'],
            'bar': ['home', 'restaurant'],
            'grocery_store': ['home'],
            'park': ['home', 'coffee_shop', 'restaurant'],
            'shopping_mall': ['restaurant', 'home', 'cinema'],
            'cinema': ['restaurant', 'bar', 'home']
        }

        # Reverse lookup (period id -> period name) so resolving a name does
        # not depend on the insertion order of ``time_periods``.
        self._period_names = {idx: name for name, idx in self.time_periods.items()}

        self.num_pois = len(self.poi_types)
        self.num_time_periods = len(self.time_periods)

    def get_time_period(self, hour: int) -> int:
        """Convert an hour of the day to a time-period id.

        Hours in [6, 12) are morning, [12, 18) afternoon, [18, 22) evening;
        every other value maps to night.
        """
        if 6 <= hour < 12:
            return self.time_periods['morning']
        elif 12 <= hour < 18:
            return self.time_periods['afternoon']
        elif 18 <= hour < 22:
            return self.time_periods['evening']
        else:
            return self.time_periods['night']

    def generate_trajectory(self, seq_length: int = 10) -> Tuple[List[int], List[int]]:
        """Generate a single trajectory with temporal context.

        The walk starts at 'home' at hour 7 and advances 1-3 hours per step.

        Args:
            seq_length: number of visits to generate.

        Returns:
            ``(trajectory, time_contexts)``: two lists of length
            ``seq_length`` holding the POI id and the time-period id of
            each visit.
        """
        trajectory = []
        time_contexts = []

        # Start from home in the morning
        current_poi = 'home'
        current_hour = 7

        for _ in range(seq_length):
            # Record the current visit and the period it happens in
            time_period = self.get_time_period(current_hour)
            time_contexts.append(time_period)
            trajectory.append(self.poi_types[current_poi])

            transitions = self.transition_patterns[current_poi]
            allowed_now = set(self.temporal_patterns[self._period_names[time_period]])

            # Filter in list order rather than intersecting two sets: the
            # iteration order of a set of str varies between processes (hash
            # randomization), which would make random.choice irreproducible
            # even with a fixed seed.
            possible_pois = [poi for poi in transitions if poi in allowed_now]

            # If nothing fits the time period, fall back to all transitions
            if not possible_pois:
                possible_pois = transitions

            # Choose next POI
            current_poi = random.choice(possible_pois)

            # Update time (add 1-3 hours, wrapping at midnight)
            current_hour = (current_hour + random.randint(1, 3)) % 24

        return trajectory, time_contexts

    def generate_dataset(self, num_sequences: int = 500, seq_length: int = 10) -> Dict:
        """Generate a dataset of trajectories.

        Args:
            num_sequences: number of trajectories to generate.
            seq_length: number of visits per trajectory.

        Returns:
            Dict with keys ``'sequences'`` and ``'time_contexts'``, each a
            list of ``num_sequences`` lists of length ``seq_length``.
        """
        sequences = []
        time_contexts = []

        for _ in range(num_sequences):
            seq, time_ctx = self.generate_trajectory(seq_length)
            sequences.append(seq)
            time_contexts.append(time_ctx)

        return {
            'sequences': sequences,
            'time_contexts': time_contexts
        }


Using device: mps


In [2]:
# ==================== Model Architecture ====================
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch of embeddings.

    The table is precomputed for ``max_len`` positions and stored as the
    non-trainable buffer ``pe`` of shape ``(1, max_len, d_model)``. Even
    feature columns hold sines, odd columns hold cosines.

    Args:
        d_model: embedding width; may be even or odd.
        max_len: number of positions in the table. ``forward`` slices the
            table to the input's length, so longer inputs are not supported.
    """

    def __init__(self, d_model: int, max_len: int = 100):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer odd column than even columns,
        # so trim the frequency vector to fit; for even d_model this is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        The buffer is indexed along dimension 1, so ``x`` is treated as
        ``(batch, seq_len, d_model)``.
        """
        return x + self.pe[:, :x.size(1)]

class TrajectoryTransformer(nn.Module):
    """Causally masked transformer for next-POI prediction.

    POI ids and time-period ids are embedded separately, concatenated,
    projected to ``d_model``, given positional encodings and passed through
    an ``nn.TransformerDecoder`` under a causal mask. A linear head maps each
    position to logits over the POI vocabulary.

    Args:
        num_pois: size of the POI vocabulary (also the output width).
        num_time_periods: size of the time-period vocabulary.
        d_model: model width; the time embedding uses ``d_model // 2``.
        nhead: number of attention heads.
        num_layers: number of decoder layers.
        dropout: dropout rate for the decoder layers and the output dropout.
    """

    def __init__(self,
                 num_pois: int,
                 num_time_periods: int,
                 d_model: int = 64,
                 nhead: int = 4,
                 num_layers: int = 2,
                 dropout: float = 0.1):
        super().__init__()

        # Embeddings
        self.poi_embedding = nn.Embedding(num_pois, d_model)
        self.time_embedding = nn.Embedding(num_time_periods, d_model // 2)

        # Projection layer to combine POI and time embeddings
        self.input_projection = nn.Linear(d_model + d_model // 2, d_model)

        # Positional encoding
        self.pos_encoder = PositionalEncoding(d_model)

        # Transformer Decoder
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)

        # Output layers
        self.dropout = nn.Dropout(dropout)
        self.output_layer = nn.Linear(d_model, num_pois)

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialize every parameter with more than one dimension."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def generate_square_subsequent_mask(self, sz: int, device=None) -> torch.Tensor:
        """Generate the additive causal mask for the decoder.

        Args:
            sz: sequence length.
            device: device to create the mask on; ``None`` uses the default
                device, as before.

        Returns:
            ``(sz, sz)`` float tensor with ``-inf`` strictly above the
            diagonal and ``0`` elsewhere.
        """
        # triu keeps the strict upper triangle (-inf) and zeroes the rest.
        return torch.triu(
            torch.full((sz, sz), float('-inf'), device=device),
            diagonal=1,
        )

    def forward(self, poi_seq, time_seq):
        """Compute next-POI logits for every position.

        Args:
            poi_seq: ``(batch, seq_len)`` integer POI ids.
            time_seq: integer time-period ids, concatenated feature-wise with
                the POI embeddings, so it must match ``poi_seq`` in shape.

        Returns:
            ``(batch, seq_len, num_pois)`` logits.
        """
        _, seq_len = poi_seq.shape

        # Build the causal mask directly on the input's device instead of
        # creating it on the CPU and copying it over on every call.
        tgt_mask = self.generate_square_subsequent_mask(seq_len, device=poi_seq.device)

        # Embed POIs and time
        poi_emb = self.poi_embedding(poi_seq)  # (batch, seq_len, d_model)
        time_emb = self.time_embedding(time_seq)  # (batch, seq_len, d_model//2)

        # Concatenate and project
        combined = torch.cat([poi_emb, time_emb], dim=-1)  # (batch, seq_len, d_model + d_model//2)
        x = self.input_projection(combined)  # (batch, seq_len, d_model)

        # Add positional encoding
        x = self.pos_encoder(x)

        # There is no encoder, so the decoder's cross-attention is fed an
        # all-zero memory: it carries no information about the input, and only
        # the masked self-attention sees the trajectory.
        memory = torch.zeros_like(x)
        x = self.transformer_decoder(
            tgt=x,
            memory=memory,
            tgt_mask=tgt_mask
        )

        # Apply dropout and output layer
        x = self.dropout(x)
        output = self.output_layer(x)  # (batch, seq_len, num_pois)

        return output

In [3]:
# ==================== Training Utils ====================

class TrajectoryDataset(torch.utils.data.Dataset):
    """Serve (input POIs, input time periods, next-POI targets) triples.

    Each item is cut from one stored trajectory; targets are the inputs
    shifted one step forward. Trajectories with more than ``seq_length + 1``
    visits yield a randomly placed window of ``seq_length`` steps, shorter
    ones are used whole.
    """

    def __init__(self, sequences, time_contexts, seq_length=8):
        self.seq_length = seq_length
        self.sequences = sequences
        self.time_contexts = time_contexts

    def __len__(self):
        """Number of stored trajectories."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return three ``torch.long`` tensors: inputs, input times, targets."""
        pois = self.sequences[idx]
        periods = self.time_contexts[idx]
        window = self.seq_length

        if len(pois) <= window + 1:
            # Short trajectory: everything but the last visit is input,
            # everything but the first is target.
            input_span = slice(None, -1)
            target_span = slice(1, None)
        else:
            # Long trajectory: draw a random window start (global RNG).
            offset = random.randint(0, len(pois) - window - 1)
            input_span = slice(offset, offset + window)
            target_span = slice(offset + 1, offset + window + 1)

        as_long = lambda values: torch.tensor(values, dtype=torch.long)
        return (as_long(pois[input_span]),
                as_long(periods[input_span]),
                as_long(pois[target_span]))

def train_model(model, train_loader, num_epochs=50, learning_rate=0.001):
    """Train the transformer model with Adam and cross-entropy loss.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on any notebook-level ``device`` variable.

    Args:
        model: module called as ``model(poi_seq, time_seq)`` and returning
            logits whose last dimension is the number of classes.
        train_loader: iterable of ``(poi_seq, time_seq, targets)`` batches.
        num_epochs: number of passes over ``train_loader``.
        learning_rate: Adam learning rate.

    Returns:
        List with the mean batch loss of each epoch.

    Raises:
        ValueError: if ``train_loader`` yields no batches in an epoch.
    """
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Follow the model's own device rather than a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    losses = []

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        num_batches = 0
        for poi_seq, time_seq, targets in train_loader:
            poi_seq = poi_seq.to(target_device)
            time_seq = time_seq.to(target_device)
            targets = targets.to(target_device)

            # Forward pass
            outputs = model(poi_seq, time_seq)

            # Flatten (batch, seq_len, classes) -> (batch * seq_len, classes)
            loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))

            # Backward pass with gradient-norm clipping
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            epoch_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader produced no batches")

        avg_loss = epoch_loss / num_batches
        losses.append(avg_loss)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

    return losses

In [4]:
# ==================== Inference ====================

def predict_next_poi(model, poi_sequence, time_sequence, poi_names, top_k=3):
    """Predict the most likely next POIs after a visited sequence.

    The model is switched to eval mode (and left in it) and run on the
    device its parameters live on.

    Args:
        model: module called as ``model(poi_seq, time_seq)`` and returning
            ``(batch, seq_len, num_pois)`` logits.
        poi_sequence: non-empty sequence of POI ids visited so far.
        time_sequence: time-period ids, one per entry of ``poi_sequence``.
        poi_names: mapping from POI id to display name.
        top_k: number of predictions wanted; clamped to the number of
            classes the model outputs.

    Returns:
        List of ``(poi_name, probability)`` tuples, most probable first.

    Raises:
        ValueError: if ``poi_sequence`` is empty or the two sequences differ
            in length.
    """
    if len(poi_sequence) == 0:
        raise ValueError("poi_sequence must contain at least one POI")
    if len(poi_sequence) != len(time_sequence):
        raise ValueError("poi_sequence and time_sequence must have the same length")

    model.eval()

    # Follow the model's own device rather than a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        # Convert to tensors with a leading batch dimension of 1
        poi_tensor = torch.tensor(poi_sequence, dtype=torch.long).unsqueeze(0).to(target_device)
        time_tensor = torch.tensor(time_sequence, dtype=torch.long).unsqueeze(0).to(target_device)

        # Get predictions
        outputs = model(poi_tensor, time_tensor)

        # Only the last position predicts the POI that follows the sequence
        last_output = outputs[0, -1, :]
        probs = torch.softmax(last_output, dim=-1)

        # torch.topk raises when k exceeds the number of classes, so clamp it
        k = min(top_k, probs.size(-1))
        top_probs, top_indices = torch.topk(probs, k=k)

        return [
            (poi_names[idx.item()], prob.item())
            for prob, idx in zip(top_probs, top_indices)
        ]

In [5]:
def _show_example(model, generator, poi_names, title, pois, periods):
    """Print one inference example: inputs, time context and top predictions."""
    print(f"\n{title}")
    poi_sequence = [generator.poi_types[name] for name in pois]
    time_sequence = [generator.time_periods[name] for name in periods]

    # Derive the display labels from the names so they cannot drift from the
    # actual model inputs ('coffee_shop' -> 'Coffee Shop').
    print("Input sequence: " + " → ".join(name.replace('_', ' ').title() for name in pois))
    print("Time context: " + " → ".join(name.title() for name in periods))

    predictions = predict_next_poi(model, poi_sequence, time_sequence, poi_names)
    print("Next POI predictions:")
    for poi, prob in predictions:
        print(f"  - {poi}: {prob:.3f}")


def main():
    """Run the demo end to end: generate data, train, print example predictions.

    Returns:
        ``(model, generator)``: the trained ``TrajectoryTransformer`` and the
        ``POIDataGenerator`` that produced its training data.
    """
    print("=" * 60)
    print("POI Trajectory Prediction with Transformer")
    print("=" * 60)

    # Generate synthetic data
    print("\n1. Generating synthetic trajectory data...")
    generator = POIDataGenerator()
    data = generator.generate_dataset(num_sequences=300)  # Small dataset for Mac M1

    # Prepare data
    dataset = TrajectoryDataset(data['sequences'], data['time_contexts'])
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

    # Create model
    print("\n2. Creating Transformer model...")
    model = TrajectoryTransformer(
        num_pois=generator.num_pois,
        num_time_periods=generator.num_time_periods,
        d_model=64,
        nhead=4,
        num_layers=2,
        dropout=0.1
    ).to(device)

    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"   Model parameters: {num_params:,}")

    # Train model
    print("\n3. Training model...")
    losses = train_model(model, train_loader, num_epochs=50)
    print(f"   Final training loss: {losses[-1]:.4f}")

    # Inference examples
    print("\n4. Inference Examples:")
    print("-" * 60)

    # Reverse mapping for POI names
    poi_names = {v: k for k, v in generator.poi_types.items()}

    # (title, visited POIs, time period of each visit)
    examples = [
        ("Example 1: Morning Routine",
         ['home', 'coffee_shop', 'office'],
         ['morning', 'morning', 'morning']),
        ("Example 2: Evening Activities",
         ['office', 'gym', 'restaurant'],
         ['afternoon', 'evening', 'evening']),
        ("Example 3: Weekend Shopping",
         ['home', 'grocery_store', 'shopping_mall'],
         ['morning', 'afternoon', 'afternoon']),
    ]
    for title, pois, periods in examples:
        _show_example(model, generator, poi_names, title, pois, periods)

    # Closing summary
    print("\n" + "=" * 60)
    print("Model trained successfully!")
    print("The model can now predict the next POI based on:")
    print("  • Historical trajectory sequence")
    print("  • Temporal context (time of day)")
    print("  • Learned transition patterns")

    return model, generator

In [6]:
# Run the whole pipeline and bind the trained model and the data generator
# that main() returns at notebook scope.
model, generator = main()


POI Trajectory Prediction with Transformer

1. Generating synthetic trajectory data...

2. Creating Transformer model...
   Model parameters: 141,130

3. Training model...
Epoch [10/50], Loss: 0.7975
Epoch [20/50], Loss: 0.7693
Epoch [30/50], Loss: 0.7501
Epoch [40/50], Loss: 0.7362
Epoch [50/50], Loss: 0.7365
   Final training loss: 0.7365

4. Inference Examples:
------------------------------------------------------------

Example 1: Morning Routine
Input sequence: Home → Coffee Shop → Office
Time context: Morning → Morning → Morning
Next POI predictions:
  - home: 0.372
  - coffee_shop: 0.344
  - gym: 0.259

Example 2: Evening Activities
Input sequence: Office → Gym → Restaurant
Time context: Afternoon → Evening → Evening
Next POI predictions:
  - bar: 0.493
  - cinema: 0.479
  - home: 0.015

Example 3: Weekend Shopping
Input sequence: Home → Grocery Store → Shopping Mall
Time context: Morning → Afternoon → Afternoon
Next POI predictions:
  - restaurant: 0.996
  - office: 0.002
  - 

1. **Causal Masking**: The decoder uses a triangular mask to ensure the model can only attend to previous positions (autoregressive behavior), which is essential for next-token prediction.

2. **Better for Sequential Prediction**: Decoders are designed for generating sequences one token at a time, making them ideal for trajectory prediction where we want to predict the next POI.

3. **Self-Attention with Causality**: The model learns dependencies between POIs while respecting the temporal order of visits.

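4. **More Natural Architecture**: For next-POI prediction, we're essentially doing language modeling with POIs instead of words, and causally masked, decoder-only transformers (like GPT) are the standard for this task. Note that PyTorch's `nn.TransformerDecoder` layers also contain a cross-attention sublayer; this notebook feeds it an all-zero dummy `memory`, so only the masked self-attention actually sees the trajectory.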

The model now properly uses `TransformerDecoder` with causal masking, making it more suitable for trajectory prediction tasks. The training process remains efficient for Mac M1 with minimal data (300 sequences).