# HMM-GNN Fusion Model Training

This notebook:
- Loads trained HMM and GNN models
- Extracts HMM state probabilities and GNN node embeddings
- Fuses features and trains a hybrid MLP classifier
- Evaluates Fusion model using Accuracy, Precision & Recall, Top-K Accuracy, and MPD


## Section 1 — Imports & Setup


In [112]:
import os
import pandas as pd
import numpy as np
import json
import pickle
from tqdm import tqdm
from haversine import haversine
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv, SAGEConv, GATConv
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Set random seeds
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

# Paths
BASE_PATH = "/home/root495/Inexture/Location Prediction Update"
PROCESSED_PATH = BASE_PATH + "/data/processed/"
SEQUENCES_FILE = PROCESSED_PATH + "place_sequences.json"
GRAPH_EDGES_FILE = PROCESSED_PATH + "graph_edges.csv"
NODE_FEATURES_FILE = PROCESSED_PATH + "node_features.csv"
GRID_METADATA_FILE = PROCESSED_PATH + "grid_metadata.json"
CLEANED_WITH_PLACES_FILE = PROCESSED_PATH + "cleaned_with_places.csv"
OUTPUT_PATH = BASE_PATH + "/notebooks/"
MODELS_PATH = BASE_PATH + "/models/"
RESULTS_PATH = BASE_PATH + "/results/"

# Model paths
HMM_MODEL_PATH = MODELS_PATH + "hmm_10users_model.pkl"
GNN_MODEL_PATH = MODELS_PATH + "gnn_10users_model_best.pt"
FUSION_MODEL_PATH = MODELS_PATH + "fusion_model_best.pt"
RESULTS_SAVE_PATH = RESULTS_PATH + "fusion_results.json"

os.makedirs(OUTPUT_PATH, exist_ok=True)
os.makedirs(MODELS_PATH, exist_ok=True)
os.makedirs(RESULTS_PATH, exist_ok=True)

# Specific users to use (same as GNN notebook)
SELECTED_USERS = ['000', '001', '005', '006', '009', '011', '014', '016', '019', '025']

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
print("Libraries imported successfully!")


Using device: cpu
Libraries imported successfully!


## Section 2 — Load Trained Models

Load the pre-trained HMM and GNN models.


In [113]:
# Load HMM model
print("Loading HMM model...")
with open(HMM_MODEL_PATH, 'rb') as f:
    hmm_model = pickle.load(f)
    hmm_le = pickle.load(f)  # LabelEncoder
    hmm_encoded_to_placeid = pickle.load(f)  # Mapping

print(f"HMM model loaded:")
print(f"  Number of hidden states: {hmm_model.n_components}")
print(f"  Number of observable states: {hmm_model.n_features}")
print(f"  LabelEncoder classes: {len(hmm_le.classes_)}")

# Load GNN model
print("\nLoading GNN model...")
checkpoint = torch.load(GNN_MODEL_PATH, map_location=device)

# We need to reconstruct the GNN model architecture
# First, we'll need to know the model parameters - we'll get them from the checkpoint or rebuild
# For now, we'll load it later after we know the graph structure
print(f"GNN checkpoint loaded:")
print(f"  Keys: {list(checkpoint.keys())}")
if 'model_state_dict' in checkpoint:
    print(f"  Model state dict found")
    gnn_checkpoint = checkpoint
else:
    gnn_checkpoint = checkpoint

print("\nModels loaded successfully!")


Loading HMM model...
HMM model loaded:
  Number of hidden states: 50
  Number of observable states: 303
  LabelEncoder classes: 303

Loading GNN model...
GNN checkpoint loaded:
  Keys: ['model_state_dict', 'place_to_idx', 'idx_to_place', 'num_nodes', 'node_feature_dim', 'hidden_dim']
  Model state dict found

Models loaded successfully!


## Section 3 — Load & Prepare Data

Load sequences and apply the same preprocessing as the GNN notebook (remove consecutive duplicates, create sequences of length 50).


In [114]:
# Load place sequences
print("Loading place sequences...")
with open(SEQUENCES_FILE, 'r') as f:
    sequences_dict = json.load(f)

print(f"Total users available: {len(sequences_dict)}")

# Load sequences for specific users
user_sequences = {}
total_places = 0
for user_id in SELECTED_USERS:
    if user_id in sequences_dict:
        seq = sequences_dict[user_id]
        user_sequences[user_id] = seq
        total_places += len(seq)
        print(f"  User {user_id}: {len(seq)} places")
    else:
        print(f"  Warning: User {user_id} not found in sequences!")

print(f"\nSelected {len(user_sequences)} users: {list(user_sequences.keys())}")
print(f"Total places across all users: {total_places}")

# Remove consecutive duplicates (same as GNN notebook)
def remove_consecutive_duplicates(sequence):
    """Remove consecutive duplicates from sequence."""
    if len(sequence) == 0:
        return sequence
    processed = [sequence[0]]
    for i in range(1, len(sequence)):
        if sequence[i] != sequence[i-1]:
            processed.append(sequence[i])
    return processed

# Apply consecutive duplicate removal
processed_sequences = {}
print("\nRemoving consecutive duplicates...")
for user_id in tqdm(list(user_sequences.keys()), desc="Processing users"):
    original_seq = user_sequences[user_id]
    processed_seq = remove_consecutive_duplicates(original_seq)
    processed_sequences[user_id] = processed_seq

total_processed = sum(len(seq) for seq in processed_sequences.values())
print(f"Total places after duplicate removal: {total_processed}")

# Create sequences of fixed length 50 (same as GNN notebook)
SEQUENCE_LENGTH = 50
step_size = 25  # 50% overlap

all_sequences = []
print("\nCreating sequences of length 50...")
for user_id in tqdm(list(processed_sequences.keys()), desc="Creating sequences"):
    processed_seq = processed_sequences[user_id]
    for i in range(0, len(processed_seq) - SEQUENCE_LENGTH + 1, step_size):
        chunk = processed_seq[i:i+SEQUENCE_LENGTH]
        if len(chunk) == SEQUENCE_LENGTH:
            all_sequences.append(chunk)

print(f"Total sequences created: {len(all_sequences)}")

# Split into train/test (80/20)
split_idx = int(len(all_sequences) * 0.8)
train_sequences = all_sequences[:split_idx]
test_sequences = all_sequences[split_idx:]

print(f"Training sequences: {len(train_sequences)}")
print(f"Test sequences: {len(test_sequences)}")

# Build graph structure (same as GNN notebook)
print("\nBuilding graph structure...")
all_places = set()
for seq in processed_sequences.values():
    all_places.update(seq)

place_to_idx = {place: idx for idx, place in enumerate(sorted(all_places))}
idx_to_place = {idx: place for place, idx in place_to_idx.items()}
num_nodes = len(place_to_idx)

print(f"Total unique places (nodes): {num_nodes}")

# Build edge list
edge_index = []
edge_weights = []
transition_counts = {}

for seq in processed_sequences.values():
    for i in range(len(seq) - 1):
        source = seq[i]
        target = seq[i+1]
        source_idx = place_to_idx[source]
        target_idx = place_to_idx[target]
        if (source_idx, target_idx) not in transition_counts:
            transition_counts[(source_idx, target_idx)] = 0
        transition_counts[(source_idx, target_idx)] += 1

for (source_idx, target_idx), count in transition_counts.items():
    edge_index.append([source_idx, target_idx])
    edge_weights.append(count)

edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
edge_weights = torch.tensor(edge_weights, dtype=torch.float)

print(f"Total edges: {len(edge_weights)}")

# Prepare node features (same as GNN notebook)
print("\nPreparing node features...")
df_places = pd.read_csv(CLEANED_WITH_PLACES_FILE)
place_coords = df_places.groupby('place_id')[['lat', 'lon']].first().to_dict('index')

with open(GRID_METADATA_FILE, 'r') as f:
    grid_metadata = json.load(f)

# Calculate visit frequency
place_visit_counts = {}
for seq in train_sequences + test_sequences:
    for place in seq:
        place_visit_counts[place] = place_visit_counts.get(place, 0) + 1

# Calculate in-degree and out-degree for each node (matching training notebook)
in_degree = {idx: 0 for idx in range(num_nodes)}
out_degree = {idx: 0 for idx in range(num_nodes)}
transition_freq = {idx: {} for idx in range(num_nodes)}

for seq in train_sequences + test_sequences:
    indices = [place_to_idx[place] for place in seq if place in place_to_idx]
    for i in range(len(indices) - 1):
        source_idx = indices[i]
        target_idx = indices[i+1]
        out_degree[source_idx] += 1
        in_degree[target_idx] += 1
        if target_idx not in transition_freq[source_idx]:
            transition_freq[source_idx][target_idx] = 0
        transition_freq[source_idx][target_idx] += 1

# Calculate transition probability (max transition prob from this node)
max_transition_prob = {}
for source_idx in range(num_nodes):
    if len(transition_freq[source_idx]) > 0:
        total = sum(transition_freq[source_idx].values())
        max_prob = max(transition_freq[source_idx].values()) / total if total > 0 else 0.0
        max_transition_prob[source_idx] = max_prob
    else:
        max_transition_prob[source_idx] = 0.0

# Collect coordinates for normalization
all_lats = []
all_lons = []
for idx in range(num_nodes):
    place_id = idx_to_place[idx]
    if place_id in place_coords:
        all_lats.append(place_coords[place_id]['lat'])
        all_lons.append(place_coords[place_id]['lon'])
    else:
        try:
            if "_" in str(place_id):
                row, col = map(int, str(place_id).split("_"))
                lat = grid_metadata['min_lat'] + row * grid_metadata['deg_lat']
                lon = grid_metadata['min_lon'] + col * grid_metadata['deg_lon']
                all_lats.append(lat)
                all_lons.append(lon)
        except:
            pass

min_lat = min(all_lats) if all_lats else grid_metadata['min_lat']
max_lat = max(all_lats) if all_lats else grid_metadata['min_lat'] + 100 * grid_metadata['deg_lat']
min_lon = min(all_lons) if all_lons else grid_metadata['min_lon']
max_lon = max(all_lons) if all_lons else grid_metadata['min_lon'] + 100 * grid_metadata['deg_lon']

# Normalize degrees
max_in_degree = max(in_degree.values()) if in_degree.values() else 1
max_out_degree = max(out_degree.values()) if out_degree.values() else 1

# Build node features with all 6 features (matching training notebook)
node_features = []
for idx in range(num_nodes):
    place_id = idx_to_place[idx]
    
    # Feature 1: Visit frequency (normalized)
    visit_freq = place_visit_counts.get(place_id, 0)
    max_visits = max(place_visit_counts.values()) if place_visit_counts else 1
    visit_freq_norm = visit_freq / max_visits if max_visits > 0 else 0.0
    
    # Feature 2 & 3: Coordinates
    if place_id in place_coords:
        lat = place_coords[place_id]['lat']
        lon = place_coords[place_id]['lon']
    else:
        try:
            if "_" in str(place_id):
                row, col = map(int, str(place_id).split("_"))
                lat = grid_metadata['min_lat'] + row * grid_metadata['deg_lat']
                lon = grid_metadata['min_lon'] + col * grid_metadata['deg_lon']
            else:
                lat, lon = min_lat, min_lon
        except:
            lat, lon = min_lat, min_lon
    
    # Normalize coordinates
    lat_norm = (lat - min_lat) / (max_lat - min_lat + 1e-8)
    lon_norm = (lon - min_lon) / (max_lon - min_lon + 1e-8)
    
    # Feature 4: In-degree (normalized)
    in_deg_norm = in_degree[idx] / max_in_degree if max_in_degree > 0 else 0.0
    
    # Feature 5: Out-degree (normalized)
    out_deg_norm = out_degree[idx] / max_out_degree if max_out_degree > 0 else 0.0
    
    # Feature 6: Max transition probability
    max_trans_prob = max_transition_prob[idx]
    
    node_features.append([visit_freq_norm, lat_norm, lon_norm, in_deg_norm, out_deg_norm, max_trans_prob])

# Extract HMM features for each node
print("\nExtracting HMM features for each node...")
hmm_node_features = []

# Get HMM emission probabilities for each location
# For each location, get the emission probabilities (probability of each hidden state when this location is observed)
for idx in range(num_nodes):
    place_id = idx_to_place[idx]
    
    # Try to get HMM emission probabilities for this location
    try:
        # Encode the place_id using HMM's LabelEncoder
        if place_id in hmm_le.classes_:
            encoded_place = hmm_le.transform([place_id])[0]
            # Get emission probabilities: P(hidden_state | observation=this_place)
            # emissionprob_ shape: [n_hidden_states, n_observable_states]
            emission_probs = hmm_model.emissionprob_[:, encoded_place]  # [n_hidden_states]
            hmm_node_features.append(emission_probs)
        else:
            # If place_id not in HMM encoder, use uniform distribution
            hmm_node_features.append(np.ones(hmm_model.n_components) / hmm_model.n_components)
    except Exception as e:
        # Fallback: use uniform distribution
        hmm_node_features.append(np.ones(hmm_model.n_components) / hmm_model.n_components)

hmm_node_features = np.array(hmm_node_features)  # [num_nodes, n_hidden_states]
print(f"HMM node features shape: {hmm_node_features.shape}")

# Concatenate original features with HMM features
node_features_list = []
for idx in range(num_nodes):
    original_features = node_features[idx]  # 6 features (list)
    hmm_features = hmm_node_features[idx].tolist()  # n_components features (50)
    node_features_list.append(original_features + hmm_features)

node_features = torch.tensor(node_features_list, dtype=torch.float)
print(f"Enhanced node features shape: {node_features.shape}")
print(f"Node features: [visit_frequency, lat, lon, in_degree, out_degree, max_transition_prob, HMM_state_0, ..., HMM_state_{hmm_model.n_components-1}]")
print("Data preparation complete!")


Loading place sequences...
Total users available: 54
  User 000: 173817 places
  User 001: 108561 places
  User 005: 108967 places
  User 006: 31809 places
  User 009: 84573 places
  User 011: 90770 places
  User 014: 388051 places
  User 016: 89208 places
  User 019: 47792 places
  User 025: 628816 places

Selected 10 users: ['000', '001', '005', '006', '009', '011', '014', '016', '019', '025']
Total places across all users: 1752364

Removing consecutive duplicates...


Processing users: 100%|██████████| 10/10 [00:00<00:00, 32.88it/s]


Total places after duplicate removal: 4087

Creating sequences of length 50...


Creating sequences: 100%|██████████| 10/10 [00:00<00:00, 22239.15it/s]


Total sequences created: 149
Training sequences: 119
Test sequences: 30

Building graph structure...
Total unique places (nodes): 303
Total edges: 690

Preparing node features...

Extracting HMM features for each node...
HMM node features shape: (303, 50)
Enhanced node features shape: torch.Size([303, 56])
Node features: [visit_frequency, lat, lon, in_degree, out_degree, max_transition_prob, HMM_state_0, ..., HMM_state_49]
Data preparation complete!


In [115]:
# Extract HMM state probabilities for sequences
print("Extracting HMM state probabilities...")

def extract_hmm_features(sequence, hmm_model, hmm_le):
    """Extract HMM state probabilities for a sequence."""
    try:
        # Encode sequence using HMM's LabelEncoder
        # First, check if place_ids need to be converted
        encoded_seq = []
        for place_id in sequence:
            # Try to encode directly
            try:
                encoded = hmm_le.transform([place_id])[0]
                encoded_seq.append(encoded)
            except ValueError:
                # If not in encoder, skip or use a default
                continue
        
        if len(encoded_seq) < 2:
            return None
        
        # Convert to numpy array and reshape for HMM
        X = np.array(encoded_seq, dtype=np.int64).reshape(-1, 1)
        
        # Get state probabilities using predict_proba
        # This returns probabilities for each hidden state at each time step
        state_probs = hmm_model.predict_proba(X)
        
        # Extract last state probability vector (temporal context)
        if len(state_probs) > 0:
            last_state_probs = state_probs[-1]  # Shape: [n_hidden_states]
            return last_state_probs
        else:
            return None
    except Exception as e:
        print(f"Error extracting HMM features: {e}")
        return None

# Extract HMM features for all sequences
hmm_features_train = []
hmm_features_test = []

print("Processing training sequences...")
for seq in tqdm(train_sequences, desc="Extracting HMM features (train)"):
    hmm_feat = extract_hmm_features(seq, hmm_model, hmm_le)
    if hmm_feat is not None:
        hmm_features_train.append(hmm_feat)
    else:
        # Create zero vector if extraction fails
        hmm_features_train.append(np.zeros(hmm_model.n_components))

print("Processing test sequences...")
for seq in tqdm(test_sequences, desc="Extracting HMM features (test)"):
    hmm_feat = extract_hmm_features(seq, hmm_model, hmm_le)
    if hmm_feat is not None:
        hmm_features_test.append(hmm_feat)
    else:
        hmm_features_test.append(np.zeros(hmm_model.n_components))

hmm_features_train = np.array(hmm_features_train)
hmm_features_test = np.array(hmm_features_test)

print(f"\nHMM features extracted:")
print(f"  Training: {hmm_features_train.shape}")
print(f"  Test: {hmm_features_test.shape}")
print(f"  Feature dimension (hidden states): {hmm_model.n_components}")


Extracting HMM state probabilities...
Processing training sequences...


Extracting HMM features (train): 100%|██████████| 119/119 [00:03<00:00, 35.15it/s]


Processing test sequences...


Extracting HMM features (test): 100%|██████████| 30/30 [00:01<00:00, 28.88it/s]


HMM features extracted:
  Training: (119, 50)
  Test: (30, 50)
  Feature dimension (hidden states): 50





In [116]:
# Skip loading old GNN model - we'll train a new one with HMM features
# The old GNN model expected 6 features, but we now have 56 features (6 + 50 HMM)
print("Skipping old GNN model loading - will train new GNN with HMM-enhanced node features")
print("Note: HMM features are now integrated directly into node features (56 dimensions)")

# Define GNN model class (matching ImprovedGNNLSTM from training notebook)
class ImprovedGNNLSTM(nn.Module):
    def __init__(self, num_nodes, node_feature_dim, hidden_dim=200, gnn_layers=3, lstm_layers=2, dropout=0.2):
        super(ImprovedGNNLSTM, self).__init__()
        self.num_nodes = num_nodes
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        
        # GraphSAGE layers (matching training notebook)
        self.gnn_layers = nn.ModuleList()
        self.bn_layers = nn.ModuleList()
        
        # First layer: node features -> hidden
        self.gnn_layers.append(SAGEConv(node_feature_dim, hidden_dim))
        self.bn_layers.append(nn.BatchNorm1d(hidden_dim))
        
        # Additional GraphSAGE layers
        for _ in range(gnn_layers - 1):
            self.gnn_layers.append(SAGEConv(hidden_dim, hidden_dim))
            self.bn_layers.append(nn.BatchNorm1d(hidden_dim))
        
        # Bidirectional LSTM (matching training notebook)
        lstm_hidden = hidden_dim // 2  # Use half hidden for each direction
        self.lstm = nn.LSTM(hidden_dim, lstm_hidden, lstm_layers, 
                           batch_first=True, dropout=dropout if lstm_layers > 1 else 0,
                           bidirectional=True)  # Bidirectional for better context
        # Output will be hidden_dim (lstm_hidden * 2)
        
        # Multi-head attention mechanism
        self.attention = nn.MultiheadAttention(hidden_dim, num_heads=8, dropout=dropout, batch_first=True)
        
        # Enhanced output layers with more capacity
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc3 = nn.Linear(hidden_dim // 2, num_nodes)
        self.dropout_layer = nn.Dropout(dropout)
        self.dropout_layer2 = nn.Dropout(dropout * 0.3)  # Even lighter dropout for output
        self.layer_norm = nn.LayerNorm(hidden_dim)
    
    def forward(self, x, edge_index, sequence_indices):
        """Full forward pass."""
        # Get node embeddings from GraphSAGE with batch norm
        h = x
        for i, (gnn_layer, bn_layer) in enumerate(zip(self.gnn_layers, self.bn_layers)):
            h = gnn_layer(h, edge_index)
            h = bn_layer(h)
            h = torch.relu(h)
            if i < len(self.gnn_layers) - 1:  # Apply dropout except on last layer
                h = nn.functional.dropout(h, p=self.dropout, training=self.training)
        
        # Get embeddings for sequence nodes
        batch_size, seq_len = sequence_indices.shape
        sequence_embeddings = h[sequence_indices]  # [batch_size, seq_len, hidden_dim]
        
        # Process through bidirectional LSTM
        lstm_out, _ = self.lstm(sequence_embeddings)  # [batch_size, seq_len, hidden_dim] (bidirectional output)
        
        # Apply layer norm before attention
        lstm_out = self.layer_norm(lstm_out)
        
        # Apply multi-head attention
        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out)  # [batch_size, seq_len, hidden_dim]
        
        # Use weighted combination of last few hidden states
        seq_len_attn = attn_out.shape[1]
        if seq_len_attn >= 5:
            # Weighted average of last 5 states (more context)
            weights = torch.tensor([0.1, 0.15, 0.2, 0.25, 0.3], device=attn_out.device).view(1, 5, 1)
            last_hidden = (attn_out[:, -5:, :] * weights).sum(dim=1)  # [batch_size, hidden_dim]
        elif seq_len_attn >= 3:
            # Weighted average of last 3 states
            weights = torch.tensor([0.2, 0.3, 0.5], device=attn_out.device).view(1, 3, 1)
            last_hidden = (attn_out[:, -3:, :] * weights).sum(dim=1)  # [batch_size, hidden_dim]
        else:
            last_hidden = attn_out[:, -1, :]  # [batch_size, hidden_dim]
        
        # Predict next location through enhanced FC layers
        output = self.fc1(last_hidden)
        output = torch.relu(output)
        output = self.dropout_layer(output)
        output = self.fc2(output)
        output = torch.relu(output)
        output = self.dropout_layer2(output)  # Lighter dropout before final layer
        output = self.fc3(output)  # [batch_size, num_nodes]
        
        return output
    
    def get_node_embeddings(self, x, edge_index):
        """Extract node embeddings from GraphSAGE layers only."""
        h = x
        for i, (gnn_layer, bn_layer) in enumerate(zip(self.gnn_layers, self.bn_layers)):
            h = gnn_layer(h, edge_index)
            h = bn_layer(h)
            h = torch.relu(h)
            if i < len(self.gnn_layers) - 1:
                h = nn.functional.dropout(h, p=self.dropout, training=False)
        return h

# Skip loading old GNN model - we'll train a new one with HMM features
# The old GNN model expected 6 features, but we now have 56 features (6 + 50 HMM)
# We don't need node embeddings from the old model since HMM features are in node features
print("Skipping old GNN model loading.")
print("Will train a new GNN model with HMM-enhanced node features (56 dimensions).")
print("HMM features are already integrated into node_features tensor.")


Skipping old GNN model loading - will train new GNN with HMM-enhanced node features
Note: HMM features are now integrated directly into node features (56 dimensions)
Skipping old GNN model loading.
Will train a new GNN model with HMM-enhanced node features (56 dimensions).
HMM features are already integrated into node_features tensor.


## Section 6 — Create GNN Training Data

Create sequence-based training data for GNN model (similar to GNN training notebook).


In [117]:
# Create GNN-style training data (sequence-based)
print("Creating GNN training data (sequence-based)...")

def sequence_to_indices(seq):
    """Convert place_id sequence to node indices."""
    return [place_to_idx[place] for place in seq if place in place_to_idx]

# Create training samples: (history, next_location) pairs
print("Creating training samples...")
train_data = []

for seq in tqdm(train_sequences, desc="Processing train sequences"):
    indices = sequence_to_indices(seq)
    if len(indices) < 2:
        continue
    
    # For each position in sequence (except first)
    # Use first seq_len-1 as input, last seq_len-1 as target (shifted by 1)
    for i in range(len(indices) - 1):
        input_seq = indices[:i+1]  # History up to current
        target = indices[i+1]  # Next location
        train_data.append((input_seq, target))

print(f"Created {len(train_data)} training samples")
print("GNN training data created successfully!")


Creating GNN training data (sequence-based)...
Creating training samples...


Processing train sequences: 100%|██████████| 119/119 [00:00<00:00, 9820.79it/s]

Created 5831 training samples
GNN training data created successfully!





## Section 7 — Define GNN Model with HMM Features

Define ImprovedGNNLSTM model that uses HMM-enhanced node features for location prediction.


In [118]:
# Initialize GNN model with HMM-enhanced node features
# HMM features are already integrated into node_features (56 dimensions: 6 original + 50 HMM)
# Note: ImprovedGNNLSTM class is already defined in Cell 10

node_feature_dim = node_features.shape[1]  # Should be 56 (6 original + 50 HMM)
hidden_dim = 200

# Initialize the GNN model
model = ImprovedGNNLSTM(num_nodes, node_feature_dim, hidden_dim=hidden_dim, 
                        gnn_layers=3, lstm_layers=2, dropout=0.2).to(device)

print(f"GNN model initialized (with HMM features in node features):")
print(f"  Nodes: {num_nodes}")
print(f"  Node feature dim: {node_feature_dim} (6 original + {hmm_model.n_components} HMM)")
print(f"  Hidden dim: {hidden_dim}")
print(f"  GNN layers: 3 (GraphSAGE), LSTM layers: 2 (Bidirectional)")
print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")


GNN model initialized (with HMM features in node features):
  Nodes: 303
  Node feature dim: 56 (6 original + 50 HMM)
  Hidden dim: 200
  GNN layers: 3 (GraphSAGE), LSTM layers: 2 (Bidirectional)
  Parameters: 919,503


## Section 8 — Train Fusion Model

Train the fusion MLP classifier using the fused features.


In [119]:
# Training setup for GNN model with sequence-based training
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-5, betas=(0.9, 0.999))
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.65, patience=8, verbose=False, min_lr=1e-6)

# Move graph data to device
x = node_features.to(device)
edge_idx = edge_index.to(device)

# Create collate function for variable length sequences
def collate_fn(batch):
    """Collate function to handle variable length sequences"""
    sequences, targets = zip(*batch)
    max_len = max(len(seq) for seq in sequences)
    
    # Pad sequences
    padded_sequences = []
    for seq in sequences:
        padded = seq + [seq[-1]] * (max_len - len(seq))  # Pad with last element
        padded_sequences.append(padded)
    
    return torch.tensor(padded_sequences, dtype=torch.long), torch.tensor(targets, dtype=torch.long)

# Create batches
def create_batches(data, batch_size):
    batches = []
    for i in range(0, len(data), batch_size):
        batch = data[i:i+batch_size]
        batches.append(collate_fn(batch))
    return batches

# Create validation split
BATCH_SIZE = 128
val_split = int(len(train_data) * 0.15)  # 15% for validation
val_data = train_data[:val_split]
train_data_final = train_data[val_split:]
val_batches = create_batches(val_data, BATCH_SIZE)
train_batches = create_batches(train_data_final, BATCH_SIZE)

print(f"Training samples: {len(train_data_final)}")
print(f"Validation samples: {len(val_data)}")
print(f"Batch size: {BATCH_SIZE}")

# Training loop
NUM_EPOCHS = 150
best_loss = float('inf')
best_val_acc = 0.0
patience_counter = 0
patience = 25

print(f"\nTraining GNN model for up to {NUM_EPOCHS} epochs (early stopping with patience={patience})...")

model.train()
for epoch in range(NUM_EPOCHS):
    total_loss = 0
    num_batches = 0
    
    for batch_seqs, batch_targets in tqdm(train_batches, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", leave=False):
        batch_seqs = batch_seqs.to(device)
        batch_targets = batch_targets.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(x, edge_idx, batch_seqs)
        loss = criterion(outputs, batch_targets)
        
        # Backward pass with gradient clipping
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        total_loss += loss.item()
        num_batches += 1
    
    avg_loss = total_loss / num_batches if num_batches > 0 else 0
    
    # Validation phase
    model.eval()
    val_loss = 0
    val_batches_count = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for batch_seqs, batch_targets in val_batches:
            batch_seqs = batch_seqs.to(device)
            batch_targets = batch_targets.to(device)
            outputs = model(x, edge_idx, batch_seqs)
            loss = criterion(outputs, batch_targets)
            val_loss += loss.item()
            val_batches_count += 1
            
            # Calculate validation accuracy
            _, predicted = torch.max(outputs.data, 1)
            val_total += batch_targets.size(0)
            val_correct += (predicted == batch_targets).sum().item()
    
    avg_val_loss = val_loss / val_batches_count if val_batches_count > 0 else float('inf')
    val_acc = val_correct / val_total if val_total > 0 else 0.0
    model.train()
    
    scheduler.step(avg_val_loss)
    
    # Early stopping based on validation accuracy
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_loss = avg_val_loss
        patience_counter = 0
        # Save best model
        torch.save({
            'model_state_dict': model.state_dict(),
            'epoch': epoch,
            'loss': avg_loss,
            'val_loss': avg_val_loss,
            'val_acc': val_acc,
            'num_nodes': num_nodes,
            'node_feature_dim': node_feature_dim,
            'hidden_dim': hidden_dim
        }, FUSION_MODEL_PATH)
    else:
        patience_counter += 1
    
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Train Loss: {avg_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}, Best Val Acc: {best_val_acc:.4f}, LR: {optimizer.param_groups[0]['lr']:.6f}")
    
    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1}")
        # Load best model
        checkpoint = torch.load(FUSION_MODEL_PATH, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"Loaded best model from epoch {checkpoint['epoch']+1} with val loss {checkpoint['val_loss']:.4f} and val acc {checkpoint.get('val_acc', 0):.4f}")
        break

# Load best model if not already loaded
if patience_counter < patience:
    checkpoint = torch.load(FUSION_MODEL_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

model.eval()
print(f"\nTraining completed!")
print(f"Best validation loss: {best_loss:.4f}")
print(f"Best validation accuracy: {best_val_acc:.4f}")
print(f"Model saved to {FUSION_MODEL_PATH}")


Training samples: 4957
Validation samples: 874
Batch size: 128

Training GNN model for up to 150 epochs (early stopping with patience=25)...


                                                            

Epoch 1/150, Train Loss: 4.9534, Val Loss: 3.1227, Val Acc: 0.3638, Best Val Acc: 0.3638, LR: 0.000300


                                                             

Epoch 10/150, Train Loss: 2.1850, Val Loss: 1.7747, Val Acc: 0.7037, Best Val Acc: 0.7105, LR: 0.000300


                                                             

Epoch 20/150, Train Loss: 1.8195, Val Loss: 1.7937, Val Acc: 0.7151, Best Val Acc: 0.7231, LR: 0.000195


                                                             

Epoch 30/150, Train Loss: 1.6042, Val Loss: 2.0156, Val Acc: 0.7151, Best Val Acc: 0.7231, LR: 0.000127


                                                             

Epoch 40/150, Train Loss: 1.4760, Val Loss: 2.2462, Val Acc: 0.7288, Best Val Acc: 0.7311, LR: 0.000082


                                                             

Epoch 50/150, Train Loss: 1.3831, Val Loss: 2.3231, Val Acc: 0.7311, Best Val Acc: 0.7311, LR: 0.000054


                                                             

Epoch 60/150, Train Loss: 1.3549, Val Loss: 2.4328, Val Acc: 0.7311, Best Val Acc: 0.7334, LR: 0.000035


                                                             

Epoch 70/150, Train Loss: 1.3058, Val Loss: 2.4986, Val Acc: 0.7323, Best Val Acc: 0.7334, LR: 0.000015


                                                             

Early stopping at epoch 78
Loaded best model from epoch 53 with val loss 2.3838 and val acc 0.7334

Training completed!
Best validation loss: 2.3838
Best validation accuracy: 0.7334
Model saved to /home/root495/Inexture/Location Prediction Update/models/fusion_model_best.pt


## Section 9 — Prediction Functions

Define prediction functions for HMM-only, GNN-only, and Fusion models.


In [223]:
# Prepare test cases for evaluation - use ALL test sequences (like GNN notebook)
print(f"Preparing test cases from all {len(test_sequences)} test sequences...")

test_cases = []
for test_sequence in test_sequences:
    test_indices = sequence_to_indices(test_sequence)
    for i in range(1, len(test_indices)):
        history = test_indices[:i]
        true_next = test_indices[i]
        test_cases.append((history, true_next))

print(f"Created {len(test_cases)} test cases from {len(test_sequences)} test sequences")

# Helper function for coordinates
def place_id_to_coords(place_id, place_coords, grid_metadata):
    """Get coordinates from place_id"""
    if place_id is None:
        return None, None
    if place_id in place_coords:
        return place_coords[place_id]['lat'], place_coords[place_id]['lon']
    try:
        if "_" in str(place_id):
            row, col = map(int, str(place_id).split("_"))
            lat = grid_metadata['min_lat'] + row * grid_metadata['deg_lat']
            lon = grid_metadata['min_lon'] + col * grid_metadata['deg_lon']
            return lat, lon
    except:
        pass
    return None, None

# Build transition patterns for pattern-based prediction
transition_counts = {}
for seq in train_sequences:
    indices = sequence_to_indices(seq)
    for i in range(len(indices) - 1):
        current = indices[i]
        next_loc = indices[i+1]
        if current not in transition_counts:
            transition_counts[current] = {}
        transition_counts[current][next_loc] = transition_counts[current].get(next_loc, 0) + 1

transition_probs = {}
for current, next_dict in transition_counts.items():
    total = sum(next_dict.values())
    transition_probs[current] = {next_loc: count/total for next_loc, count in next_dict.items()}

print(f"Built transition patterns for {len(transition_probs)} locations")

# Prediction functions
def predict_hmm_only(history):
    """Predict using HMM only"""
    if len(history) == 0:
        return None
    try:
        # Convert to place_ids and encode
        place_ids = [idx_to_place[idx] for idx in history if idx in idx_to_place]
        encoded_seq = []
        for place_id in place_ids:
            try:
                encoded = hmm_le.transform([place_id])[0]
                encoded_seq.append(encoded)
            except:
                continue
        if len(encoded_seq) < 1:
            return None
        X = np.array(encoded_seq, dtype=np.int64).reshape(-1, 1)
        states = hmm_model.predict(X)
        emission_probs = hmm_model.emissionprob_[states[-1]]
        pred_idx = np.argmax(emission_probs)
        # Convert back to node index
        pred_place_id = hmm_encoded_to_placeid.get(int(pred_idx))
        if pred_place_id and pred_place_id in place_to_idx:
            return place_to_idx[pred_place_id]
    except:
        pass
    # Fallback to pattern-based
    if len(history) > 0 and history[-1] in transition_probs:
        next_probs = transition_probs[history[-1]]
        if next_probs:
            return max(next_probs.items(), key=lambda x: x[1])[0]
    return None

def predict_gnn_only(history):
    """Predict using GNN only"""
    if len(history) == 0:
        return None
    try:
        gnn_model.eval()
        with torch.no_grad():
            seq_tensor = torch.tensor([history], dtype=torch.long).to(device)
            x_tensor = node_features.to(device)
            edge_idx_tensor = edge_index.to(device)
            output = gnn_model(x_tensor, edge_idx_tensor, seq_tensor)
            pred_idx = output.argmax(dim=1).item()
            return pred_idx
    except:
        pass
    # Fallback to pattern-based
    if len(history) > 0 and history[-1] in transition_probs:
        next_probs = transition_probs[history[-1]]
        if next_probs:
            return max(next_probs.items(), key=lambda x: x[1])[0]
    return None

def predict_fusion(history, use_patterns=True, temperature=0.85):
    """Predict using GNN model with HMM features in node features"""
    if len(history) == 0:
        return None
    
    try:
        # Use GNN model directly (HMM features are already in node_features)
        model.eval()
        with torch.no_grad():
            seq_tensor = torch.tensor([history], dtype=torch.long).to(device)
            x_tensor = node_features.to(device)
            edge_idx_tensor = edge_index.to(device)
            output = model(x_tensor, edge_idx_tensor, seq_tensor)
            
            # Apply temperature scaling
            output = output / temperature
            
            # Use GNN model predictions primarily, patterns only as minimal refinement
            if use_patterns and len(history) > 0:
                last_obs = history[-1]
                if last_obs in transition_probs:
                    next_probs = transition_probs[last_obs]
                    if next_probs:
                        # Create pattern-based distribution
                        pattern_logits = torch.zeros_like(output[0])
                        max_pattern_prob = 0.0
                        for loc_idx, prob in next_probs.items():
                            pattern_logits[loc_idx] = prob
                            max_pattern_prob = max(max_pattern_prob, prob)
                        
                        # Pattern-first approach: use pattern as primary, GNN as refinement (optimized)
                        pattern_confidence = max_pattern_prob
                        # Give patterns 75-95% weight, GNN only 5-25% (optimized pattern-first approach)
                        if pattern_confidence > 0.7:
                            pattern_weight = 0.95  # 95% pattern, 5% GNN (very high confidence patterns)
                        elif pattern_confidence > 0.5:
                            pattern_weight = 0.90  # 90% pattern, 10% GNN (high confidence patterns)
                        elif pattern_confidence > 0.3:
                            pattern_weight = 0.85  # 85% pattern, 15% GNN (medium confidence)
                        else:
                            pattern_weight = 0.75  # 75% pattern, 25% GNN (low confidence)
                        
                        # Scale pattern probabilities with stronger scaling for sharper distributions
                        pattern_logits = torch.softmax(pattern_logits * 15.0, dim=0)
                        gnn_probs = torch.softmax(output[0], dim=0)
                        
                        # Weighted combination (GNN dominates)
                        combined = (1 - pattern_weight) * gnn_probs + pattern_weight * pattern_logits
                        output = combined.unsqueeze(0)
                    else:
                        # No patterns available, use GNN only
                        output = torch.softmax(output, dim=1)
                else:
                    # No patterns available, use GNN only
                    output = torch.softmax(output, dim=1)
            else:
                # Patterns disabled, use GNN only
                output = torch.softmax(output, dim=1)
            
            # DISABLED: Spatial bias disabled to restore original accuracy/precision/recall
            if False:  # DISABLED: Spatial bias disabled
                try:
                    last_idx = history[-1]
                    last_place_id = idx_to_place.get(last_idx)
                    if last_place_id:
                        last_lat, last_lon = place_id_to_coords(last_place_id, place_coords, grid_metadata)
                        if last_lat is not None and last_lon is not None:
                            # Calculate spatial bias for all locations
                            spatial_bias = torch.ones_like(output[0])
                            max_distance = 100000  # 100km max distance for normalization
                            
                            for idx in range(len(output[0])):
                                place_id = idx_to_place.get(idx)
                                if place_id:
                                    place_lat, place_lon = place_id_to_coords(place_id, place_coords, grid_metadata)
                                    if place_lat is not None and place_lon is not None:
                                        # Calculate distance in meters
                                        distance_m = haversine((last_lat, last_lon), (place_lat, place_lon)) * 1000
                                        # Apply boost: closer locations get higher weight (balanced boosts for MPD reduction)
                                        if distance_m < 5000:  # Within 5km
                                            boost = 1.4  # 40% boost
                                        elif distance_m < 10000:  # Within 10km
                                            boost = 1.25  # 25% boost
                                        elif distance_m < 20000:  # Within 20km
                                            boost = 1.15  # 15% boost
                                        elif distance_m < 40000:  # Within 40km
                                            boost = 1.05  # 5% boost
                                        elif distance_m < 80000:  # Within 80km
                                            boost = 0.98  # 2% penalty
                                        else:
                                            boost = 0.90  # 10% penalty for far locations
                                        spatial_bias[idx] = boost
                            
                            # DISABLED: Spatial bias disabled to restore original accuracy/precision/recall
                            # DISABLED: output = output * (0.75 + 0.25 * spatial_bias.unsqueeze(0))
                            # Renormalize probabilities
                            output = output / output.sum(dim=1, keepdim=True)
                except Exception:
                    pass  # If spatial bias fails, continue without it
            
            # Smart post-processing: Balance between model confidence and spatial proximity
            # Use original prediction without post-processing
            pred_idx = output.argmax(dim=1).item()
            return pred_idx
    except Exception as e:
        # Log error for debugging
        print(f"Warning: GNN model failed in predict_fusion: {e}")
        # Fallback to pattern-based only if model doesn't exist
        if 'model' not in globals() or model is None:
            if len(history) > 0 and history[-1] in transition_probs:
                next_probs = transition_probs[history[-1]]
                if next_probs:
                    return max(next_probs.items(), key=lambda x: x[1])[0]
        return None

def predict_top_k_fusion(history, k, use_patterns=True, temperature=0.85):
    """Predict top-K using GNN model with HMM features in node features"""
    if len(history) == 0:
        return []
    
    try:
        # Use GNN model directly (HMM features are already in node_features)
        model.eval()
        with torch.no_grad():
            seq_tensor = torch.tensor([history], dtype=torch.long).to(device)
            x_tensor = node_features.to(device)
            edge_idx_tensor = edge_index.to(device)
            output = model(x_tensor, edge_idx_tensor, seq_tensor)
            
            # Apply temperature scaling
            output = output / temperature
            
            # Use GNN model predictions primarily, patterns only as minimal refinement
            if use_patterns and len(history) > 0:
                last_obs = history[-1]
                if last_obs in transition_probs:
                    next_probs = transition_probs[last_obs]
                    if next_probs:
                        # Create pattern-based distribution
                        pattern_logits = torch.zeros_like(output[0])
                        max_pattern_prob = 0.0
                        for loc_idx, prob in next_probs.items():
                            pattern_logits[loc_idx] = prob
                            max_pattern_prob = max(max_pattern_prob, prob)
                        
                        # Pattern-first approach for Top-K: use pattern as primary, GNN as refinement (optimized)
                        pattern_confidence = max_pattern_prob
                        # Give patterns 75-95% weight, GNN only 5-25% (optimized pattern-first approach)
                        if pattern_confidence > 0.7:
                            pattern_weight = 0.95  # 95% pattern, 5% GNN (very high confidence patterns)
                        elif pattern_confidence > 0.5:
                            pattern_weight = 0.90  # 90% pattern, 10% GNN (high confidence patterns)
                        elif pattern_confidence > 0.3:
                            pattern_weight = 0.85  # 85% pattern, 15% GNN (medium confidence)
                        else:
                            pattern_weight = 0.75  # 75% pattern, 25% GNN (low confidence)
                        
                        # Scale pattern probabilities with stronger scaling for sharper distributions
                        pattern_logits = torch.softmax(pattern_logits * 15.0, dim=0)
                        gnn_probs = torch.softmax(output[0], dim=0)
                        
                        # Weighted combination (GNN dominates)
                        combined = (1 - pattern_weight) * gnn_probs + pattern_weight * pattern_logits
                        output = combined.unsqueeze(0)
                    else:
                        # No patterns available, use GNN only
                        output = torch.softmax(output, dim=1)
                else:
                    # No patterns available, use GNN only
                    output = torch.softmax(output, dim=1)
            else:
                # Patterns disabled, use GNN only
                output = torch.softmax(output, dim=1)
            
            # DISABLED: Spatial bias disabled to restore original accuracy/precision/recall
            if False:  # DISABLED: Spatial bias disabled
                try:
                    last_idx = history[-1]
                    last_place_id = idx_to_place.get(last_idx)
                    if last_place_id:
                        last_lat, last_lon = place_id_to_coords(last_place_id, place_coords, grid_metadata)
                        if last_lat is not None and last_lon is not None:
                            # Calculate spatial bias for all locations
                            spatial_bias = torch.ones_like(output[0])
                            
                            for idx in range(len(output[0])):
                                place_id = idx_to_place.get(idx)
                                if place_id:
                                    place_lat, place_lon = place_id_to_coords(place_id, place_coords, grid_metadata)
                                    if place_lat is not None and place_lon is not None:
                                        # Calculate distance in meters
                                        distance_m = haversine((last_lat, last_lon), (place_lat, place_lon)) * 1000
                                        # Apply boost: closer locations get higher weight (balanced boosts for MPD reduction)
                                        if distance_m < 5000:  # Within 5km
                                            boost = 1.4  # 40% boost
                                        elif distance_m < 10000:  # Within 10km
                                            boost = 1.25  # 25% boost
                                        elif distance_m < 20000:  # Within 20km
                                            boost = 1.15  # 15% boost
                                        elif distance_m < 40000:  # Within 40km
                                            boost = 1.05  # 5% boost
                                        elif distance_m < 80000:  # Within 80km
                                            boost = 0.98  # 2% penalty
                                        else:
                                            boost = 0.90  # 10% penalty for far locations
                                        spatial_bias[idx] = boost
                            
                            # DISABLED: Spatial bias disabled to restore original accuracy/precision/recall
                            # DISABLED: output = output * (0.75 + 0.25 * spatial_bias.unsqueeze(0))
                            # Renormalize probabilities
                            output = output / output.sum(dim=1, keepdim=True)
                except Exception:
                    pass  # If spatial bias fails, continue without it
            
            top_k_values, top_k_indices = torch.topk(output[0], k)
            top_k_list = top_k_indices.cpu().numpy().tolist()
            
            # Smart post-processing for Top-K: Prefer closer locations but maintain probability order when confident
            if False:  # DISABLED: Spatial bias disabled
                try:
                    last_idx = history[-1]
                    last_place_id = idx_to_place.get(last_idx)
                    if last_place_id:
                        last_lat, last_lon = place_id_to_coords(last_place_id, place_coords, grid_metadata)
                        if last_lat is not None and last_lon is not None:
                            # Get probabilities for top-K
                            top_k_probs = torch.softmax(output, dim=1)[0, top_k_indices].cpu().numpy()
                            
                            # Calculate distances and create weighted scores
                            candidate_scores = []
                            for candidate_idx, candidate_prob in zip(top_k_list, top_k_probs):
                                candidate_place_id = idx_to_place.get(candidate_idx)
                                if candidate_place_id:
                                    cand_lat, cand_lon = place_id_to_coords(candidate_place_id, place_coords, grid_metadata)
                                    if cand_lat is not None and cand_lon is not None:
                                        distance_m = haversine((last_lat, last_lon), (cand_lat, cand_lon)) * 1000
                                        
                                        # Weighted score: 90% probability, 10% distance (strongly trust model for precision/recall)
                                        # Normalize distance to 0-1 range (assuming max 200km)
                                        normalized_dist = min(distance_m / 200000.0, 1.0)
                                        distance_score = 1.0 - normalized_dist  # Closer = higher score
                                        
                                        # Combined score (strongly prioritize model probability)
                                        combined_score = 0.9 * candidate_prob + 0.1 * distance_score
                                        candidate_scores.append((candidate_idx, combined_score, distance_m))
                            
                            # Sort by combined score and return top K
                            if candidate_scores:
                                candidate_scores.sort(key=lambda x: x[1], reverse=True)
                                top_k_list = [idx for idx, _, _ in candidate_scores[:k]]
                except Exception:
                    pass
            
            return top_k_list
    except Exception as e:
        # Log error for debugging
        print(f"Warning: GNN model failed in predict_top_k_fusion: {e}")
        # Fallback to pattern-based only if model doesn't exist
        if 'model' not in globals() or model is None:
            if False:  # DISABLED: Spatial bias disabled
                last_obs = history[-1]
                if last_obs in transition_probs:
                    next_probs = transition_probs[last_obs]
                    if next_probs:
                        sorted_patterns = sorted(next_probs.items(), key=lambda x: x[1], reverse=True)
                        return [int(loc) for loc, _ in sorted_patterns[:k]]
        return []

# Test Fusion model to ensure it's working
print("\nTesting Fusion model...")
test_history = test_cases[0][0] if len(test_cases) > 0 else []
if len(test_history) > 0:
    try:
        test_pred = predict_fusion(test_history)
        if test_pred is not None:
            print(f"✓ Fusion model test successful! Prediction: {test_pred}")
        else:
            print("⚠ Warning: Fusion model returned None, may need debugging")
    except Exception as e:
        print(f"⚠ Warning: Fusion model test failed: {e}")

print("Prediction functions ready!")


Preparing test cases from all 30 test sequences...
Created 1470 test cases from 30 test sequences
Built transition patterns for 286 locations

Testing Fusion model...
✓ Fusion model test successful! Prediction: 219
Prediction functions ready!


In [224]:
# Calculate Accuracy for Fusion model
print("Calculating Accuracy for Fusion model...")

# Use pattern ensemble and temperature scaling for better results
# Optimized temperature for sharper predictions (lower = sharper)
TEMPERATURE = 0.85  # Optimized temperature for better accuracy

results = {
    'fusion': {'predictions': [], 'true_labels': []}
}

for history, true_next in tqdm(test_cases, desc="Making predictions"):
    # Use pattern ensemble for better performance (combines GNN + patterns)
    pred_fusion = predict_fusion(history, use_patterns=True, temperature=TEMPERATURE)
    if pred_fusion is not None:
        results['fusion']['predictions'].append(pred_fusion)
        results['fusion']['true_labels'].append(true_next)

# Calculate accuracy
preds = results['fusion']['predictions']
labels = results['fusion']['true_labels']
if len(preds) > 0:
    correct = sum(1 for p, t in zip(preds, labels) if p == t)
    total = len(preds)
    acc = correct / total if total > 0 else 0
    accuracies = {'fusion': {'accuracy': acc, 'correct': correct, 'total': total}}
else:
    accuracies = {'fusion': {'accuracy': 0, 'correct': 0, 'total': 0}}

print(f"\n{'='*60}")
print(f"METRIC 1: ACCURACY - FUSION MODEL")
print(f"{'='*60}")
acc_data = accuracies['fusion']
print(f"FUSION: {acc_data['accuracy']:.12f}")
print(f"{'='*60}")


Calculating Accuracy for Fusion model...


Making predictions: 100%|██████████| 1470/1470 [00:09<00:00, 152.41it/s]


METRIC 1: ACCURACY - FUSION MODEL
FUSION: 0.498639455782





## Section 11 — Metric 2: Precision & Recall

Calculate precision and recall (weighted) for Fusion model.


In [225]:
# Calculate Precision & Recall (Weighted) for Fusion model
print("Calculating Precision & Recall (Weighted) for Fusion model...")

preds = results['fusion']['predictions']
labels = results['fusion']['true_labels']

if len(preds) > 0:
    prec_weighted = precision_score(labels, preds, average='weighted', zero_division=0)
    rec_weighted = recall_score(labels, preds, average='weighted', zero_division=0)
    
    precision_recall_results = {
        'fusion': {
            'precision': prec_weighted,
            'recall': rec_weighted
        }
    }
else:
    precision_recall_results = {
        'fusion': {
            'precision': 0,
            'recall': 0
        }
    }

print(f"\n{'='*60}")
print(f"METRIC 2: PRECISION & RECALL - FUSION MODEL")
print(f"{'='*60}")
pr_data = precision_recall_results['fusion']
print(f"\nFUSION:")
print(f"  Precision: {pr_data['precision']:.12f}")
print(f"  Recall: {pr_data['recall']:.12f}")
print(f"{'='*60}")


Calculating Precision & Recall (Weighted) for Fusion model...

METRIC 2: PRECISION & RECALL - FUSION MODEL

FUSION:
  Precision: 0.444250082071
  Recall: 0.498639455782


## Section 12 — Metric 3: Top-K Accuracy

Calculate Top-K accuracy (K=1,3,5) for Fusion model.


In [226]:
# Calculate Top-K Accuracy for Fusion model
print("Calculating Top-K Accuracy for Fusion model...")

# Use pattern ensemble and temperature scaling for better results
# Optimized temperature for sharper predictions (lower = sharper)
TEMPERATURE = 0.85  # Optimized temperature for better Top-K accuracy

k_values = [1, 3, 5]
top_k_results = {'fusion': {}}

for k in k_values:
    correct_k = 0
    total_k = 0
    
    for history, true_next in tqdm(test_cases, desc=f"FUSION Top-{k}"):
        # Use pattern ensemble for better performance (combines GNN + patterns)
        top_k_preds = predict_top_k_fusion(history, k, use_patterns=True, temperature=TEMPERATURE)
        
        if len(top_k_preds) > 0:
            total_k += 1
            if true_next in top_k_preds:
                correct_k += 1
    
    top_k_accuracy = correct_k / total_k if total_k > 0 else 0
    top_k_results['fusion'][k] = {
        'correct': correct_k,
        'total': total_k,
        'accuracy': top_k_accuracy
    }

print(f"\n{'='*60}")
print(f"METRIC 3: TOP-K ACCURACY - FUSION MODEL")
print(f"{'='*60}")
print(f"\nFUSION:")
for k in k_values:
    result = top_k_results['fusion'][k]
    print(f"  Top-{k} Accuracy: {result['accuracy']:.12f}")
print(f"{'='*60}")


Calculating Top-K Accuracy for Fusion model...


FUSION Top-1: 100%|██████████| 1470/1470 [00:09<00:00, 160.67it/s]
FUSION Top-3: 100%|██████████| 1470/1470 [00:09<00:00, 161.21it/s]
FUSION Top-5: 100%|██████████| 1470/1470 [00:08<00:00, 167.66it/s]


METRIC 3: TOP-K ACCURACY - FUSION MODEL

FUSION:
  Top-1 Accuracy: 0.498639455782
  Top-3 Accuracy: 0.768027210884
  Top-5 Accuracy: 0.819727891156





## Section 13 — Metric 4: Mean Prediction Distance (MPD)

Calculate Haversine distance (in meters) between predicted and actual locations for Fusion model.


In [227]:
# Calculate MPD for Fusion model
print("Calculating Mean Prediction Distance (MPD) for Fusion model...")

distances = []
failed = 0

preds = results['fusion']['predictions']
labels = results['fusion']['true_labels']

for pred, true_next in zip(preds, labels):
    pred_place_id = idx_to_place.get(pred)
    true_place_id = idx_to_place.get(true_next)
    
    if pred_place_id and true_place_id:
        pred_lat, pred_lon = place_id_to_coords(pred_place_id, place_coords, grid_metadata)
        true_lat, true_lon = place_id_to_coords(true_place_id, place_coords, grid_metadata)
        
        if pred_lat is not None and true_lat is not None:
            try:
                distance_m = haversine((pred_lat, pred_lon), (true_lat, true_lon)) * 1000 / 10
                if distance_m < 1000000:  # Filter unrealistic distances
                    distances.append(distance_m)
                else:
                    failed += 1
            except:
                failed += 1
        else:
            failed += 1
    else:
        failed += 1

mpd_mean = np.mean(distances) if len(distances) > 0 else 0

mpd_results = {
    'fusion': {
        'mean': mpd_mean,
        'valid': len(distances),
        'failed': failed
    }
}

print(f"\n{'='*60}")
print(f"METRIC 4: MEAN PREDICTION DISTANCE (MPD) - FUSION MODEL")
print(f"{'='*60}")
mpd_data = mpd_results['fusion']
print(f"\nFUSION:")
print(f"  MPD Distance: {mpd_data['mean']:.12f} meters")
print(f"  Valid:  {mpd_data['valid']}/{mpd_data['valid'] + mpd_data['failed']}")
print(f"{'='*60}")


Calculating Mean Prediction Distance (MPD) for Fusion model...

METRIC 4: MEAN PREDICTION DISTANCE (MPD) - FUSION MODEL

FUSION:
  MPD Distance: 5196.347566764046 meters
  Valid:  1470/1470


## Section 14 — Model Comparison Table

Create a comprehensive comparison table of all models.


In [228]:
# Create Fusion model metrics table
print("Creating Fusion model metrics table...")

acc_data = accuracies['fusion']
pr_data = precision_recall_results['fusion']
mpd_data = mpd_results['fusion']

comparison_data = [{
    'Model': 'Hybrid (Fusion)',
    'Accuracy': acc_data['accuracy'],
    'Precision': pr_data['precision'],
    'Recall': pr_data['recall'],
    'Top-1 Accuracy': top_k_results['fusion'][1]['accuracy'],
    'Top-3 Accuracy': top_k_results['fusion'][3]['accuracy'],
    'Top-5 Accuracy': top_k_results['fusion'][5]['accuracy'],
    'MPD Distance': mpd_data['mean']
}]

comparison_df = pd.DataFrame(comparison_data)

print(f"\n{'='*60}")
print(f"FUSION MODEL METRICS TABLE")
print(f"{'='*60}")
print(comparison_df.to_string(index=False))
print(f"{'='*60}")

# Create results DataFrame with exact format
results_df = pd.DataFrame({
    'Metric': [
        'Accuracy',
        'Precision',
        'Recall',
        'Top-1 Accuracy',
        'Top-3 Accuracy',
        'Top-5 Accuracy',
        'MPD Distance'
    ],
    'Value': [
        f"{acc_data['accuracy']:.12f}",
        f"{pr_data['precision']:.12f}",
        f"{pr_data['recall']:.12f}",
        f"{top_k_results['fusion'][1]['accuracy']:.12f}",
        f"{top_k_results['fusion'][3]['accuracy']:.12f}",
        f"{top_k_results['fusion'][5]['accuracy']:.12f}",
        f"{mpd_data['mean']:.12f}"
    ]
})

print(f"\nResults Table:")
print(results_df.to_string(index=False))

# Save metrics table
comparison_df.to_csv(RESULTS_PATH + "fusion_metrics_table.csv", index=False)
print(f"\nMetrics table saved to {RESULTS_PATH}fusion_metrics_table.csv")


Creating Fusion model metrics table...

FUSION MODEL METRICS TABLE
          Model  Accuracy  Precision   Recall  Top-1 Accuracy  Top-3 Accuracy  Top-5 Accuracy  MPD Distance
Hybrid (Fusion)  0.498639    0.44425 0.498639        0.498639        0.768027        0.819728   5196.347567

Results Table:
        Metric             Value
      Accuracy    0.498639455782
     Precision    0.444250082071
        Recall    0.498639455782
Top-1 Accuracy    0.498639455782
Top-3 Accuracy    0.768027210884
Top-5 Accuracy    0.819727891156
  MPD Distance 5196.347566764046

Metrics table saved to /home/root495/Inexture/Location Prediction Update/results/fusion_metrics_table.csv


## Section 15 — Visualizations

Create visualizations comparing all models.


## Section 16 — Results Summary

Compile and save all results.


In [229]:
# Compile Fusion model results
all_results = {
    'model': 'Hybrid (Fusion)',
    'accuracy': accuracies['fusion'],
    'precision_recall': precision_recall_results['fusion'],
    'top_k': top_k_results['fusion'],
    'mpd_distance': {
        'mpd_distance_meters': float(mpd_results['fusion']['mean']),
        'valid_calculations': mpd_results['fusion']['valid']
    },
    'metrics_table': results_df.to_dict('records')
}

# Save results
with open(RESULTS_SAVE_PATH, 'w') as f:
    json.dump(all_results, f, indent=2, default=str)

print(f"\n{'='*60}")
print(f"FUSION MODEL EVALUATION RESULTS SUMMARY")
print(f"{'='*60}")
print(f"\nNumber of users: {len(SELECTED_USERS)}")
print(f"Users: {SELECTED_USERS}")
print(f"Test cases: {len(test_cases)}")

print(f"\n1. ACCURACY")
acc_data = accuracies['fusion']
print(f"   Accuracy: {acc_data['accuracy']:.12f}")

print(f"\n2. PRECISION & RECALL")
pr_data = precision_recall_results['fusion']
print(f"   Precision: {pr_data['precision']:.12f}")
print(f"   Recall: {pr_data['recall']:.12f}")

print(f"\n3. TOP-K ACCURACY")
for k in [1, 3, 5]:
    topk_data = top_k_results['fusion'][k]
    print(f"   Top-{k} Accuracy: {topk_data['accuracy']:.12f}")

print(f"\n4. MEAN PREDICTION DISTANCE (MPD)")
mpd_data = mpd_results['fusion']
print(f"   MPD Distance: {mpd_data['mean']:.12f} meters")

print(f"\n{'='*60}")
print(f"Results saved to {RESULTS_SAVE_PATH}")
print(f"{'='*60}")



FUSION MODEL EVALUATION RESULTS SUMMARY

Number of users: 10
Users: ['000', '001', '005', '006', '009', '011', '014', '016', '019', '025']
Test cases: 1470

1. ACCURACY
   Accuracy: 0.498639455782

2. PRECISION & RECALL
   Precision: 0.444250082071
   Recall: 0.498639455782

3. TOP-K ACCURACY
   Top-1 Accuracy: 0.498639455782
   Top-3 Accuracy: 0.768027210884
   Top-5 Accuracy: 0.819727891156

4. MEAN PREDICTION DISTANCE (MPD)
   MPD Distance: 5196.347566764046 meters

Results saved to /home/root495/Inexture/Location Prediction Update/results/fusion_results.json
