In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Read the mobility dataset from parquet and report its size together with
# the sum of the 'handover_needed' column.
mobility_data = pd.read_parquet('mobility_data.parquet')
print(
    f"Original mobility_data shape: {mobility_data.shape}",
    f"Number of handover samples: {mobility_data['handover_needed'].sum()}",
    sep="\n",
)

In [None]:
def prepare_lstm_data_robust(df, sequence_length=20, prediction_horizon=5):
    """
    Prepare sequential data for an LSTM handover-prediction model.

    For every user, rows are ordered by ``timestamp`` and cut into sliding
    windows (stride 1). Each sample consists of ``sequence_length`` consecutive
    rows of features; its label is 1 when any of the following
    ``prediction_horizon`` rows has a truthy ``handover_needed`` value,
    otherwise 0.

    The caller's DataFrame is never modified: derived columns are added to an
    internal copy only.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the numeric columns 'x', 'y', 'velocity', 'heading',
        'signal_strength', 'sinr', 'network_load', 'throughput_mbps', plus
        'timestamp' (accessed through ``.dt``, so it must be datetime-like),
        'user_id' and 'handover_needed'. Optional 'pattern_type' and
        'device_type' columns are one-hot encoded and appended as features.
    sequence_length : int, default 20
        Number of consecutive rows in each input window.
    prediction_horizon : int, default 5
        Number of rows after the input window that are inspected for a handover.

    Returns
    -------
    X_array : numpy.ndarray
        float64 array of shape (n_samples, sequence_length, n_features).
    y_array : numpy.ndarray
        Integer array of shape (n_samples,) holding 0/1 labels.

    Raises
    ------
    ValueError
        If ``df`` is empty, a required column is missing, a window parameter
        is smaller than 1, or no user has enough rows to form a single window.
    """
    # Check if DataFrame is empty
    if df.empty:
        raise ValueError("Input DataFrame is empty")

    if sequence_length < 1 or prediction_horizon < 1:
        raise ValueError("sequence_length and prediction_horizon must both be >= 1")

    # Select features for model
    features = [
        'x', 'y', 'velocity', 'heading', 'signal_strength',
        'sinr', 'network_load', 'throughput_mbps'
    ]

    # Validate every column the function reads, not only the model features,
    # so a missing key surfaces as the documented ValueError.
    required_cols = features + ['timestamp', 'user_id', 'handover_needed']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Cyclical time-of-day / day-of-week encodings. assign() returns a new
    # frame, so the caller's DataFrame is left untouched.
    timestamps = df['timestamp'].dt
    hour_angle = 2 * np.pi * timestamps.hour / 24
    weekday_angle = 2 * np.pi * timestamps.dayofweek / 7
    df = df.assign(
        hour_sin=np.sin(hour_angle),
        hour_cos=np.cos(hour_angle),
        day_of_week_sin=np.sin(weekday_angle),
        day_of_week_cos=np.cos(weekday_angle),
    )
    features += ['hour_sin', 'hour_cos', 'day_of_week_sin', 'day_of_week_cos']

    # One-hot encode categorical features. dtype=float keeps the feature
    # matrix purely numeric: pandas >= 2.0 defaults to bool dummies, which
    # would turn the mixed bool/float matrix into an object-dtype array.
    for column, prefix in (('pattern_type', 'pattern'), ('device_type', 'device')):
        if column in df.columns:
            dummies = pd.get_dummies(df[column], prefix=prefix, dtype=float)
            df = pd.concat([df, dummies], axis=1)
            features += list(dummies.columns)

    # Create sequences
    X = []
    y = []
    window_span = sequence_length + prediction_horizon

    # Group by user to maintain trajectory integrity
    for _, user_df in df.groupby('user_id'):
        user_df = user_df.sort_values('timestamp')
        n_rows = len(user_df)

        # A user with exactly `window_span` rows still yields one full window.
        if n_rows < window_span:
            continue

        # Convert once per user; slicing NumPy arrays inside the loop is far
        # cheaper than repeated DataFrame.iloc lookups.
        feature_matrix = user_df[features].to_numpy(dtype=np.float64)
        # Missing flags count as "no handover", matching Series.any(skipna=True).
        handover_flags = (
            user_df['handover_needed'].fillna(False).astype(bool).to_numpy()
        )

        # "+ 1" so the final window, whose target ends on the user's last row,
        # is included.
        for start in range(n_rows - window_span + 1):
            target_start = start + sequence_length
            target_end = target_start + prediction_horizon

            X.append(feature_matrix[start:target_start])
            y.append(1 if handover_flags[target_start:target_end].any() else 0)

    # Check if we have any sequences
    if not X:
        raise ValueError("No valid sequences were created. Check your data and sequence length parameters.")

    X_array = np.array(X)
    y_array = np.array(y)

    return X_array, y_array

# Turn the loaded mobility frame into LSTM-ready windows (X) and labels (y),
# using the function's default window length and prediction horizon.
X, y = prepare_lstm_data_robust(df=mobility_data)
print(f"Successfully created sequences: X shape: {X.shape}, y shape: {y.shape}")

In [None]:
# Split the data: 70% train, then the remaining 30% halved into 15% validation
# and 15% test.
# Stratify on the binary label so every split keeps the same positive/negative
# ratio; an unstratified random split can leave a split with few or no positive
# samples. train_test_split raises ValueError if a class has fewer than two
# samples, which would mean the data cannot support a three-way split anyway.
# NOTE(review): windows are built with stride 1 per user, so neighbouring
# windows share most of their rows. A random split therefore places
# near-duplicate windows in train and test; consider splitting by user or by
# time before trusting the test metrics.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")

In [None]:
# Build the LSTM model
def build_lstm_model(input_shape):
    """
    Assemble and compile the stacked-LSTM binary classifier.

    The network is: LSTM(128, returning the full sequence) -> Dropout(0.2) ->
    LSTM(64) -> Dropout(0.2) -> Dense(32, relu) -> Dense(1, sigmoid).
    It is compiled with the Adam optimizer and binary cross-entropy loss, and
    tracks accuracy, AUC, precision and recall.

    Parameters
    ----------
    input_shape : tuple
        Shape handed to the first LSTM layer; the caller in this notebook
        passes (window length, number of features).

    Returns
    -------
    tf.keras.Sequential
        The compiled, untrained model.
    """
    layers = tf.keras.layers
    metrics = tf.keras.metrics

    # Add the layers one at a time, in the same order as the architecture above.
    model = tf.keras.Sequential()
    model.add(layers.LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(layers.Dropout(0.2))
    model.add(layers.LSTM(64))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))

    tracked_metrics = ['accuracy', metrics.AUC(), metrics.Precision(), metrics.Recall()]
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=tracked_metrics,
    )

    return model

# Instantiate the network for the (timesteps, features) shape of the training
# windows, then fit it with early stopping on the validation loss.
model = build_lstm_model(input_shape=(X_train.shape[1], X_train.shape[2]))

# Stop after 5 epochs without a val_loss improvement and restore the weights
# from the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
)

In [None]:
# Visualise the training run: loss on the left panel, accuracy on the right,
# each showing the training and validation curves per epoch.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

accuracy_ax.plot(history.history['accuracy'], label='Training Accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Model Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Evaluate the model on the held-out test set.
# evaluate() returns the loss followed by the metrics in the order they were
# passed to compile(): accuracy, AUC, precision, recall.
test_loss, test_accuracy, test_auc, test_precision, test_recall = model.evaluate(X_test, y_test)
print("\nTest Results:")
print(f"Loss: {test_loss:.4f}")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"AUC: {test_auc:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall: {test_recall:.4f}")

# Save the model.
# The path needs an explicit extension: Keras 3 (bundled with TensorFlow >= 2.16)
# raises ValueError for a path that does not end in `.keras` or `.h5`.
# Anything that loads the model must use this file name.
model.save('handover_prediction_model.keras')