In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import xgboost as xgb
from lightgbm import LGBMClassifier
import gc

# Model Definition and Training
class TrajectoryClassifier:
    """Train and evaluate a GRU sequence model plus XGBoost/LightGBM models.

    All models predict the binary ``turn_binary`` column of the supplied
    DataFrames. Trained models are kept in ``self.models`` under the keys
    ``'gru'``, ``'xgb'`` and ``'lgb'``; the most recent evaluation outputs are
    kept in ``self.results``.
    """

    def __init__(self, sequence_length=10, batch_size=32, learning_rate=0.001, epochs=10, validation_split=0.2):
        """Store the hyper-parameters.

        Args:
            sequence_length: Number of consecutive rows in one GRU input window.
            batch_size: Batch size used by the GRU generators and prediction.
            learning_rate: Adam learning rate for the GRU model.
            epochs: Maximum number of GRU training epochs.
            validation_split: Fraction of the training rows held out (from the
                end, without shuffling) for GRU validation.
        """
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.models = {}
        self.results = {}
        self.validation_split = validation_split

    def create_rnn_model(self, input_shape):
        """Build and compile the two-layer GRU binary classifier.

        Args:
            input_shape: ``(sequence_length, n_features)`` of one input window.

        Returns:
            A compiled Keras ``Sequential`` model with a single sigmoid output.
        """
        model = Sequential([
            GRU(4, input_shape=input_shape, return_sequences=True),
            BatchNormalization(),
            Dropout(0.2),
            GRU(2, return_sequences=False),
            BatchNormalization(),
            Dropout(0.2),
            Dense(4, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        optimizer = Adam(learning_rate=self.learning_rate)
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def _num_windows(self, df):
        """Return how many sequence windows ``prepare_rnn_data`` emits for ``df``."""
        return len(df) - self.sequence_length

    def _build_windows(self, df, feature_columns):
        """Create a strided view of all sequence windows and their labels.

        Args:
            df: DataFrame containing ``feature_columns`` and ``turn_binary``.
            feature_columns: Columns used as per-timestep features.

        Returns:
            ``(windows, labels)`` where ``windows`` is a read-only view of shape
            ``(n, sequence_length, n_features)`` over a single float32 copy of
            the features, and ``labels`` is an int8 array holding the
            ``turn_binary`` value of the last row of each window.

        Raises:
            ValueError: If ``df`` is too short to produce a single window.
        """
        num_windows = self._num_windows(df)
        if num_windows <= 0:
            raise ValueError(
                f"Need more than {self.sequence_length} rows to build sequences, "
                f"got {len(df)}"
            )

        features = df[feature_columns].to_numpy(dtype=np.float32)
        # sliding_window_view puts the window axis last: (n, features, steps).
        # Transpose to the (n, steps, features) layout the GRU expects, then
        # keep the first `num_windows` windows so the sample count matches the
        # steps computed in train_rnn_model.
        windows = np.lib.stride_tricks.sliding_window_view(
            features, self.sequence_length, axis=0
        ).transpose(0, 2, 1)[:num_windows]

        last = self.sequence_length - 1
        labels = df['turn_binary'].to_numpy()[last:last + num_windows].astype(np.int8)
        return windows, labels

    def prepare_rnn_data(self, df, feature_columns, class_weight=None):
        """Yield GRU batches forever, copying only one batch at a time.

        Args:
            df: DataFrame containing ``feature_columns`` and ``turn_binary``.
            feature_columns: Columns used as per-timestep features.
            class_weight: Optional ``{label: weight}`` mapping. When given, each
                batch also carries a per-sample weight array looked up from it.

        Yields:
            ``(X, y)`` or, when ``class_weight`` is given, ``(X, y, w)`` with
            ``X`` of shape ``(batch, sequence_length, n_features)``.

        Raises:
            ValueError: On first iteration, if ``df`` yields no window at all
                (previously this case looped forever without yielding).
        """
        windows, labels = self._build_windows(df, feature_columns)

        sample_weights = None
        if class_weight is not None:
            sample_weights = np.array(
                [class_weight[int(label)] for label in labels], dtype=np.float32
            )

        num_samples = len(labels)
        while True:
            for start in range(0, num_samples, self.batch_size):
                stop = start + self.batch_size
                # np.array copies the strided slice into a contiguous batch.
                batch_x = np.array(windows[start:stop], dtype=np.float32)
                batch_y = labels[start:stop]
                if sample_weights is None:
                    yield batch_x, batch_y
                else:
                    yield batch_x, batch_y, sample_weights[start:stop]

    def split_data_for_rnn(self, df, feature_columns):
        """Split ``df`` into leading training rows and trailing validation rows.

        The split is not shuffled, so row order is preserved in both parts.
        ``feature_columns`` is accepted for interface compatibility and unused.
        """
        train_df, val_df = train_test_split(df, test_size=self.validation_split, shuffle=False)
        return train_df, val_df

    def train_rnn_model(self, train_data, feature_columns, class_weight=None):
        """Train the GRU model and store it in ``self.models['gru']``.

        Args:
            train_data: DataFrame with ``feature_columns`` and ``turn_binary``.
            feature_columns: Columns used as per-timestep features.
            class_weight: Optional ``{label: weight}`` mapping applied to the
                training samples.

        Raises:
            ValueError: If either split is too short to form one sequence.
        """
        print("\nTraining GRU model...")

        train_df, val_df = self.split_data_for_rnn(train_data, feature_columns)

        num_train_samples = self._num_windows(train_df)
        num_val_samples = self._num_windows(val_df)
        if num_train_samples <= 0 or num_val_samples <= 0:
            raise ValueError(
                "Not enough rows for GRU training: the train split has "
                f"{len(train_df)} rows and the validation split has {len(val_df)} rows, "
                f"but each needs more than sequence_length={self.sequence_length}"
            )

        # Ceiling division: a trailing partial batch still counts as a step.
        steps_per_epoch = -(-num_train_samples // self.batch_size)
        validation_steps = -(-num_val_samples // self.batch_size)

        # Class weights are delivered as per-sample weights from the generator
        # instead of fit(class_weight=...), which is not accepted together with
        # Python generator inputs by every Keras version.
        train_generator = self.prepare_rnn_data(train_df, feature_columns, class_weight=class_weight)
        val_generator = self.prepare_rnn_data(val_df, feature_columns)

        input_shape = (self.sequence_length, len(feature_columns))
        model = self.create_rnn_model(input_shape)

        callbacks = [
            EarlyStopping(patience=5, restore_best_weights=True),
            ReduceLROnPlateau(factor=0.2, patience=3, min_lr=0.00001)
        ]

        model.fit(
            train_generator,
            steps_per_epoch=steps_per_epoch,
            epochs=self.epochs,
            validation_data=val_generator,
            validation_steps=validation_steps,
            callbacks=callbacks,
            verbose=1
        )

        self.models['gru'] = model
        gc.collect()

    def train_tree_based_models(self, train_data, feature_columns, class_weight=None):
        """Train XGBoost and LightGBM classifiers on the per-row features.

        Args:
            train_data: DataFrame with ``feature_columns`` and ``turn_binary``.
            feature_columns: Columns used as features.
            class_weight: Optional ``{label: weight}`` mapping. It is converted
                to ``scale_pos_weight`` as ``weight[1] / weight[0]``; a missing
                mapping or missing class falls back to a weight of 1.
        """
        # scale_pos_weight re-weights positives relative to negatives (whose
        # weight stays 1), so the equivalent of {0: w0, 1: w1} is w1 / w0.
        if class_weight:
            scale_pos_weight = class_weight.get(1, 1.0) / class_weight.get(0, 1.0)
        else:
            scale_pos_weight = 1.0

        X_train = train_data[feature_columns]
        y_train = train_data['turn_binary']

        print("\nTraining XGBoost model...")
        xgb_model = xgb.XGBClassifier(
            scale_pos_weight=scale_pos_weight,
            max_depth=6,
            learning_rate=0.1,
            n_estimators=100,
            use_label_encoder=False,
            eval_metric='logloss'
        )
        xgb_model.fit(X_train, y_train)
        self.models['xgb'] = xgb_model

        print("\nTraining LightGBM model...")
        lgb_model = LGBMClassifier(
            scale_pos_weight=scale_pos_weight,
            n_estimators=100,
            learning_rate=0.1,
            max_depth=6
        )
        lgb_model.fit(X_train, y_train)
        self.models['lgb'] = lgb_model

    def train(self, train_data, feature_columns):
        """Train all models using 'balanced' class weights from ``turn_binary``."""
        labels = train_data['turn_binary']
        classes = np.unique(labels)
        weights = compute_class_weight('balanced', classes=classes, y=labels)
        # Keyed by the classes actually present, so single-class data does not
        # fail on a hard-coded index.
        class_weight = {int(cls): float(weight) for cls, weight in zip(classes, weights)}

        self.train_rnn_model(train_data, feature_columns, class_weight)
        self.train_tree_based_models(train_data, feature_columns, class_weight)

    def evaluate(self, test_data, feature_columns):
        """Print a classification report and plot diagnostics for each model.

        The GRU is scored on every sequence window of ``test_data``; the tree
        models are scored on every row. A confusion matrix is plotted for each
        model and a ROC curve for the GRU. The arrays used are also stored in
        ``self.results[model_name]`` under ``'y_true'``, ``'y_pred'`` and
        ``'y_score'`` (``None`` for models without probability scores here).

        Args:
            test_data: DataFrame with ``feature_columns`` and ``turn_binary``.
            feature_columns: Columns used as features.

        Raises:
            ValueError: If a GRU model is present and ``test_data`` is too short
                to form one sequence.
        """
        results = {}

        for model_name, model in self.models.items():
            y_score = None
            if model_name == 'gru':
                windows, y_test = self._build_windows(test_data, feature_columns)
                X_test = np.array(windows, dtype=np.float32)
                # The model outputs shape (n, 1); flatten for the sklearn metrics.
                y_score = np.asarray(model.predict(X_test, batch_size=self.batch_size)).ravel()
                y_pred = (y_score > 0.5).astype(int)
            else:
                X_test = test_data[feature_columns]
                y_test = test_data['turn_binary']
                y_pred = model.predict(X_test)

            results[model_name] = {
                'y_true': np.asarray(y_test),
                'y_pred': np.asarray(y_pred),
                'y_score': y_score,
            }

        self.results = results

        for model_name, outcome in results.items():
            y_true = outcome['y_true']
            y_pred = outcome['y_pred']
            y_score = outcome['y_score']

            print(f"\n{model_name} Results:")
            print(classification_report(y_true, y_pred))

            cm = confusion_matrix(y_true, y_pred)
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title(f'{model_name} Confusion Matrix')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            plt.show()

            if y_score is not None:
                fpr, tpr, _ = roc_curve(y_true, y_score)
                roc_auc = roc_auc_score(y_true, y_score)

                plt.figure(figsize=(8, 6))
                plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
                plt.plot([0, 1], [0, 1], 'k--')
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title(f'{model_name} ROC Curve')
                plt.legend()
                plt.show()
        
# Main Execution
if __name__ == "__main__":
    # Load the pre-engineered train/test tables from the working directory.
    train_data = pd.read_csv('train_data_engineered.csv')
    test_data = pd.read_csv('test_data_engineered.csv')

    # Every column except the labels and the file identifier is a feature.
    non_feature_columns = {'turn_binary', 'turn_label', 'file_id'}
    feature_columns = [col for col in train_data.columns if col not in non_feature_columns]

    # Report GPU availability. tf.test.is_gpu_available() is deprecated in
    # favour of tf.config.list_physical_devices('GPU'), so query once and
    # print the same two lines as before (count, then a boolean).
    gpus = tf.config.list_physical_devices('GPU')
    print(f"Num GPUs Available: {len(gpus)}")
    print(bool(gpus))

    classifier = TrajectoryClassifier(sequence_length=10, batch_size=32, learning_rate=0.001, epochs=10)

    classifier.train(train_data, feature_columns)
    classifier.evaluate(test_data, feature_columns)

    # Persist the GRU model, which this script treats as the "best" model.
    # NOTE(review): newer Keras releases may require a file extension such as
    # '.keras' in the save path - confirm against the installed version.
    if 'gru' in classifier.models:
        best_model = classifier.models['gru']
        best_model.save('best_model')
        print("\nBest model saved to 'best_model'")
    else:
        print("\nGRU model was not trained, no best model to save")

Num GPUs Available: 1
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


2024-12-12 20:05:04.372100: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-12-12 20:05:04.372140: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-12-12 20:05:04.372156: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-12-12 20:05:04.372640: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-12 20:05:04.372858: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)



Training GRU model...


2024-12-12 20:05:07.163864: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-12 20:05:07.163896: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


KeyboardInterrupt: 

: 