In [1]:
!pip install tensorflow>=2.13.0 transformers matplotlib seaborn scikit-learn

# Connect to my google drive
from google.colab import drive
drive.mount('/content/drive')

# copy files from google drive to project
!cp '/content/drive/MyDrive/Uni/BD/part-00000.snappy.parquet' '/content/'
!cp '/content/drive/MyDrive/Uni/BD/part-00001.snappy.parquet' '/content/'
!cp '/content/drive/MyDrive/Uni/BD/part-00002.snappy.parquet' '/content/'
!cp '/content/drive/MyDrive/Uni/BD/part-00003.snappy.parquet' '/content/'

Mounted at /content/drive


### Now

In [1]:
"""
Deep Learning Models for Sentiment Analysis - GPU/TPU Optimized for Google Colab
PIPELINE-COMPATIBLE VERSION - Works with existing MICAP preprocessing pipeline
Implements LSTM, CNN, and Transformer models with CUDA/TPU acceleration

Dependencies: tensorflow>=2.13.0, pandas, numpy, pyspark (for data loading)
Environment: Google Colab with GPU/TPU runtime
Pipeline: Reads from preprocessed parquet files created by MICAP pipeline
"""

import os
import numpy as np
import pandas as pd
import logging
import time
from typing import Dict, List, Tuple, Optional, Union
import json
import warnings
from pathlib import Path

# TensorFlow imports with GPU optimization
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks, optimizers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import plot_model
from tensorflow.keras.mixed_precision import LossScaleOptimizer

# Additional ML libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# PySpark imports for reading existing pipeline data
try:
    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col
    PYSPARK_AVAILABLE = True
except ImportError:
    PYSPARK_AVAILABLE = False
    logging.warning("PySpark not available. Will use pandas for data loading.")

tf.config.optimizer.set_jit(True)      # XLA compilation
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

# Configure warnings and logging
warnings.filterwarnings('ignore', category=FutureWarning)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)


class PipelineDataLoader:
    """
    Loads data from MICAP preprocessing pipeline (parquet files)
    Maintains compatibility with existing feature engineering
    """

    def __init__(self):
        """Initialize pipeline data loader."""
        self.spark = None
        if PYSPARK_AVAILABLE:
            self._init_spark_session()

    def _init_spark_session(self):
        """Initialize Spark session for reading parquet files."""
        try:
            self.spark = (SparkSession.builder
                         .appName("ColabDataLoader")
                         .master("local[*]")
                         .config("spark.driver.memory", "64g")
                         .config("spark.sql.shuffle.partitions", "200")
                         .getOrCreate())
            self.spark.sparkContext.setLogLevel("WARN")
            logger.info("Spark session created for pipeline data loading")
        except Exception as e:
            logger.error(f"Failed to create Spark session: {e}")
            self.spark = None

    def load_pipeline_data(self, data_path: str,
                          sample_fraction: float = 1.0,
                          use_spark: bool = True) -> pd.DataFrame:
        """
        Load preprocessed data from MICAP pipeline.

        Args:
            data_path: Path to processed parquet file or directory
            sample_fraction: Fraction of data to use
            use_spark: Whether to use Spark for loading (fallback to pandas)

        Returns:
            pd.DataFrame: Loaded and sampled data
        """
        logger.info(f"Loading pipeline data from: {data_path}")

        if use_spark and self.spark and PYSPARK_AVAILABLE:
            return self._load_with_spark(data_path, sample_fraction)
        else:
            return self._load_with_pandas(data_path, sample_fraction)

    def _load_with_spark(self, data_path: str, sample_fraction: float) -> pd.DataFrame:
        """Load data using Spark (maintains original pipeline compatibility)."""
        logger.info("Loading data with Spark...")

        try:
            # Read parquet file(s)
            df = self.spark.read.parquet(data_path)

            # Sample if requested
            if sample_fraction < 1.0:
                df = df.sample(sample_fraction, seed=42)

            # Convert to pandas for TensorFlow compatibility
            # Use efficient streaming for large datasets
            pandas_df = self._spark_to_pandas_efficient(df)

            logger.info(f"Loaded {len(pandas_df)} records with Spark")
            return pandas_df

        except Exception as e:
            logger.error(f"Spark loading failed: {e}")
            logger.info("Falling back to pandas...")
            return self._load_with_pandas(data_path, sample_fraction)

    def _load_with_pandas(self, data_path: str, sample_fraction: float) -> pd.DataFrame:
        """Load data with pandas (fallback method)."""
        logger.info("Loading data with pandas...")

        try:
            # Try reading as parquet first
            if data_path.endswith('.parquet') or os.path.isdir(data_path):
                df = pd.read_parquet(data_path)
            else:
                # Fallback to CSV
                df = pd.read_csv(data_path)

            # Sample if requested
            if sample_fraction < 1.0:
                df = df.sample(frac=sample_fraction, random_state=42)

            logger.info(f"Loaded {len(df)} records with pandas")
            return df

        except Exception as e:
            logger.error(f"Failed to load data: {e}")
            raise

    def _spark_to_pandas_efficient(self, spark_df, batch_size: int = 50000) -> pd.DataFrame:
        """Efficiently convert Spark DataFrame to pandas using streaming."""
        try:
            # Try direct conversion for smaller datasets
            if spark_df.count() < batch_size:
                return spark_df.toPandas()

            # Stream large datasets in batches
            logger.info("Streaming large dataset in batches...")
            parts = []
            for batch in spark_df.toLocalIterator(batch_size):
                batch_df = pd.DataFrame(list(batch), columns=spark_df.columns)
                parts.append(batch_df)

            return pd.concat(parts, ignore_index=True)

        except Exception as e:
            logger.warning(f"Streaming failed, using direct conversion: {e}")
            return spark_df.toPandas()

    def validate_pipeline_features(self, df: pd.DataFrame) -> bool:
        """
        Validate that the DataFrame contains expected pipeline features.

        Args:
            df: DataFrame to validate

        Returns:
            bool: True if valid pipeline data
        """
        # Expected features from MICAP pipeline
        required_features = [
            'text', 'sentiment', 'text_processed',
            'text_length', 'processed_length', 'token_count'
        ]

        # Optional but expected features
        expected_features = [
            'vader_compound', 'vader_positive', 'vader_negative', 'vader_neutral',
            'emoji_sentiment', 'exclamation_count', 'question_count',
            'uppercase_ratio', 'punctuation_density',
            'hour_sin', 'hour_cos', 'is_weekend'
        ]

        # Check required features
        missing_required = [f for f in required_features if f not in df.columns]
        if missing_required:
            logger.error(f"Missing required pipeline features: {missing_required}")
            return False

        # Log available optional features
        available_optional = [f for f in expected_features if f in df.columns]
        logger.info(f"Available pipeline features: {len(available_optional)}/{len(expected_features)}")

        return True

    def prepare_features_for_training(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, Dict]:
        """
        Prepare features from pipeline data for deep learning training.

        Args:
            df: DataFrame with pipeline features

        Returns:
            Tuple of (text_sequences, labels, feature_info)
        """
        logger.info("Preparing pipeline features for training...")

        # Validate data
        if not self.validate_pipeline_features(df):
            raise ValueError("Invalid pipeline data structure")

        # Use processed text for deep learning (already cleaned by pipeline)
        text_column = 'text_processed' if 'text_processed' in df.columns else 'text'
        texts = df[text_column].fillna('').astype(str)
        labels = df['sentiment'].values

        # Extract numeric features created by pipeline
        numeric_features = []
        feature_names = []

        # Basic text features
        if 'text_length' in df.columns:
            numeric_features.append(df['text_length'].fillna(0))
            feature_names.append('text_length')

        if 'token_count' in df.columns:
            numeric_features.append(df['token_count'].fillna(0))
            feature_names.append('token_count')

        # VADER sentiment features
        vader_features = ['vader_compound', 'vader_positive', 'vader_negative', 'vader_neutral']
        for feature in vader_features:
            if feature in df.columns:
                numeric_features.append(df[feature].fillna(0))
                feature_names.append(feature)

        # Emoji and text statistics
        text_stat_features = ['emoji_sentiment', 'exclamation_count', 'question_count',
                             'uppercase_ratio', 'punctuation_density']
        for feature in text_stat_features:
            if feature in df.columns:
                numeric_features.append(df[feature].fillna(0))
                feature_names.append(feature)

        # Temporal features
        temporal_features = ['hour_sin', 'hour_cos', 'is_weekend']
        for feature in temporal_features:
            if feature in df.columns:
                numeric_features.append(df[feature].fillna(0))
                feature_names.append(feature)

        # Combine numeric features
        if numeric_features:
            numeric_array = np.column_stack(numeric_features)
            logger.info(f"Extracted {len(feature_names)} numeric features: {feature_names}")
        else:
            numeric_array = None
            logger.warning("No numeric features found in pipeline data")

        feature_info = {
            'text_column': text_column,
            'numeric_features': feature_names,
            'numeric_shape': numeric_array.shape if numeric_array is not None else None,
            'text_samples': len(texts),
            'label_distribution': pd.Series(labels).value_counts().to_dict()
        }

        logger.info(f"Feature preparation completed: {feature_info}")

        return texts.values, labels, numeric_array, feature_info


class GPUEnvironmentManager:
    """Manages GPU/TPU environment setup and optimization for Google Colab"""

    def __init__(self):
        self.device_type = self._detect_accelerator()
        self.strategy = self._setup_distribution_strategy()
        self.mixed_precision_enabled = False
        # self._configure_mixed_precision()
        self._log_environment_info()

        # if self.device_type == 'GPU':
            # self._setup_mixed_precision()

    def _detect_accelerator(self) -> str:
        """Detect available accelerator (GPU/TPU/CPU)"""
        try:
            tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            logger.info("TPU detected and initialized")
            return 'TPU'
        except (ValueError, RuntimeError):
            pass

        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            logger.info(f"GPU detected: {len(gpus)} device(s)")
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            return 'GPU'

        logger.warning("No accelerator detected, using CPU")
        return 'CPU'

    def _setup_distribution_strategy(self):
        """Setup distribution strategy based on hardware"""
        if self.device_type == 'TPU':
            tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
            strategy = tf.distribute.TPUStrategy(tpu)
            logger.info(f"Using TPU strategy with {strategy.num_replicas_in_sync} replicas")
        elif self.device_type == 'GPU':
            strategy = tf.distribute.MirroredStrategy()
            logger.info(f"Using MirroredStrategy with {strategy.num_replicas_in_sync} replicas")
        else:
            strategy = tf.distribute.get_strategy()
            logger.info("Using default strategy (CPU)")

        return strategy

    def _setup_mixed_precision(self):
        """Setup mixed precision training for faster GPU training"""
        try:
            policy = tf.keras.mixed_precision.Policy('mixed_float16')
            tf.keras.mixed_precision.set_global_policy(policy)
            self.mixed_precision_enabled = True
            logger.info("Mixed precision training enabled (float16)")
        except Exception as e:
            logger.warning(f"Could not enable mixed precision: {e}")

    def _log_environment_info(self):
        """Log environment information"""
        logger.info("=== GPU/TPU Environment Information ===")
        logger.info(f"TensorFlow version: {tf.__version__}")
        logger.info(f"Detected accelerator: {self.device_type}")

        if self.device_type == 'GPU':
            gpus = tf.config.list_physical_devices('GPU')
            for i, gpu in enumerate(gpus):
                logger.info(f"GPU {i}: {gpu}")

        logger.info("=" * 50)

    def get_optimal_batch_size(self, base_batch_size: int = 16384) -> int:
        """Calculate optimal batch size based on hardware"""
        if self.device_type == 'TPU':
            return max(128, base_batch_size * 8)
        elif self.device_type == 'GPU':
            return base_batch_size * max(1, self.strategy.num_replicas_in_sync)
        else:
            return max(16, base_batch_size // 2)


class PipelineOptimizedModel:
    """
    Base class for pipeline-compatible GPU/TPU optimized deep learning models
    Works with features from MICAP preprocessing pipeline
    """

    def __init__(self, env_manager: GPUEnvironmentManager,
                 max_words: int = 10000, max_length: int = 100):
        self.env_manager = env_manager
        self.max_words = max_words
        self.max_length = max_length
        self.model = None
        self.tokenizer = None
        self.history = None
        self.training_time = 0
        self.numeric_features_dim = 0

    def prepare_data(self, texts: np.ndarray, labels: np.ndarray,
                    numeric_features: Optional[np.ndarray] = None,
                    validation_split: float = 0.2) -> Tuple:
        """
        Prepare text and numeric features for training.

        Args:
            texts: Array of text data
            labels: Array of labels
            numeric_features: Optional array of numeric features from pipeline
            validation_split: Validation split ratio

        Returns:
            Tuple of prepared datasets
        """
        logger.info("Preparing data for pipeline-compatible training...")

        # Initialize tokenizer
        self.tokenizer = Tokenizer(
            num_words=self.max_words,
            oov_token='<OOV>',
            filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
        )

        # Fit tokenizer
        self.tokenizer.fit_on_texts(texts)

        # Convert to sequences
        sequences = self.tokenizer.texts_to_sequences(texts)
        X_text = pad_sequences(sequences, maxlen=self.max_length,
                              padding='post', truncating='post')

        # Prepare numeric features if available
        X_numeric = None
        if numeric_features is not None:
            X_numeric = numeric_features.astype(np.float32)
            self.numeric_features_dim = X_numeric.shape[1]
            logger.info(f"Using {self.numeric_features_dim} numeric features from pipeline")

        y = labels.astype(np.float32)

        # Split data
        if X_numeric is not None:
            X_text_train, X_text_val, X_num_train, X_num_val, y_train, y_val = train_test_split(
                X_text, X_numeric, y, test_size=validation_split,
                random_state=42, stratify=y
            )

            return (X_text_train, X_num_train, y_train), (X_text_val, X_num_val, y_val)
        else:
            X_text_train, X_text_val, y_train, y_val = train_test_split(
                X_text, y, test_size=validation_split,
                random_state=42, stratify=y
            )

            return (X_text_train, y_train), (X_text_val, y_val)

    def _get_optimizer(self, learning_rate: float = 0.001):
        """Get optimized optimizer"""
        if self.env_manager.device_type == 'TPU':
            optimizer = optimizers.Adam(learning_rate=learning_rate * 2)
        else:
            optimizer = optimizers.Adam(learning_rate=learning_rate)

        if self.env_manager.mixed_precision_enabled:
            optimizer = LossScaleOptimizer(optimizer)

        return optimizer

    def _get_callbacks(self, model_name: str, patience: int = 5):
        """Get training callbacks"""
        return [
            callbacks.EarlyStopping(
                monitor='val_loss', patience=patience,
                restore_best_weights=True, verbose=1
            ),
            callbacks.ReduceLROnPlateau(
                monitor='val_loss', factor=0.5,
                patience=max(2, patience // 2), min_lr=1e-7, verbose=1
            ),
            callbacks.ModelCheckpoint(
                filepath=f'/content/best_{model_name}_pipeline_model.h5',
                monitor='val_loss', save_best_only=True, verbose=1
            )
        ]


class PipelineLSTMModel(PipelineOptimizedModel):
    """LSTM model optimized for pipeline features"""

    def build_model(self, embedding_dim: int = 128, lstm_units: int = 64,
                   dropout_rate: float = 0.3):
        """Build LSTM model with optional numeric features integration"""
        logger.info("Building pipeline-compatible LSTM model...")

        with self.env_manager.strategy.scope():
            # Text input branch
            text_input = layers.Input(shape=(self.max_length,), name='text_input')

            # Embedding layer
            embedding = layers.Embedding(
                input_dim=self.max_words,
                output_dim=embedding_dim,
                input_length=self.max_length,
                mask_zero=True,
                name='embedding'
            )(text_input)

            embedding = layers.SpatialDropout1D(dropout_rate * 0.5)(embedding)

            # Bidirectional LSTM layers
            lstm1 = layers.Bidirectional(
                layers.LSTM(lstm_units, dropout=dropout_rate,
                            return_sequences=True),
                name='bi_lstm_1'
            )(embedding)

            lstm2 = layers.Bidirectional(
                layers.LSTM(lstm_units // 2, dropout=dropout_rate,
                            return_sequences=False),
                name='bi_lstm_2'
            )(lstm1)

            # Text features
            text_features = layers.Dense(64, activation='relu', name='text_dense')(lstm2)
            text_features = layers.BatchNormalization()(text_features)
            text_features = layers.Dropout(dropout_rate)(text_features)

            # Combine with numeric features if available
            if self.numeric_features_dim > 0:
                # Numeric input branch
                numeric_input = layers.Input(shape=(self.numeric_features_dim,), name='numeric_input')
                numeric_features = layers.Dense(32, activation='relu', name='numeric_dense')(numeric_input)
                numeric_features = layers.BatchNormalization()(numeric_features)
                numeric_features = layers.Dropout(dropout_rate * 0.5)(numeric_features)

                # Combine text and numeric features
                combined = layers.Concatenate(name='combine_features')([text_features, numeric_features])
                inputs = [text_input, numeric_input]
            else:
                combined = text_features
                inputs = text_input

            # Final classification layers
            dense = layers.Dense(32, activation='relu', name='final_dense')(combined)
            dense = layers.Dropout(dropout_rate * 0.5)(dense)

            output = layers.Dense(1, activation='sigmoid', name='output')(dense)

            model = models.Model(inputs=inputs, outputs=output, name='PipelineLSTM')

        self.model = model

        # Compile model
        optimizer = self._get_optimizer()
        self.model.compile(
            optimizer=optimizer,
            loss='binary_crossentropy',
            metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
        )

        logger.info("LSTM Model Architecture:")
        self.model.summary(print_fn=logger.info)

        return model

    def train(self, train_data: Tuple, val_data: Tuple, epochs: int = 10):
        """Train the LSTM model"""
        logger.info("Training pipeline LSTM model...")

        callbacks_list = self._get_callbacks('LSTM')

        start_time = time.time()

        # Prepare training data
        if self.numeric_features_dim > 0:
            X_text_train, X_num_train, y_train = train_data
            X_text_val, X_num_val, y_val = val_data

            train_inputs = [X_text_train, X_num_train]
            val_inputs = [X_text_val, X_num_val]
        else:
            X_text_train, y_train = train_data
            X_text_val, y_val = val_data

            train_inputs = X_text_train
            val_inputs = X_text_val

        batch_size = self.env_manager.get_optimal_batch_size()

        # with self.env_manager.strategy.scope():
        #     self.history = self.model.fit(
        #         train_inputs, y_train,
        #         batch_size=batch_size,
        #         epochs=epochs,
        #         validation_data=(val_inputs, y_val),
        #         callbacks=callbacks_list,
        #         verbose=1
        #     )
        def make_ds(x_text, x_num, y, shuffle=True):
            inputs = (x_text, x_num) if x_num is not None else x_text
            ds = tf.data.Dataset.from_tensor_slices((inputs, y))
            if shuffle:
                ds = ds.shuffle(100_000)
            return ds.batch(batch_size, drop_remainder=True).prefetch(tf.data.AUTOTUNE)

        train_ds = make_ds(*train_data, shuffle=True)
        val_ds   = make_ds(*val_data,   shuffle=False)

        with self.env_manager.strategy.scope():
            self.history = self.model.fit(
                train_ds,
                epochs=epochs,
                validation_data=val_ds,
                callbacks=callbacks_list,
                verbose=1
            )

        self.training_time = time.time() - start_time
        logger.info(f"Training completed in {self.training_time:.2f} seconds")

        return self.history.history


def evaluate_pipeline_models_gpu(data_path: str,
                                sample_fraction: float = 1.0,
                                epochs: int = 10) -> Dict:
    """
    Evaluate models using MICAP pipeline data with GPU/TPU optimization.

    Args:
        data_path: Path to processed pipeline data (parquet)
        sample_fraction: Fraction of data to use
        epochs: Number of training epochs

    Returns:
        Dict: Evaluation results
    """
    logger.info("Starting pipeline-compatible GPU evaluation...")

    # Initialize components
    env_manager = GPUEnvironmentManager()
    data_loader = PipelineDataLoader()

    # Load pipeline data
    df = data_loader.load_pipeline_data(data_path, sample_fraction)

    # Prepare features
    texts, labels, numeric_features, feature_info = data_loader.prepare_features_for_training(df)

    logger.info(f"Loaded pipeline data: {feature_info}")

    # Initialize models
    models_to_evaluate = [
        ('Pipeline_LSTM', PipelineLSTMModel)
    ]

    results = {}

    for model_name, ModelClass in models_to_evaluate:
        logger.info(f"\n{'='*60}")
        logger.info(f"Training {model_name}")
        logger.info(f"{'='*60}")

        try:
            # Initialize model
            model = ModelClass(env_manager)

            # Prepare data
            train_data, val_data = model.prepare_data(
                texts, labels, numeric_features
            )

            # Build model
            model.build_model()

            # Train model
            history = model.train(train_data, val_data, epochs=epochs)

            # Evaluate
            if model.numeric_features_dim > 0:
                val_inputs = [val_data[0], val_data[1]]
                val_labels = val_data[2]
            else:
                val_inputs = val_data[0]
                val_labels = val_data[1]

            eval_results = model.model.evaluate(val_inputs, val_labels, verbose=0)

            metrics = {}
            for i, metric_name in enumerate(model.model.metrics_names):
                metrics[metric_name] = eval_results[i]
            metrics['training_time'] = model.training_time

            results[model_name] = {
                'metrics': metrics,
                'history': history,
                'feature_info': feature_info
            }

            # Save model
            model.model.save(f'/content/{model_name.lower()}_model.h5')
            logger.info(f"Model saved as {model_name.lower()}_model.h5")

        except Exception as e:
            logger.error(f"Failed to train {model_name}: {e}")
            results[model_name] = {'error': str(e)}

    # Save results
    with open('pipeline_model_results.json', 'w') as f:
        json.dump(results, f, indent=2, default=str)

    logger.info("Pipeline evaluation completed!")
    return results


# Usage examples for Google Colab
if __name__ == "__main__":
    """
    Example usage in Google Colab:

    # Option 1: Use your existing pipeline data (RECOMMENDED)
    results = evaluate_pipeline_models_gpu(
        data_path='/content/pipeline_features.parquet',  # Upload your processed parquet file
        sample_fraction=1.0,
        epochs=10
    )

    # Option 2: If you have PySpark setup in Colab (advanced)
    # First run your preprocessing pipeline to create the parquet file
    # Then use the above approach
    """

    results = evaluate_pipeline_models_gpu(
        'part-00000.snappy.parquet',
        sample_fraction=1.0,
        epochs=5
    )
    results = evaluate_pipeline_models_gpu(
        'part-00001.snappy.parquet',
        sample_fraction=1.0,
        epochs=5
    )
    results = evaluate_pipeline_models_gpu(
        'part-00002.snappy.parquet',
        sample_fraction=1.0,
        epochs=5
    )
    results = evaluate_pipeline_models_gpu(
        'part-00003.snappy.parquet',
        sample_fraction=1.0,
        epochs=5
    )

    logger.info("Pipeline-compatible GPU deep learning models ready!")
    logger.info("Upload your 'pipeline_features.parquet' file to /content/ and run evaluation.")



Epoch 1/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7363 - auc: 0.0000e+00 - loss: 0.5106
Epoch 1: val_loss improved from inf to 0.05236, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.7414 - auc: 0.0000e+00 - loss: 0.5045 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0524 - learning_rate: 0.0010
Epoch 2/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9992 - auc: 0.0000e+00 - loss: 0.0520
Epoch 2: val_loss improved from 0.05236 to 0.04999, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.9992 - auc: 0.0000e+00 - loss: 0.0515 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0500 - learning_rate: 0.0010
Epoch 3/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0138
Epoch 3: val_loss improved from 0.04999 to 0.03539, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0137 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0354 - learning_rate: 0.0010
Epoch 4/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0071
Epoch 4: val_loss improved from 0.03539 to 0.01963, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0071 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0196 - learning_rate: 0.0010
Epoch 5/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0044
Epoch 5: val_loss improved from 0.01963 to 0.01015, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0044 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0102 - learning_rate: 0.0010
Restoring model weights from the end of the best epoch: 5.




Epoch 1/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8963 - auc: 0.5173 - loss: 0.3879
Epoch 1: val_loss improved from inf to 0.21792, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.8979 - auc: 0.5180 - loss: 0.3839 - val_accuracy: 0.9701 - val_auc: 0.4735 - val_loss: 0.2179 - learning_rate: 0.0010
Epoch 2/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9695 - auc: 0.6665 - loss: 0.1387
Epoch 2: val_loss improved from 0.21792 to 0.19818, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.9695 - auc: 0.6682 - loss: 0.1384 - val_accuracy: 0.9701 - val_auc: 0.5352 - val_loss: 0.1982 - learning_rate: 0.0010
Epoch 3/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9701 - auc: 0.8127 - loss: 0.1136
Epoch 3: val_loss improved from 0.19818 to 0.17331, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.9701 - auc: 0.8132 - loss: 0.1135 - val_accuracy: 0.9701 - val_auc: 0.6190 - val_loss: 0.1733 - learning_rate: 0.0010
Epoch 4/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9715 - auc: 0.8696 - loss: 0.0996
Epoch 4: val_loss improved from 0.17331 to 0.15166, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.9715 - auc: 0.8699 - loss: 0.0996 - val_accuracy: 0.9705 - val_auc: 0.7047 - val_loss: 0.1517 - learning_rate: 0.0010
Epoch 5/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9723 - auc: 0.8978 - loss: 0.0914
Epoch 5: val_loss improved from 0.15166 to 0.13632, saving model to /content/best_LSTM_pipeline_model.h5




[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.9724 - auc: 0.8979 - loss: 0.0913 - val_accuracy: 0.9713 - val_auc: 0.7863 - val_loss: 0.1363 - learning_rate: 0.0010
Restoring model weights from the end of the best epoch: 5.




Epoch 1/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8850 - auc: 0.0000e+00 - loss: 0.4036
Epoch 1: val_loss improved from inf to 0.04410, saving model to /content/best_LSTM_pipeline_model.h5




[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.8875 - auc: 0.0000e+00 - loss: 0.3980 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0441 - learning_rate: 0.0010
Epoch 2/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9995 - auc: 0.0000e+00 - loss: 0.0245
Epoch 2: val_loss did not improve from 0.04410
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 0.9995 - auc: 0.0000e+00 - loss: 0.0242 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0603 - learning_rate: 0.0010
Epoch 3/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0055
Epoch 3: val_loss improved from 0.04410 to 0.04393, saving model to /content/best_LSTM_pipeline_model.h5




[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0055 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0439 - learning_rate: 0.0010
Epoch 4/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.0027
Epoch 4: val_loss improved from 0.04393 to 0.02511, saving model to /content/best_LSTM_pipeline_model.h5




[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.0027 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0251 - learning_rate: 0.0010
Epoch 5/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.0017
Epoch 5: val_loss improved from 0.02511 to 0.01308, saving model to /content/best_LSTM_pipeline_model.h5




[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.0017 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0131 - learning_rate: 0.0010
Restoring model weights from the end of the best epoch: 5.




Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6789 - auc: 0.0000e+00 - loss: 0.5618
Epoch 1: val_loss improved from inf to 0.17322, saving model to /content/best_LSTM_pipeline_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2s/step - accuracy: 0.6851 - auc: 0.0000e+00 - loss: 0.5551 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.1732 - learning_rate: 0.0010
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9966 - auc: 0.0000e+00 - loss: 0.0992
Epoch 2: val_loss improved from 0.17322 to 0.09303, saving model to /content/best_LSTM_pipeline_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - accuracy: 0.9967 - auc: 0.0000e+00 - loss: 0.0981 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0930 - learning_rate: 0.0010
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9995 - auc: 0.0000e+00 - loss: 0.0233
Epoch 3: val_loss improved from 0.09303 to 0.07109, saving model to /content/best_LSTM_pipeline_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - accuracy: 0.9995 - auc: 0.0000e+00 - loss: 0.0232 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0711 - learning_rate: 0.0010
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0099
Epoch 4: val_loss improved from 0.07109 to 0.04289, saving model to /content/best_LSTM_pipeline_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - accuracy: 0.9999 - auc: 0.0000e+00 - loss: 0.0099 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0429 - learning_rate: 0.0010
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.0057
Epoch 5: val_loss improved from 0.04289 to 0.02277, saving model to /content/best_LSTM_pipeline_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.0057 - val_accuracy: 1.0000 - val_auc: 0.0000e+00 - val_loss: 0.0228 - learning_rate: 0.0010
Restoring model weights from the end of the best epoch: 5.


