In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# --- 1. Data Generation and Preprocessing ---
# In a real-world scenario, you would load your own dataset here.
# For demonstration purposes, we'll generate synthetic time-series data.

def generate_synthetic_data(n_customers=1000, n_timesteps=12, n_features=5, seed=None):
    """
    Generates synthetic, standardized customer time-series data with churn labels.

    Features could represent things like:
    - monthly charges
    - customer service calls
    - data usage
    - subscription changes
    - tenure

    Args:
        n_customers: Number of customer sequences to generate.
        n_timesteps: Number of timesteps per customer.
        n_features: Number of features per timestep. Feature indices 0, 1 and 2
            are modified for churning customers, so at least 3 features are
            needed whenever any customer is labelled as churning.
        seed: Optional seed for a private ``np.random.RandomState`` so that the
            generated data is reproducible. When None (the default), NumPy's
            global random state is used.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape
        ``(n_customers, n_timesteps, n_features)`` and is standardized per
        feature across all customers and timesteps. ``y`` has shape
        ``(n_customers,)`` with 0 for no churn and 1 for churn.
    """
    # The np.random module and RandomState expose the same rand/randint API,
    # so either one can serve as the random source below.
    rng = np.random if seed is None else np.random.RandomState(seed)

    X = rng.rand(n_customers, n_timesteps, n_features)
    y = rng.randint(0, 2, n_customers)  # 0 for no churn, 1 for churn

    # Introduce some correlation for churning customers to make it less random.
    churners = np.flatnonzero(y == 1)
    # Churning customers show increasing service calls and decreasing usage.
    X[churners, :, 1] += np.linspace(0, 1, n_timesteps)    # Increasing calls
    X[churners, :, 2] -= np.linspace(0, 0.5, n_timesteps)  # Decreasing usage

    # Make the last few timesteps more indicative of churn for roughly half of
    # the churners: one uniform draw per churner, taken in customer order.
    spiking = churners[rng.rand(churners.size) > 0.5]
    X[spiking, -3:, 0] *= 1.5  # Spike in charges

    # Flatten to (samples * timesteps, features) so each feature is scaled with
    # a single mean/std. NOTE: the scaler is fit on every generated sample, so
    # a train/test split made afterwards shares these scaling statistics.
    X_flat = X.reshape(-1, n_features)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_flat)
    X = X_scaled.reshape(n_customers, n_timesteps, n_features)

    return X, y

# Build the synthetic dataset using the generator's default sizes.
print("Generating synthetic customer churn data...")
X, y = generate_synthetic_data()
# Hold out 20% of the customers for testing. random_state=42 fixes the split,
# but the generated data itself comes from NumPy's unseeded global RNG here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Shapes are (customers, timesteps, features).
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")


# --- 2. LSTM Model for Churn Prediction ---

def build_lstm_model(input_shape):
    """
    Assemble and compile an LSTM-based binary sequence classifier.

    Args:
        input_shape: Shape of a single input sample, passed straight to
            ``Input(shape=...)``.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, binary cross-entropy loss,
        accuracy metric) ending in a single sigmoid unit.
    """
    sequence_in = Input(shape=input_shape)

    # The LSTM keeps its default return_sequences=False, so only the final
    # state is passed on to the classification head.
    hidden = LSTM(64, activation='tanh')(sequence_in)
    hidden = Dropout(0.3)(hidden)
    hidden = Dense(32, activation='relu')(hidden)
    churn_probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=sequence_in, outputs=churn_probability)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

print("\n--- Building and Training LSTM Model ---")
# Per-sample input shape taken from the training array: (timesteps, features).
input_shape = (X_train.shape[1], X_train.shape[2])
lstm_model = build_lstm_model(input_shape)
lstm_model.summary()

print("\nTraining LSTM model...")
# validation_split=0.1 asks Keras to set aside a tenth of the training data
# for the per-epoch validation metrics.
lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

print("\nEvaluating LSTM model...")
# The model is compiled with one metric ('accuracy'), so evaluate() returns
# the loss followed by the accuracy.
loss, accuracy = lstm_model.evaluate(X_test, y_test, verbose=0)
print(f"LSTM Test Accuracy: {accuracy:.4f}")


# --- 3. Transformer Model for Churn Prediction ---

# We need a Transformer Encoder block for our classification task.
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """
    Creates a single Transformer Encoder block.

    The block applies two sub-layers, each followed by dropout, a residual
    connection, and layer normalization:
    1. multi-head self-attention over ``inputs``;
    2. a position-wise feed-forward network.

    Args:
        inputs: Input tensor; its last dimension is preserved by the block.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout rate used inside attention and after each sub-layer.

    Returns:
        A tensor with the same last dimension as ``inputs``.
    """
    # Multi-Head Self-Attention (query and value are both the block input)
    attention_out = MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    attention_out = Dropout(dropout)(attention_out)
    attention_out = LayerNormalization(epsilon=1e-6)(attention_out + inputs)

    # Feed Forward Network, projecting back to the input feature width so the
    # residual addition below is shape-compatible.
    ffn = tf.keras.Sequential(
        [Dense(ff_dim, activation="relu"), Dense(inputs.shape[-1])]
    )
    ffn_out = ffn(attention_out)
    ffn_out = Dropout(dropout)(ffn_out)
    # Residual connection around the feed-forward sub-layer, mirroring the
    # attention sub-layer; without it the block's output would depend only on
    # what passes through the ff_dim-wide bottleneck.
    return LayerNormalization(epsilon=1e-6)(ffn_out + attention_out)

def build_transformer_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """
    Assemble and compile a Transformer-encoder binary sequence classifier.

    Args:
        input_shape: Shape of a single input sample, passed to ``Input``.
        head_size: Forwarded to each ``transformer_encoder`` block.
        num_heads: Forwarded to each ``transformer_encoder`` block.
        ff_dim: Forwarded to each ``transformer_encoder`` block.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable of hidden sizes for the dense classification head.
        dropout: Dropout rate forwarded to the encoder blocks.
        mlp_dropout: Dropout rate applied after each dense head layer.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, binary cross-entropy loss,
        accuracy metric) ending in a single sigmoid unit.
    """
    sequence_in = Input(shape=input_shape)

    # Stack the encoder blocks, each consuming the previous block's output.
    encoded = sequence_in
    for _block in range(num_transformer_blocks):
        encoded = transformer_encoder(encoded, head_size, num_heads, ff_dim, dropout)

    # Average over the axis preceding the channels to get one vector per sample.
    pooled = GlobalAveragePooling1D(data_format="channels_last")(encoded)

    # Dense head: one ReLU layer plus dropout per entry in mlp_units.
    head = pooled
    for width in mlp_units:
        head = Dense(width, activation="relu")(head)
        head = Dropout(mlp_dropout)(head)
    churn_probability = Dense(1, activation="sigmoid")(head)

    classifier = Model(sequence_in, churn_probability)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

print("\n--- Building and Training Transformer Model ---")
# Per-sample input shape taken from the training array: (timesteps, features).
input_shape = (X_train.shape[1], X_train.shape[2])

# NOTE(review): ff_dim=4 makes the feed-forward hidden layer narrower than the
# 5 features the data generator produces by default — confirm this bottleneck
# is intended.
transformer_model = build_transformer_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)
transformer_model.summary()

print("\nTraining Transformer model...")
# Same training settings as the LSTM run (epochs, batch size, validation split)
# so the two test accuracies can be compared.
transformer_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

print("\nEvaluating Transformer model...")
# The model is compiled with one metric ('accuracy'), so evaluate() returns
# the loss followed by the accuracy. This rebinds the loss/accuracy names
# used for the LSTM evaluation.
loss, accuracy = transformer_model.evaluate(X_test, y_test, verbose=0)
print(f"Transformer Test Accuracy: {accuracy:.4f}")


# --- 4. Integrated Approach for Explainability (Conceptual) ---
# To measure explainability, you would now use a library like SHAP or LIME.
# Here is a conceptual example of how you might use SHAP with the trained models.
# It is left commented out so this script runs without `shap` installed.

# You would need to install shap: pip install shap
# import shap

# # Create an explainer object. For deep learning models, DeepExplainer is often used.
# # It's recommended to use a subset of the training data as the background distribution.
# # NOTE(review): DeepExplainer support for recurrent and attention layers presumably
# # depends on the installed shap/TensorFlow versions — verify before enabling.
# background_data = X_train[np.random.choice(X_train.shape[0], 100, replace=False)]

# # --- SHAP for LSTM ---
# print("\n--- Generating SHAP explanations for LSTM ---")
# lstm_explainer = shap.DeepExplainer(lstm_model, background_data)
# lstm_shap_values = lstm_explainer.shap_values(X_test[:5]) # Explain first 5 test instances

# # The output `lstm_shap_values` is an array. You can visualize the explanations.
# # For a time-series model, this will show which features at which timesteps
# # contributed most to the prediction.
# # NOTE(review): with (samples, timesteps, features) inputs the SHAP values presumably
# # keep the timestep axis; summary_plot may need them aggregated over timesteps — confirm.
# # shap.summary_plot(lstm_shap_values[0], feature_names=[f'F{i+1}' for i in range(X_test.shape[2])], plot_type="bar")


# # --- SHAP for Transformer ---
# print("\n--- Generating SHAP explanations for Transformer ---")
# transformer_explainer = shap.DeepExplainer(transformer_model, background_data)
# transformer_shap_values = transformer_explainer.shap_values(X_test[:5])

# print("\nExplainability analysis would follow, comparing the outputs from SHAP for both models.")
print("\nCode execution finished. You can now use libraries like SHAP or LIME to analyze these trained models.")



Generating synthetic customer churn data...
Training data shape: (800, 12, 5)
Test data shape: (200, 12, 5)

--- Building and Training LSTM Model ---



Training LSTM model...
Epoch 1/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.7528 - loss: 0.6111 - val_accuracy: 1.0000 - val_loss: 0.2881
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.9981 - loss: 0.1817 - val_accuracy: 1.0000 - val_loss: 0.0444
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9994 - loss: 0.0266 - val_accuracy: 1.0000 - val_loss: 0.0088
Epoch 4/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9951 - loss: 0.0127 - val_accuracy: 1.0000 - val_loss: 0.0030
Epoch 5/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 1.0000 - loss: 0.0028 - val_accuracy: 1.0000 - val_loss: 0.0015
Epoch 6/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accuracy: 1.0000 - val_loss: 8.0548e-04
Epoch 


Training Transformer model...
Epoch 1/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 182ms/step - accuracy: 0.4834 - loss: 0.6993 - val_accuracy: 0.4250 - val_loss: 0.6951
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 92ms/step - accuracy: 0.4665 - loss: 0.7020 - val_accuracy: 0.4250 - val_loss: 0.6935
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 93ms/step - accuracy: 0.5089 - loss: 0.6975 - val_accuracy: 0.5750 - val_loss: 0.6905
Epoch 4/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 90ms/step - accuracy: 0.5049 - loss: 0.6959 - val_accuracy: 0.9000 - val_loss: 0.6203
Epoch 5/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 132ms/step - accuracy: 0.8434 - loss: 0.5944 - val_accuracy: 1.0000 - val_loss: 0.2194
Epoch 6/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 95ms/step - accuracy: 0.9902 - loss: 0.2410 - val_accuracy: 1.0000 - val_loss: 0.0520
