In [None]:
import os
import sys

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from tensorflow import keras

# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath('.')))

from preprocessing.preprocess import load_csv, clean_data, preprocess_for_model

sns.set_style('darkgrid')
%matplotlib inline

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}")

## Load & Prepare Data

In [None]:
# Read the CSV and pass it through the project's cleaning step.
dataset_path = 'datasets/sample_toy.csv'
df = clean_data(load_csv(dataset_path))

# `prep` is indexed by split name below; it is also indexed for 'scaler' and
# 'feature_columns' later in the notebook when artifacts are saved.
prep = preprocess_for_model(df, label_col='label')

X_train, y_train = prep['X_train'], prep['y_train']
X_val, y_val = prep['X_val'], prep['y_val']
X_test, y_test = prep['X_test'], prep['y_test']

print("Data loaded and preprocessed")
print(f"Training: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}")

## TensorFlow Neural Network

In [None]:
# Set random seeds for reproducibility (NumPy and TensorFlow global seeds)
np.random.seed(42)
tf.random.set_seed(42)

# Build neural network: the input width is taken from the training matrix so
# the model follows whatever feature count preprocessing produced.
input_dim = X_train.shape[1]

# Three ReLU hidden layers (64 -> 32 -> 16) with dropout after the first two,
# ending in a single sigmoid unit for binary classification.
model_tf = keras.Sequential([
    keras.layers.Input(shape=(input_dim,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

print("Model architecture:")
# summary() prints the table itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table.
model_tf.summary()

In [None]:
# Attach the training configuration: Adam (learning rate 0.001),
# binary cross-entropy loss, accuracy reported as the tracked metric.
adam = keras.optimizers.Adam(learning_rate=0.001)
model_tf.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

print("Model compiled")

In [None]:
# Early stopping: watch validation loss, give up after 5 epochs without
# improvement, and roll the model back to its best weights.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True, verbose=1
)
training_callbacks = [early_stop]

# Fit for at most 30 epochs in mini-batches of 32, validating on the
# held-out validation split after every epoch.
history = model_tf.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=32,
    callbacks=training_callbacks,
    verbose=1,
)

In [None]:
# Plot training history: one panel per tracked metric, each showing the
# training curve against the validation curve.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (key in history.history, human-readable name used in labels and titles)
history_panels = [('loss', 'Loss'), ('accuracy', 'Accuracy')]

for ax, (metric_key, metric_name) in zip(axes, history_panels):
    ax.plot(history.history[metric_key], label=f'Train {metric_name}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    ax.set_title(f'TensorFlow Model {metric_name}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.legend()
    # A bare grid() call *toggles* visibility. The 'darkgrid' seaborn style
    # already enables the grid, so the toggle would hide it; request it explicitly.
    ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Score the trained network on the test split; evaluate() yields the loss
# followed by the compiled metric (accuracy).
tf_test_metrics = model_tf.evaluate(X_test, y_test, verbose=0)
tf_loss, tf_accuracy = tf_test_metrics
print(f"TensorFlow Test Loss: {tf_loss:.4f}")
print(f"TensorFlow Test Accuracy: {tf_accuracy:.4f}")

# Flatten the prediction array to 1-D scores, then cut at 0.5 to obtain
# hard 0/1 class predictions.
decision_threshold = 0.5
y_pred_prob_tf = model_tf.predict(X_test).flatten()
y_pred_tf = (y_pred_prob_tf > decision_threshold).astype(int)

# ROC AUC is computed from the continuous scores, not the thresholded labels.
roc_auc_tf = roc_auc_score(y_test, y_pred_prob_tf)
print(f"TensorFlow ROC AUC: {roc_auc_tf:.4f}")

## scikit-learn Model for Comparison

In [None]:
# Train scikit-learn baseline: logistic regression on the same training
# split as the neural network (LogisticRegression / accuracy_score are
# imported with the rest of the notebook's dependencies in the first cell).
model_sk = LogisticRegression(max_iter=200, random_state=42)
model_sk.fit(X_train, y_train)

print("scikit-learn model trained")

# Predict once and reuse the result for both the accuracy figure and the
# later classification report / confusion matrix.
y_pred_sk = model_sk.predict(X_test)
# Column 1 of predict_proba holds the score for the second entry of
# model_sk.classes_ (the positive class for 0/1 labels).
y_pred_prob_sk = model_sk.predict_proba(X_test)[:, 1]

# Evaluate
sk_accuracy = accuracy_score(y_test, y_pred_sk)
print(f"scikit-learn Test Accuracy: {sk_accuracy:.4f}")

# Calculate ROC AUC from the continuous scores
roc_auc_sk = roc_auc_score(y_test, y_pred_prob_sk)
print(f"scikit-learn ROC AUC: {roc_auc_sk:.4f}")

## Model Comparison

In [None]:
# Side-by-side metrics table: one row per framework.
comparison_rows = [
    ('TensorFlow', tf_accuracy, roc_auc_tf),
    ('scikit-learn', sk_accuracy, roc_auc_sk),
]
comparison = pd.DataFrame(
    comparison_rows,
    columns=['Framework', 'Test Accuracy', 'ROC AUC'],
)

print("Model Comparison:")
print(comparison.to_string(index=False))

In [None]:
# Visualize comparison: one bar chart per metric, one bar per framework,
# each bar annotated with its value.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

bar_colors = ['steelblue', 'coral']
# (column in `comparison`, y-axis label); the column name also forms the title.
metric_panels = [('Test Accuracy', 'Accuracy'), ('ROC AUC', 'ROC AUC')]

for ax, (metric_column, y_label) in zip(axes, metric_panels):
    ax.bar(comparison['Framework'], comparison[metric_column], color=bar_colors)
    ax.set_ylabel(y_label)
    ax.set_title(f'{metric_column} Comparison')
    # Both metrics live in [0, 1], but the value labels are drawn 0.02 above
    # each bar. Leave headroom so labels for near-perfect scores stay inside
    # the axes instead of colliding with the title.
    ax.set_ylim([0, 1.1])
    for bar_index, value in enumerate(comparison[metric_column]):
        ax.text(bar_index, value + 0.02, f'{value:.4f}', ha='center')

plt.tight_layout()
plt.show()

## Classification Metrics

In [None]:
# Per-class precision/recall/F1 for each model, printed under its own header.
class_labels = ['Class 0', 'Class 1']
report_inputs = (
    ("TensorFlow Classification Report:", y_pred_tf),
    ("\nscikit-learn Classification Report:", y_pred_sk),
)

for report_header, predictions in report_inputs:
    print(report_header)
    print(classification_report(y_test, predictions, target_names=class_labels))

In [None]:
# Confusion matrices for both models, computed from their hard predictions.
cm_tf = confusion_matrix(y_test, y_pred_tf)
cm_sk = confusion_matrix(y_test, y_pred_sk)

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Draw each matrix as an annotated heatmap (integer counts, no colour bar).
heatmap_panels = zip(axes, (cm_tf, cm_sk), ('TensorFlow', 'scikit-learn'))
for ax, matrix, framework in heatmap_panels:
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax, cbar=False)
    ax.set_title(f'{framework} Confusion Matrix')
    ax.set_ylabel('True')
    ax.set_xlabel('Predicted')

plt.tight_layout()
plt.show()

In [None]:
# ROC curves: false/true positive rates for each model's continuous scores
# (the thresholds returned by roc_curve are not needed here).
fpr_tf, tpr_tf, _ = roc_curve(y_test, y_pred_prob_tf)
fpr_sk, tpr_sk, _ = roc_curve(y_test, y_pred_prob_sk)

plt.figure(figsize=(8, 6))
plt.plot(fpr_tf, tpr_tf, label=f'TensorFlow (AUC={roc_auc_tf:.4f})', linewidth=2)
plt.plot(fpr_sk, tpr_sk, label=f'scikit-learn (AUC={roc_auc_sk:.4f})', linewidth=2)
# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier', linewidth=1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves')
plt.legend()
# A bare grid() call *toggles* visibility. The 'darkgrid' seaborn style
# already enables the grid, so the toggle would hide it; request it explicitly.
plt.grid(True)
plt.show()

## Save Models

In [None]:
# Make sure the output directory exists (no error if it already does).
os.makedirs('models', exist_ok=True)

# Destination of every artifact written by this cell; later cells reuse
# these path variables.
tf_model_path = 'models/tensorflow_model.h5'
sk_model_path = 'models/sklearn_model.pkl'
scaler_path = 'models/scaler.pkl'
features_path = 'models/feature_columns.pkl'

# The Keras model is written with its own save() method.
model_tf.save(tf_model_path)
print(f"TensorFlow model saved to: {tf_model_path}")

# Everything else is serialised with joblib: (label for the log line,
# object to dump, destination path).
joblib_artifacts = [
    ("scikit-learn model", model_sk, sk_model_path),
    ("Scaler", prep['scaler'], scaler_path),
    ("Feature columns", prep['feature_columns'], features_path),
]
for artifact_label, artifact, artifact_path in joblib_artifacts:
    joblib.dump(artifact, artifact_path)
    print(f"{artifact_label} saved to: {artifact_path}")

## Load & Test Saved Models

In [None]:
# Round-trip check: read every saved artifact back from disk.
loaded_tf_model = keras.models.load_model(tf_model_path)
loaded_sk_model, loaded_scaler, loaded_features = (
    joblib.load(artifact_path)
    for artifact_path in (sk_model_path, scaler_path, features_path)
)

print("Models loaded successfully")

# Re-score the reloaded models on the test split. evaluate() returns
# [loss, accuracy], so index 1 is the accuracy.
loaded_tf_metrics = loaded_tf_model.evaluate(X_test, y_test, verbose=0)
loaded_tf_acc = loaded_tf_metrics[1]
loaded_sk_acc = loaded_sk_model.score(X_test, y_test)

print(f"Loaded TensorFlow accuracy: {loaded_tf_acc:.4f}")
print(f"Loaded scikit-learn accuracy: {loaded_sk_acc:.4f}")

## Export to TensorFlow Lite (Mobile)

In [None]:
# Convert the in-memory Keras model to TensorFlow Lite.
converter = tf.lite.TFLiteConverter.from_keras_model(model_tf)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode.
tflite_path = 'models/model.tflite'
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

# Report the on-disk size in kilobytes (bytes / 1024).
tflite_size_kb = os.path.getsize(tflite_path) / 1024
print(f"TensorFlow Lite model saved to: {tflite_path}")
print(f"File size: {tflite_size_kb:.2f} KB")

## Summary

In [None]:
# Final recap: winner by test accuracy, headline metrics, written artifacts.
banner = "=" * 50
print(banner)
print("TRAINING SUMMARY")
print(banner)

# TensorFlow is reported only when strictly more accurate; a tie goes to
# scikit-learn.
if tf_accuracy > sk_accuracy:
    best_model = 'TensorFlow'
else:
    best_model = 'scikit-learn'
print(f"\nBest Model: {best_model}")

print("\nFinal Metrics:")
print(f"  TensorFlow - Accuracy: {tf_accuracy:.4f}, ROC AUC: {roc_auc_tf:.4f}")
print(f"  scikit-learn - Accuracy: {sk_accuracy:.4f}, ROC AUC: {roc_auc_sk:.4f}")

print("\nModel Files:")
for artifact_path in (tf_model_path, sk_model_path, scaler_path, features_path):
    print(f"  - {artifact_path}")
print(f"  - {tflite_path} (for mobile)")

print("\nNext Steps:")
next_steps = (
    "Deploy model via Flask/FastAPI endpoint",
    "Integrate with mobile app using TFLite model",
    "Monitor model performance in production",
)
for step_number, step_text in enumerate(next_steps, start=1):
    print(f"  {step_number}. {step_text}")