# ==============================================================
#  TENSORFLOW LITE CONVERSION NOTEBOOK
#  Train a Keras neural network (replacing the XGBoost model) and convert it to TFLite for Android
# ==============================================================

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, f1_score
import json
import os
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow with the same fixed value for reproducibility
for _set_seed in (np.random.seed, tf.random.set_seed):
    _set_seed(42)

# Report the framework versions in use
print("TensorFlow version:", tf.__version__)
print("Keras version:", keras.__version__)

## STEP 1: LOAD EXISTING FEATURES AND DATA

In [None]:
print("\n=== STEP 1: LOADING EXISTING FEATURES ===")

# Load the combined train/test features and labels written by the previous notebook.
# NOTE(review): joblib.load unpickles arbitrary objects - only load files you produced yourself.
try:
    # Adjust the path if needed
    X_train_combined, X_test_combined, y_train, y_test = joblib.load("features_combined.pkl")
except FileNotFoundError:
    print("⚠️ features_combined.pkl not found. Please run the preprocessing first.")
    print("Make sure you have run the original notebook to generate this file.")
    # Every later step reads these four variables; continuing would only
    # surface as an unrelated NameError further down, so stop here instead.
    raise
else:
    print(f"✅ Features loaded successfully")
    print(f"Train shape: {X_train_combined.shape}")
    print(f"Test shape: {X_test_combined.shape}")
    print(f"Label columns: {y_train.columns.tolist()}")

In [None]:
from scipy.sparse import issparse


def _to_dense_float32(features):
    """Return `features` as float32, expanding a SciPy sparse matrix to a dense array first."""
    if issparse(features):
        return features.toarray().astype(np.float32)
    return features.astype(np.float32)


# Load the balanced training data if it is available, otherwise fall back
# to the original (unbalanced) training split.
try:
    X_bal, y_bal = joblib.load("balanced_data_fixed.pkl")
except FileNotFoundError:
    print("⚠️ Balanced data not found, using original data")
    X_bal, y_bal = X_train_combined, y_train
    use_balanced = False
else:
    print(f"✅ Balanced data loaded: {X_bal.shape}")
    use_balanced = True

# Densify the train and test matrices independently: the balanced training
# data and the original test features are loaded from different files, so
# one may be sparse while the other is already dense.
if issparse(X_bal) or issparse(X_test_combined):
    print("Converting sparse matrix to dense...")
X_bal_dense = _to_dense_float32(X_bal)
X_test_dense = _to_dense_float32(X_test_combined)

print(f"Dense train shape: {X_bal_dense.shape}")
print(f"Dense test shape: {X_test_dense.shape}")

## STEP 2: DEFINE NEURAL NETWORK ARCHITECTURE

In [None]:
print("\n=== STEP 2: BUILDING NEURAL NETWORK MODEL ===")

def build_healthbot_model(input_dim, num_labels):
    """
    Build and compile a feed-forward network for multilabel classification.

    The network stacks three Dense -> BatchNormalization -> Dropout stages
    (512, 256 and 128 units) and ends in a sigmoid layer that emits one
    independent probability per label.

    Args:
        input_dim: Number of input features per sample.
        num_labels: Number of output labels (units in the sigmoid layer).

    Returns:
        A compiled keras.Sequential model (Adam with learning rate 0.001,
        binary cross-entropy loss, per-label binary accuracy metric).
    """
    model = keras.Sequential([
        # Input layer
        layers.Input(shape=(input_dim,), dtype=tf.float32),

        # Hidden layers with dropout for regularization
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),

        # Output layer for multilabel prediction (sigmoid per label)
        layers.Dense(num_labels, activation='sigmoid')
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        # The bare 'accuracy' string is resolved by Keras from the output
        # shape; with more than one output unit it becomes categorical
        # (argmax) accuracy, which is misleading for multilabel targets.
        # Use per-label binary accuracy (threshold 0.5) and keep the name
        # 'accuracy' so the training-history keys do not change.
        metrics=[keras.metrics.BinaryAccuracy(name='accuracy')]
    )

    return model

# Take the network dimensions from the prepared data:
# feature count from the dense training matrix, label count from the targets.
INPUT_DIM = X_bal_dense.shape[1]
NUM_LABELS = y_bal.shape[1]

# Instantiate the classifier and print its layer summary
model = build_healthbot_model(input_dim=INPUT_DIM, num_labels=NUM_LABELS)
model.summary()

## STEP 3: TRAIN THE MODEL

In [None]:
print("\n=== STEP 3: TRAINING THE MODEL ===")

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# Keep only the checkpoint with the lowest val_loss on disk
model_checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# Training hyperparameters
EPOCHS = 100
BATCH_SIZE = 32

# y_bal is a DataFrame on the fallback path but may be a plain array when it
# comes from the balanced pickle; np.asarray accepts both, whereas `.values`
# would raise AttributeError on an ndarray.
y_bal_array = np.asarray(y_bal, dtype=np.float32)

# Keras takes the validation split from the LAST rows of the data, before any
# shuffling. If the rows are ordered (for example resampled rows appended at
# the end), that slice is not representative, so shuffle once with a fixed
# seed first. Note: the fancy-indexing below creates a shuffled copy.
shuffle_idx = np.random.RandomState(42).permutation(len(y_bal_array))

history = model.fit(
    np.asarray(X_bal_dense)[shuffle_idx],
    y_bal_array[shuffle_idx],
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)

## STEP 4: EVALUATE THE MODEL

In [None]:
print("\n=== STEP 4: EVALUATING THE MODEL ===")

# Restore the weights stored in best_model.h5
model.load_weights('best_model.h5')

# Per-label probabilities, then hard 0/1 decisions at a 0.5 threshold
y_pred_proba = model.predict(X_test_dense)
y_pred = np.greater(y_pred_proba, 0.5).astype(int)

# Per-label precision / recall / F1 table
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Aggregate F1 under both averaging schemes
f1_macro, f1_micro = (
    f1_score(y_test, y_pred, average=averaging, zero_division=0)
    for averaging in ('macro', 'micro')
)

print(f"\nF1 Scores:")
print(f"Macro F1: {f1_macro:.4f}")
print(f"Micro F1: {f1_micro:.4f}")

# Check the macro F1 against a fixed target value
target_f1 = 0.95
print(
    f"✅ Target achieved! F1-score ({f1_macro:.4f}) >= {target_f1}"
    if f1_macro >= target_f1
    else f"⚠️ F1-score ({f1_macro:.4f}) < {target_f1}. Consider tuning."
)

## STEP 5: EXPORT TO SAVEDMODEL FORMAT

In [None]:
print("\n=== STEP 5: EXPORTING TO SAVEDMODEL ===")

import shutil

# Start from an empty export directory: remove any previous export, then recreate it
saved_model_dir = 'healthbot_classifier'
if os.path.exists(saved_model_dir):
    shutil.rmtree(saved_model_dir)
os.makedirs(saved_model_dir)

# Write the trained model in SavedModel format
tf.saved_model.save(model, saved_model_dir)
print(f"✅ Model saved to {saved_model_dir}")

# Label lookup tables in both directions, in the column order of y_test.
# json.dump writes the integer keys of index_to_label as strings.
label_mapping = {'labels': y_test.columns.tolist()}
label_mapping['index_to_label'] = dict(enumerate(label_mapping['labels']))
label_mapping['label_to_index'] = {
    label: i for i, label in enumerate(label_mapping['labels'])
}

with open('label_mapping.json', 'w') as f:
    json.dump(label_mapping, f, indent=2)
print("✅ Label mapping saved to label_mapping.json")

## STEP 6: CONVERT TO TENSORFLOW LITE

In [None]:
print("\n=== STEP 6: CONVERTING TO TENSORFLOW LITE ===")

# Defined before the try block so the fallback path can use it as well
# (it used to be assigned only after a successful first conversion).
tflite_path = 'healthbot_classifier.tflite'


def _save_tflite(model_bytes, path):
    """Write the serialized TFLite model to `path` and return the file size in MB."""
    with open(path, 'wb') as f:
        f.write(model_bytes)
    return os.path.getsize(path) / (1024 * 1024)


# Load SavedModel
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)

# Enable the default optimizations (quantization)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Representative dataset used to calibrate quantization
def representative_dataset():
    """Yield up to 100 single-row float32 batches taken from the training data."""
    for i in range(min(100, len(X_bal_dense))):
        yield [X_bal_dense[i:i+1].astype(np.float32)]

converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]

# Convert; only the conversion itself is guarded, so a failure while writing
# the file is reported as such instead of triggering a second conversion.
try:
    tflite_model = converter.convert()
except Exception as e:
    print(f"❌ Error converting to TFLite: {e}")
    print("Trying without quantization...")

    # Fallback without quantization; an error here propagates to the caller
    converter.optimizations = []
    tflite_model = converter.convert()

    model_size = _save_tflite(tflite_model, tflite_path)
    print(f"✅ TFLite model saved (no quantization): {model_size:.2f} MB")
else:
    model_size = _save_tflite(tflite_model, tflite_path)
    print(f"✅ TFLite model saved to {tflite_path}")
    print(f"Model size: {model_size:.2f} MB")

## STEP 7: CREATE PREPROCESSING ARTIFACTS

In [None]:
print("\n=== STEP 7: CREATING PREPROCESSING ARTIFACTS ===")

# Best-effort export: any failure below is reported and execution continues
try:
    # Preprocessing components saved by the original notebook.
    # The scaler is loaded but none of its attributes are exported here.
    vectorizer = joblib.load("vectorizer_tfidf.pkl")
    scaler = joblib.load("scaler_embed.pkl")

    # Summary of the feature layout for the Android side.
    # NOTE(review): embedding_dim is hard-coded to 300 - confirm it matches
    # the embeddings actually used to build the features.
    vocab_info = dict(
        tfidf_vocab_size=len(vectorizer.vocabulary_),
        tfidf_max_features=vectorizer.max_features,
        tfidf_ngram_range=vectorizer.ngram_range,
        embedding_dim=300,
        total_features=INPUT_DIM,
    )

    with open('preprocessing_params.json', 'w') as f:
        json.dump(vocab_info, f, indent=2)

    print("✅ Preprocessing params saved to preprocessing_params.json")

except Exception as e:
    print(f"⚠️ Could not save preprocessing params: {e}")
    print("You may need to generate these from the original notebook")

## STEP 8: TEST TFLITE MODEL

In [None]:
print("\n=== STEP 8: TESTING TFLITE MODEL ===")

def test_tflite_model(tflite_path, test_input):
    """
    Run one batch through a TFLite model and return its first output tensor.

    Args:
        tflite_path: Path to the .tflite file.
        test_input: Array-like batch of input rows; it is cast to float32.

    Returns:
        The array read from the interpreter's first output tensor.
    """
    # Load TFLite model
    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    interpreter.allocate_tensors()

    # Prepare input
    input_data = np.asarray(test_input, dtype=np.float32)
    input_index = interpreter.get_input_details()[0]['index']

    # The input tensor has to match the shape of the batch being fed;
    # set_tensor rejects a mismatching shape. Resize the input to the actual
    # batch and re-allocate before setting it.
    interpreter.resize_tensor_input(input_index, input_data.shape)
    interpreter.allocate_tensors()
    interpreter.set_tensor(input_index, input_data)

    # Run inference
    interpreter.invoke()

    # Read the output details after re-allocation so they describe the resized graph
    output_details = interpreter.get_output_details()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    return output_data

# Test with up to 5 samples
print("\nTesting TFLite model with sample inputs:")
test_samples = X_test_dense[:5]
tflite_predictions = test_tflite_model('healthbot_classifier.tflite', test_samples)

# Compare the Keras probabilities with the TFLite output row by row.
# zip stops at the shorter sequence, so a test set with fewer than 5 rows
# no longer raises IndexError.
for i, (original_pred, tflite_pred) in enumerate(zip(y_pred_proba, tflite_predictions)):
    # Mean absolute difference across all labels of this sample
    diff = np.mean(np.abs(original_pred - tflite_pred))
    print(f"Sample {i}: Avg difference = {diff:.6f}")

## STEP 9: PREPARE FILES FOR ANDROID

In [None]:
print("\n=== STEP 9: PREPARING ANDROID ASSETS ===")

import shutil

# Recreate the android_assets directory from scratch so files from an
# earlier run do not linger
assets_dir = 'android_assets'
if os.path.exists(assets_dir):
    shutil.rmtree(assets_dir)
os.makedirs(assets_dir)

# (source, destination name) pairs; every file keeps its name inside assets_dir
files_to_copy = [
    (filename, filename)
    for filename in (
        'healthbot_classifier.tflite',
        'label_mapping.json',
        'preprocessing_params.json',
    )
]

for src, dst in files_to_copy:
    # Missing artifacts are reported and skipped
    if not os.path.exists(src):
        print(f"⚠️ File not found: {src}")
        continue
    shutil.copy2(src, os.path.join(assets_dir, dst))
    print(f"✅ Copied {src} to {assets_dir}/{dst}")

print(f"\n✅ Files prepared in {assets_dir}/ directory")

## FINAL SUMMARY

In [None]:
print("\n" + "="*60)
print("CONVERSION SUMMARY")
print("="*60)

# The presence of the .tflite file decides between the success report and the failure notice
if not os.path.exists('healthbot_classifier.tflite'):
    print("⚠️ Model conversion failed. Please check the errors above.")
else:
    # Size of the converted model in MB
    model_size = os.path.getsize('healthbot_classifier.tflite') / (1024 * 1024)
    print(f"✅ Neural Network Model: Trained with F1-score: {f1_macro:.4f}")
    print(f"✅ TensorFlow SavedModel: {saved_model_dir}")
    print(f"✅ TensorFlow Lite Model: healthbot_classifier.tflite")
    print(f"✅ Model Size: {model_size:.2f} MB")
    print(f"✅ Label Mapping: {len(label_mapping['labels'])} categories")
    print(f"✅ Android Assets: {assets_dir}/")
    # Static follow-up checklist, one item per line
    print(
        "\nNext steps:",
        "1. Copy files from android_assets/ to your Android project",
        "2. Implement TensorFlow Lite interpreter in Kotlin",
        "3. Add preprocessing pipeline in Android",
        "4. Test with sample data",
        sep="\n",
    )

print("="*60)