# FactRadar Model Conversion to TensorFlow.js
Convert the trained model for web deployment, together with the preprocessing pipeline it depends on.

## Conversion Process:
1. Load best trained model from model_training.ipynb
2. Build and train a TensorFlow/Keras network that stands in for the scikit-learn model
3. Convert to TensorFlow.js format
4. Export preprocessing parameters and vocabulary
5. Validate converted model
6. Prepare deployment files for React frontend

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import json
import os
import joblib
import tensorflow as tf
import tensorflowjs as tfjs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Startup banner: a single print call emits the title, the installed
# TensorFlow and TensorFlow.js versions, and a 60-character rule, one per line.
print(
    '🔄 FactRadar Model Conversion Pipeline',
    f'TensorFlow version: {tf.__version__}',
    f'TensorFlow.js version: {tfjs.__version__}',
    '=' * 60,
    sep='\n',
)

## 1. Load Trained Model and Data

In [None]:
def load_trained_components():
    """Load the saved model, its vectorizer, the model metadata and the dataset.

    The metadata JSON supplies the ``model_path`` and ``vectorizer_path`` that
    are handed to ``joblib.load``. The processed dataset CSV is optional.

    Returns:
        A ``(model, vectorizer, metadata, df)`` tuple. All four entries are
        ``None`` when the metadata file does not exist; only ``df`` is ``None``
        when the processed dataset CSV does not exist.
    """
    metadata_file = "../data/processed/models/best_model_metadata.json"
    dataset_file = "../data/processed/fully_processed_dataset.csv"

    # Guard: without the metadata file there are no artifact paths to load.
    if not os.path.exists(metadata_file):
        print("❌ Model metadata not found! Please run model_training.ipynb first.")
        return (None,) * 4

    with open(metadata_file, 'r') as handle:
        metadata = json.load(handle)

    print("📁 Loading trained model components...")
    print(f"   • Model: {metadata['model_name']}")
    metrics = metadata['performance_metrics']
    print(f"   • Accuracy: {metrics['test_accuracy']:.4f}")
    print(f"   • F1-Score: {metrics['test_f1_score']:.4f}")

    # Both artifacts are joblib files located via paths stored in the metadata.
    model = joblib.load(metadata['model_path'])
    vectorizer = joblib.load(metadata['vectorizer_path'])

    # The dataset is only needed later for training, so its absence is
    # reported as a warning instead of aborting the load.
    if not os.path.exists(dataset_file):
        print("⚠️  Processed dataset not found, using minimal conversion")
        return model, vectorizer, metadata, None

    df = pd.read_csv(dataset_file)
    print(f"   • Dataset: {len(df):,} samples")
    return model, vectorizer, metadata, df

# Run the loader once; the four results are read by the cells that follow.
sklearn_model, tfidf_vectorizer, model_metadata, dataset = load_trained_components()

# Report the outcome of the load.
if sklearn_model is None:
    print("❌ Cannot proceed without trained model!")
else:
    print("✅ Components loaded successfully!")
    print(f"   • Model type: {type(sklearn_model).__name__}")
    print(f"   • Feature count: {model_metadata['dataset_info']['feature_count']:,}")

## 2. Create Keras Model Wrapper

In [None]:
def create_keras_model_from_sklearn(sklearn_model, input_shape):
    """Build and compile a fresh dense network for binary classification.

    Args:
        sklearn_model: Not read by this function; no parameters are copied
            from it. The returned network starts from its initial weights.
        input_shape: Number of input features (length of one input vector).

    Returns:
        A compiled, untrained Keras ``Sequential`` model with four ReLU hidden
        layers (256, 128, 64, 32 units) and a single sigmoid output unit.
    """
    print("🔄 Creating Keras model wrapper...")
    print(f"   • Input shape: {input_shape}")

    # Assemble the layer stack: input, tapering hidden layers, sigmoid output.
    hidden_units = (256, 128, 64, 32)
    layers = [Input(shape=(input_shape,))]
    layers.extend(Dense(units, activation='relu') for units in hidden_units)
    layers.append(Dense(1, activation='sigmoid'))

    model = Sequential(layers)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    print("✅ Keras model created!")
    model.summary()
    return model

# Size the network's input from the feature count recorded in the metadata.
if sklearn_model is None:
    print("❌ No sklearn model available for conversion!")
else:
    input_shape = model_metadata['dataset_info']['feature_count']
    keras_model = create_keras_model_from_sklearn(
        sklearn_model=sklearn_model,
        input_shape=input_shape,
    )

## 3. Train Keras Model to Match Performance

In [None]:
# Train the Keras network on the combined TF-IDF + numerical features and
# compare its held-out accuracy with the scikit-learn model's.
if sklearn_model is None or dataset is None:
    print("❌ Cannot train Keras model without data!")
else:
    print("🔄 Training Keras model to match sklearn performance...")

    # Text features: TF-IDF of the processed text, missing text treated as ''.
    X_text = tfidf_vectorizer.transform(dataset['processed_text'].fillna(''))

    # Numerical features: the columns named in the metadata, NaN replaced by 0.
    feature_names = model_metadata['feature_info']['feature_names']
    X_numerical = dataset[feature_names].fillna(0)

    # Stack both feature groups side by side, then densify for Keras.
    from scipy.sparse import hstack
    X_combined = hstack([X_text, X_numerical.values])
    X_dense = X_combined.toarray()

    y = dataset['label']

    # Stratified 80/20 split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X_dense,
        y,
        stratify=y,
        random_state=42,
        test_size=0.2,
    )

    print(f"   • Training samples: {X_train.shape[0]:,}")
    print(f"   • Test samples: {X_test.shape[0]:,}")
    print(f"   • Features: {X_train.shape[1]:,}")

    print("\n🔄 Training Keras model...")
    history = keras_model.fit(
        x=X_train,
        y=y_train,
        batch_size=64,
        epochs=20,
        validation_data=(X_test, y_test),
        verbose=1,
    )

    # Score the trained network on the held-out split.
    keras_loss, keras_accuracy = keras_model.evaluate(X_test, y_test, verbose=0)
    print("\n✅ Keras model performance:")
    print(f"   • Test accuracy: {keras_accuracy:.4f}")

    # Score the scikit-learn model on the same split for a direct comparison.
    sklearn_predictions = sklearn_model.predict(X_test)
    sklearn_accuracy = (sklearn_predictions == y_test).mean()
    print(f"   • Original sklearn accuracy: {sklearn_accuracy:.4f}")
    print(f"   • Performance difference: {abs(keras_accuracy - sklearn_accuracy):.4f}")

## 4. Convert to TensorFlow.js

In [None]:
# Export the Keras model as TensorFlow.js artifacts and list what was written.
if 'keras_model' in locals():
    # Destination directory for the TensorFlow.js artifacts
    output_path = '../../models/tfjs_model'

    print("🔄 Converting to TensorFlow.js...")
    print(f"   • Output path: {output_path}")

    # The training cell is the only place keras_accuracy is assigned; if it is
    # missing, training was skipped and the network still has initial weights.
    if 'keras_accuracy' not in locals():
        print("⚠️  Keras model has not been trained; exporting untrained weights")

    # Create the output directory (and its parents) if needed
    os.makedirs(output_path, exist_ok=True)

    # Quantize every weight to 16 bits to reduce the download size.
    # save_keras_model() takes `quantization_dtype_map` (dtype -> weight-name
    # pattern); `quantization_bytes` is a converter CLI flag, not a keyword
    # argument of this function, and passing it raises TypeError.
    tfjs.converters.save_keras_model(
        keras_model,
        output_path,
        quantization_dtype_map={'uint16': '*'},
    )

    print("✅ Model conversion completed!")

    # Report each generated file with its size in KB
    if os.path.exists(output_path):
        print("\n📁 Generated files:")
        for file in os.listdir(output_path):
            size = os.path.getsize(os.path.join(output_path, file)) / 1024
            print(f"   • {file}: {size:.2f} KB")

else:
    print("❌ No Keras model available for conversion!")

## 5. Export Preprocessing Components

In [None]:
# Export everything the frontend needs to rebuild the model's input vector:
# the vocabulary, the IDF weights and a configuration summary.
if tfidf_vectorizer is not None:
    print("🔄 Exporting preprocessing components...")

    # Make sure the target directory exists even if the conversion cell,
    # which would otherwise create it, was not run.
    os.makedirs('../../models', exist_ok=True)

    # Export TF-IDF vocabulary as {term: column index}
    vocab = tfidf_vectorizer.get_feature_names_out()
    vocab_dict = {str(word): idx for idx, word in enumerate(vocab)}

    vocab_path = '../../models/vocabulary.json'
    with open(vocab_path, 'w', encoding='utf-8') as f:
        json.dump(vocab_dict, f)

    print(f"✅ Vocabulary exported: {vocab_path}")
    print(f"   • Vocabulary size: {len(vocab_dict):,}")

    # Export the fitted IDF weights (one per vocabulary index). Without them
    # the client cannot reproduce the TF-IDF values the model was trained on.
    # `idf_` is unavailable when the vectorizer was fitted with use_idf=False.
    idf_weights = getattr(tfidf_vectorizer, 'idf_', None)
    idf_file = None
    if idf_weights is not None:
        idf_file = 'idf_weights.json'
        idf_path = '../../models/' + idf_file
        with open(idf_path, 'w', encoding='utf-8') as f:
            json.dump([float(weight) for weight in idf_weights], f)
        print(f"✅ IDF weights exported: {idf_path}")

    # Read the TF-IDF settings from the fitted vectorizer instead of
    # hard-coding them, so the exported config cannot drift from training.
    vectorizer_params = tfidf_vectorizer.get_params()
    stop_words = vectorizer_params.get('stop_words')
    if stop_words is not None and not isinstance(stop_words, str):
        # A custom collection of stop words: make it JSON-serializable
        stop_words = sorted(stop_words)

    # Export preprocessing parameters
    preprocessing_config = {
        'model_info': {
            'name': model_metadata['model_name'],
            'type': model_metadata['model_type'],
            'accuracy': model_metadata['performance_metrics']['test_accuracy'],
            'f1_score': model_metadata['performance_metrics']['test_f1_score']
        },
        'tfidf_params': {
            'max_features': len(vocab_dict),
            'ngram_range': [int(n) for n in vectorizer_params.get('ngram_range', (1, 1))],
            'stop_words': stop_words,
            'lowercase': vectorizer_params.get('lowercase'),
            'sublinear_tf': vectorizer_params.get('sublinear_tf'),
            'use_idf': vectorizer_params.get('use_idf'),
            'smooth_idf': vectorizer_params.get('smooth_idf'),
            'norm': vectorizer_params.get('norm'),
            'idf_file': idf_file
        },
        'feature_engineering': {
            'numerical_features': model_metadata['feature_info']['feature_names'],
            'total_features': model_metadata['dataset_info']['feature_count']
        },
        'text_processing': {
            'steps': [
                'HTML tag removal',
                'URL removal', 
                'Punctuation normalization',
                'Tokenization',
                'Stopword removal',
                'Stemming',
                'TF-IDF vectorization'
            ]
        }
    }

    config_path = '../../models/preprocessing_config.json'
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(preprocessing_config, f, indent=2)

    print(f"✅ Preprocessing config exported: {config_path}")

else:
    print("❌ No vectorizer available for export!")

## 6. Validation and Deployment Preparation

In [None]:
# Final validation and summary: report what was produced and what to do next.
print("🎯 CONVERSION VALIDATION & SUMMARY")
print("=" * 60)

if 'keras_model' in locals() and tfidf_vectorizer is not None:
    print("✅ Conversion completed successfully!")
    print("\n📊 Model Information:")
    print(f"   • Original model: {model_metadata['model_name']}")
    print(f"   • Original accuracy: {model_metadata['performance_metrics']['test_accuracy']:.4f}")
    # keras_accuracy is only assigned by the training cell, which is skipped
    # when the processed dataset is missing; avoid a NameError in that case.
    if 'keras_accuracy' in locals():
        print(f"   • Keras accuracy: {keras_accuracy:.4f}")
    else:
        print("   • Keras accuracy: n/a (training was skipped)")
    print(f"   • Feature count: {model_metadata['dataset_info']['feature_count']:,}")

    print("\n📁 Deployment Files:")
    print("   • TensorFlow.js model: models/tfjs_model/")
    print("   • Vocabulary: models/vocabulary.json")
    print("   • Config: models/preprocessing_config.json")

    print("\n🚀 Ready for Frontend Integration!")
    print("\n📋 Next Steps:")
    print("   1. Integrate TensorFlow.js model with React frontend")
    print("   2. Implement client-side preprocessing")
    print("   3. Add real-time prediction interface")
    print("   4. Test end-to-end functionality")
    print("   5. Deploy to production")

    print("\n💡 Integration Notes:")
    print(f"   • Model expects {model_metadata['dataset_info']['feature_count']:,} features")
    print("   • Text preprocessing must match training pipeline")
    print("   • Use vocabulary.json for consistent tokenization")
    print("   • Expected inference time: < 100ms")

else:
    print("❌ Conversion incomplete!")
    print("   • Check that model_training.ipynb has been run successfully")
    print("   • Ensure all required files are present")

print("\n🎉 Model conversion pipeline completed!")