In [3]:
import joblib
import numpy as np
from scipy.sparse import hstack

# Load the saved models and encoders
# The paths are relative, so they resolve against the current working directory.
# NOTE(security): joblib.load unpickles its input and can execute arbitrary
# code while doing so -- only load artifacts that come from a trusted source.
# Subcategory classifier; predict_categories feeds it the vectorized text.
xgb_sub = joblib.load('xgb_model.joblib')
# Main-category model; predict_categories feeds it the text features plus the
# encoded subcategory prediction as one extra column.
# NOTE(review): the variable name says "rf" while the file name says
# "log_reg_pipeline" -- confirm which estimator this artifact really holds.
rf_category = joblib.load('log_reg_pipeline_model.joblib')
# Fitted text vectorizer; only its transform() is called in this file.
vectorizer = joblib.load('vectorizer.joblib')
# Encoder used to decode the subcategory predictions via inverse_transform.
le = joblib.load('le.joblib')
# Encoder used to decode the main-category predictions via inverse_transform.
le_category = joblib.load('le_cat.joblib')

def predict_categories(text):
    """
    Classify a single text into a subcategory and a main category.

    Prediction runs in two stages. The text is vectorized and passed to the
    subcategory model; the encoded subcategory it predicts is then appended
    to the text features as one extra column, and the widened matrix is
    passed to the category model. Each encoded prediction is decoded with
    its own label encoder.

    Args:
        text (str): The input text to be classified.

    Returns:
        Tuple[str, str]: The decoded subcategory label followed by the
        decoded main category label.
    """
    # The vectorizer receives a one-element batch holding just this text.
    features = vectorizer.transform([text])

    # Stage 1: encoded subcategory, decoded straight away.
    sub_codes = xgb_sub.predict(features)
    sub_name = le.inverse_transform(sub_codes)[0]

    # Stage 2: the encoded subcategory becomes a trailing feature column.
    sub_column = sub_codes.reshape(-1, 1)
    stacked = hstack((features, sub_column))
    cat_codes = rf_category.predict(stacked)

    return sub_name, le_category.inverse_transform(cat_codes)[0]

# Example usage
# Sample input text. NOTE(review): it looks like already-cleaned, abbreviated
# tokens -- confirm it matches the preprocessing applied to the training text.
new_text = "victim website url frd sent rupee th dec tm transaction ID als add whats app group many group nm zyra contact dtd"

# These statements sit at module level, so the prediction and both prints run
# every time this file/cell is executed.
subcategory, main_category = predict_categories(new_text)
print(f"Predicted Subcategory: {subcategory}")
print(f"Predicted Main Category: {main_category}")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Predicted Subcategory: Fake/Impersonating Profile
Predicted Main Category: Other Cyber Crime
