# In [4]:
import joblib
import re
import string
import numpy as np
from flask import Flask, request, jsonify
from scipy.sparse import hstack

# -------------------
# Load models
# -------------------
# The three artifacts are loaded once, at import time, from paths relative to
# the current working directory; a missing file makes the import itself fail.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only ship artifacts that come from a trusted source.
vectorizer = joblib.load("tfidf_vectorizer.pkl")
lr_model = joblib.load("logistic_regression_model.pkl")
xgb_model = joblib.load("xgboost_model.pkl")

# Phrases counted (by case-insensitive substring match) as one of the
# hand-crafted features built in predict_job_posting.
suspicious_keywords = ["earn", "money", "income", "from home", "investment", "bonus", "credit card"]

def clean_text(text):
    """Return *text* normalised for vectorisation.

    The steps run in this order: lowercase, drop URL-like tokens (anything
    starting with ``http`` or ``www`` up to the next whitespace), strip ASCII
    punctuation, then remove digit runs. Whitespace is left untouched, so
    removed tokens may leave consecutive spaces behind.
    """
    lowered = text.lower()
    without_urls = re.sub(r"http\S+|www\S+|https\S+", '', lowered)
    # One translation pass deletes every character in string.punctuation.
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punctuation = without_urls.translate(punctuation_table)
    return re.sub(r'\d+', '', without_punctuation)

def predict_job_posting(title, company_profile, description, requirements, model="xgb"):
    """Classify a job posting as fake or real.

    The four fields are joined with single spaces into one text. That text is
    cleaned with ``clean_text`` and transformed by the module-level
    ``vectorizer``; three hand-crafted counts computed on the *uncleaned* text
    are appended as extra columns before the chosen model predicts.

    Args:
        title: Posting title. ``None`` is treated as an empty string.
        company_profile: Company description. ``None`` is treated as empty.
        description: Job description. ``None`` is treated as empty.
        requirements: Job requirements. ``None`` is treated as empty.
        model: ``"lr"`` selects ``lr_model``; any other value falls back to
            ``xgb_model``.

    Returns:
        ``"Fake Job"`` when the model's prediction equals 1, otherwise
        ``"Real Job"``.
    """
    fields = (title, company_profile, description, requirements)
    # A missing field must not leak the literal word "None" into the text that
    # feeds both the TF-IDF vector and the word count.
    text = " ".join("" if field is None else str(field) for field in fields)
    text_clean = clean_text(text)

    # TF-IDF features
    X_text = vectorizer.transform([text_clean])

    # Custom features, computed on the raw text so "!" is still present.
    lowered = text.lower()  # hoisted: lowercase once, not once per keyword
    desc_length = len(text.split())  # word count over all four fields
    suspicious_words = sum(1 for keyword in suspicious_keywords if keyword in lowered)
    exclaim_count = text.count("!")
    X_custom = np.array([[desc_length, suspicious_words, exclaim_count]])

    # Combine features. Ask for CSR explicitly instead of relying on whatever
    # sparse format hstack picks by default.
    X_input = hstack([X_text, X_custom], format="csr")

    # Prediction
    classifier = lr_model if model == "lr" else xgb_model
    pred = classifier.predict(X_input)[0]

    return "Fake Job" if pred == 1 else "Real Job"

# Flask app
# Application object; the @app.route decorators in this file register their
# handlers on it.
app = Flask(__name__)

@app.route("/")
def home():
    """Serve a fixed plain-text banner at the root URL."""
    banner = "🚀 Fake Job Detection API is running!"
    return banner

@app.route("/predict", methods=["POST"])
def predict():
    """Classify the job posting described by the JSON request body.

    Expected body: a JSON object with optional keys ``title``,
    ``company_profile``, ``description``, ``requirements`` (each defaulting
    to an empty string) and ``model`` (defaulting to ``"xgb"``).

    Returns:
        ``{"prediction": <label>}`` on success, or ``{"error": ...}`` with
        HTTP 400 when the body is missing, is not valid JSON, or is not a
        JSON object.
    """
    # silent=True yields None for a missing or unparsable JSON body instead
    # of raising, so every malformed request takes the same 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Lists, strings and numbers are valid JSON but have no .get().
        return jsonify({"error": "Request body must be a JSON object."}), 400

    def text_field(key):
        # An explicit JSON null is treated the same as an absent key.
        value = data.get(key, "")
        return "" if value is None else value

    model = data.get("model", "xgb")

    result = predict_job_posting(
        text_field("title"),
        text_field("company_profile"),
        text_field("description"),
        text_field("requirements"),
        model,
    )
    return jsonify({"prediction": result})
