In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib
import os
print(os.listdir("/kaggle/input"))

df = pd.read_csv("/kaggle/input/real-or-fake-fake-jobposting-prediction/fake_job_postings.csv")

df = df[['title', 'description', 'fraudulent']].dropna()

df['text'] = df['title'] + " " + df['description']

X = df['text']
y = df['fraudulent']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

model = LogisticRegression(max_iter=500)
model.fit(X_train_tfidf, y_train)

y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

os.makedirs("model", exist_ok=True)
joblib.dump(model, "model/fake_job_model.pkl")
joblib.dump(vectorizer, "model/vectorizer.pkl")

import pickle
import os

os.makedirs("/kaggle/working/model", exist_ok=True)

with open("/kaggle/working/model/fake_job_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("/kaggle/working/model/vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

print("✅ Model and vectorizer saved in /kaggle/working/model/")

['real-or-fake-fake-jobposting-prediction']
Accuracy: 0.9658836689038032
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      3394
           1       0.95      0.35      0.51       182

    accuracy                           0.97      3576
   macro avg       0.96      0.67      0.75      3576
weighted avg       0.97      0.97      0.96      3576

✅ Model and vectorizer saved in /kaggle/working/model/
