In [7]:
import sys
import pickle
from pathlib import Path

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# -----------------------------
# PATH SETUP
# -----------------------------
# The cell may be run from the project root or from a folder one level below
# it. Whichever of the two contains a "src" directory is used as the root; the
# parent is tried first and the working directory is the fallback.
HERE = Path.cwd()
ROOT = HERE.parent if (HERE.parent / "src").exists() else HERE

# All project folders hang off the resolved root.
SRC_DIR = ROOT / "src"
DATA_DIR = ROOT / "data"
MODEL_DIR = ROOT / "models"

# Echo the resolved locations so a wrong working directory is easy to spot.
print("Notebook:", HERE)
print("ROOT:", ROOT)
print("SRC_DIR exists?", SRC_DIR.exists())
print("DATA_DIR exists?", DATA_DIR.exists())

# Put src/ at the front of the import path so the project module below resolves.
sys.path.insert(0, str(SRC_DIR))

from feature_engineering import (
    prepare_intent_features,
    prepare_verification_features,
)

# Create the output folder for the pickled models; a no-op when it already exists.
MODEL_DIR.mkdir(exist_ok=True)

# -----------------------------
# TRAIN INTENT MODEL
# -----------------------------
# Build features from data/intent_train.csv, fit a scaler + logistic-regression
# pipeline on them, and pickle the fitted pipeline into the models folder.
intent_csv = DATA_DIR / "intent_train.csv"
print("\nLooking for intent csv:", intent_csv)

if intent_csv.exists():
    # The helper is handed the path as a plain string and returns (X, y).
    X_intent, y_intent = prepare_intent_features(str(intent_csv))
else:
    # Stop here with an explicit message rather than continuing without data.
    raise FileNotFoundError(f"‚ùå intent_train.csv not found inside {DATA_DIR}")

print("‚úÖ Intent features created:", X_intent.shape)

# Standardise the features, then classify with logistic regression.
intent_steps = [
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=1000)),
]
intent_model = Pipeline(steps=intent_steps)
intent_model.fit(X_intent, y_intent)

# Persist the whole fitted pipeline (scaler + classifier) as one pickle.
intent_model_path = MODEL_DIR / "intent_model.pkl"
with intent_model_path.open("wb") as model_file:
    pickle.dump(intent_model, model_file)

print("‚úÖ Saved:", intent_model_path)

# -----------------------------
# TRAIN VERIFICATION MODEL
# -----------------------------
# Build features from data/fake_train.csv, fit a scaler + logistic-regression
# pipeline on them, and pickle the fitted pipeline into the models folder.
verify_csv = DATA_DIR / "fake_train.csv"
print("\nLooking for verification csv:", verify_csv)

if not verify_csv.exists():
    # Take the file name from the path that was actually checked, so the error
    # always names the file the script is looking for.
    raise FileNotFoundError(f"‚ùå {verify_csv.name} not found inside {DATA_DIR}")

# The helper is handed the path as a plain string and returns (X, y).
X_ver, y_ver = prepare_verification_features(str(verify_csv))
print("‚úÖ Verification features created:", X_ver.shape)

# Standardise the features, then classify with logistic regression.
verification_model = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=1000))
])

verification_model.fit(X_ver, y_ver)

# Persist the whole fitted pipeline (scaler + classifier) as one pickle.
with open(MODEL_DIR / "verification_model.pkl", "wb") as f:
    pickle.dump(verification_model, f)

print("‚úÖ Saved:", MODEL_DIR / "verification_model.pkl")

print("\nüéâ DONE: Both models trained + saved.")


Notebook: C:\Users\Lenovo\PROJECT
ROOT: C:\Users\Lenovo\PROJECT
SRC_DIR exists? True
DATA_DIR exists? True

Looking for intent csv: C:\Users\Lenovo\PROJECT\data\intent_train.csv
‚úÖ Intent features created: (400, 3)
‚úÖ Saved: C:\Users\Lenovo\PROJECT\models\intent_model.pkl

Looking for verification csv: C:\Users\Lenovo\PROJECT\data\fake_train.csv
‚úÖ Verification features created: (400, 4)
‚úÖ Saved: C:\Users\Lenovo\PROJECT\models\verification_model.pkl

üéâ DONE: Both models trained + saved.
