In [None]:
import os

import joblib
import pandas as pd
import psycopg2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Load the cleaned articles from PostgreSQL into a DataFrame.
#
# Connection settings are read from the PG* environment variables when they
# are set, so credentials do not have to live in the notebook. The fallbacks
# are the original local-development values. Set PGHOST=postgres when the
# notebook runs from inside the container.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "fakenews"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "123"),
    host=os.environ.get("PGHOST", "localhost"),
    port=int(os.environ.get("PGPORT", "5432")),
)

# Close the connection even if the query raises, so a failed run does not
# leave a session open on the server.
try:
    df = pd.read_sql_query("SELECT title, content FROM cleaned_news", conn)
finally:
    conn.close()

# Drop rows where title or content is missing. Rebinding instead of
# inplace=True keeps the data flow of this cell explicit.
df = df.dropna()
df.head()


In [None]:
# Placeholder labels for the initial example: 1 = hoax, 0 = genuine.
# They are synthetic (assigned by row position, not by article content);
# replace them with labels from a real dataset later.
# Rows at even positions get 1, rows at odd positions get 0.
df["label"] = [1 - (position % 2) for position in range(len(df))]


In [None]:
# Features are the raw article text; the target is the hoax/genuine label.
X = df["content"]
y = df["label"]

# Split the raw text BEFORE vectorizing. Fitting TF-IDF on the full corpus
# would let the held-out documents shape the vocabulary and IDF weights,
# which leaks test information into training and inflates the scores.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the vocabulary (capped at 3000 terms) from the training split only,
# then apply that fitted vectorizer to the test split.
tfidf = TfidfVectorizer(max_features=3000)
X_train = tfidf.fit_transform(X_train_text)
X_test = tfidf.transform(X_test_text)

# Full-corpus matrix, kept under its original name for any code that still
# expects it. It is produced by the train-fitted vectorizer and is not used
# for training or evaluation here.
X_vec = tfidf.transform(X)

model = LogisticRegression()
model.fit(X_train, y_train)


In [None]:
# Predict on the held-out split and print per-class precision, recall and F1.
y_pred = model.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
# Persist the classifier together with the fitted vectorizer: new text must
# be transformed with this same vectorizer before it is passed to the model.
# Create the output directory first so the dump does not fail on a fresh
# checkout where ../models does not exist yet.
os.makedirs("../models", exist_ok=True)
joblib.dump(model, "../models/fake_news_model.pkl")
joblib.dump(tfidf, "../models/tfidf_vectorizer.pkl")
