In [17]:
# 1. Gerekli kütüphaneleri yükleme
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [3]:
# 2. Load the dataset from the CSV file in the working directory.
data = pd.read_csv("news.csv")

# Peek at the first five rows to check the columns and labels.
print(data.head(n=5))

   Unnamed: 0                                              title  \
0        8476                       You Can Smell Hillary’s Fear   
1       10294  Watch The Exact Moment Paul Ryan Committed Pol...   
2        3608        Kerry to go to Paris in gesture of sympathy   
3       10142  Bernie supporters on Twitter erupt in anger ag...   
4         875   The Battle of New York: Why This Primary Matters   

                                                text label  
0  Daniel Greenfield, a Shillman Journalism Fello...  FAKE  
1  Google Pinterest Digg Linkedin Reddit Stumbleu...  FAKE  
2  U.S. Secretary of State John F. Kerry said Mon...  REAL  
3  — Kaydee King (@KaydeeKing) November 9, 2016 T...  FAKE  
4  It's primary day in New York and front-runners...  REAL  


In [5]:
# Shuffle the rows.
# A fixed random_state makes the row order identical on every run, so the
# seeded train/test split that follows sees the same input each time.
# reset_index(drop=True) renumbers the rows 0..n-1 and discards the old index
# instead of first materialising it as an "index" column and dropping it.
data = data.sample(frac=1, random_state=16).reset_index(drop=True)

In [6]:
# Separate the article text (model input) from the labels (prediction target).
texts = data["text"]
labels = data["label"]

In [7]:
# 3. Split into training and test sets: 20% of the samples are held out for
# testing, and random_state fixes the split for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=16,
)

In [8]:
# 4. Turn the raw text into numeric TF-IDF features.
# English stop words are removed, and max_df=0.7 is passed as the
# document-frequency cut-off for terms.
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# Fit on the training texts only, then apply the fitted vectorizer to the
# test texts.
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

In [9]:
# 5. Train the model.
# The classifier shuffles the training data between epochs by default;
# random_state pins that shuffling so the fitted model and the metrics
# reported below are reproducible from run to run.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=16)
pac.fit(tfidf_train, y_train)

In [10]:
# 6. Evaluate the model: predict labels for the held-out test features and
# report the share of correct predictions as a percentage.
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 93.76%


In [11]:
# Break the test results down with a confusion matrix.
# Rows are the true labels and columns the predicted labels, both in the
# order given by `labels` (FAKE first, then REAL).
confusion_matrix(y_true=y_test, y_pred=y_pred, labels=['FAKE','REAL'])

array([[598,  34],
       [ 45, 590]], dtype=int64)

In [16]:
# Predict the label for a piece of text supplied by the user.
def predict_news(news_text):
    """Classify a single piece of news text with the trained model.

    The text is vectorized with the module-level ``tfidf_vectorizer`` and the
    resulting features are passed to the module-level ``pac`` classifier.

    Parameters
    ----------
    news_text : str
        The text to classify.

    Returns
    -------
    The first (and only) element of the classifier's prediction for the
    one-item batch built from ``news_text``.
    """
    # The vectorizer expects an iterable of documents, so wrap the single text.
    features = tfidf_vectorizer.transform([news_text])
    return pac.predict(features)[0]

# Example usage: classify one sample headline and print the predicted label.
news_text = "Local Community Rallies to Support Small Businesses During Pandemic"
prediction = predict_news(news_text=news_text)
print(f"Bu haber: {prediction}")

Bu haber: REAL


In [18]:
# Persist the trained classifier to disk.
# The extension is '.pkl' (letter l, not digit 1) so it matches the vectorizer
# file saved alongside it; any code that loads the model must open this name.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(pac, model_file)

In [19]:
# Persist the fitted TF-IDF vectorizer to disk; new text must be transformed
# with this same fitted object before it is passed to the classifier.
with open('tfidf_vectorizer.pkl', mode='wb') as out_file:
    pickle.dump(tfidf_vectorizer, out_file)