In [None]:
import pandas as pd
import json

# Load the dataset: the file is read in JSON Lines mode (lines=True),
# i.e. one JSON record per line.
url = "https://raw.githubusercontent.com/amankharwal/Website-data/master/Sarcasm.json"
data = pd.read_json(url, lines=True)

# Keep only the two columns we need: the headline text and its 0/1 label
df = data[
    ["headline", "is_sarcastic"]
]

# Preview the first five rows, then show how many rows carry each label
print(df.head(n=5))
print(df["is_sarcastic"].value_counts())


                                            headline  is_sarcastic
0  former versace store clerk sues over secret 'b...             0
1  the 'roseanne' revival catches up to our thorn...             0
2  mom starting to fear son's web series closest ...             1
3  boehner just wants wife to listen, not come up...             1
4  j.k. rowling wishes snape happy birthday in th...             0
is_sarcastic
0    14985
1    11724
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Hold out 20% of the headlines for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    df["headline"],
    df["is_sarcastic"],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: the vectorizer is fitted on the training headlines only and
# then applied unchanged to the test headlines. max_features=5000 caps the
# vocabulary size.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Logistic regression with default settings (fit() returns the estimator itself)
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Predict on the held-out set and report accuracy plus per-class metrics
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.8395731935604642
              precision    recall  f1-score   support

           0       0.85      0.87      0.86      2996
           1       0.83      0.80      0.81      2346

    accuracy                           0.84      5342
   macro avg       0.84      0.84      0.84      5342
weighted avg       0.84      0.84      0.84      5342



In [None]:
import pickle

# Persist the classifier and the fitted vectorizer together as a single
# (model, vectorizer) tuple, so whoever loads the file gets both objects
# back from one pickle.load call.
with open("sarcasm_model.pkl", "wb") as fh:
    pickle.dump((model, vectorizer), fh)
