In [11]:
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

In [12]:
# Read the Excel sheet and normalise the 'Bewertungstext' column in one chain:
# missing cells become '' and every value is cast to str.
file_path = 'Artikelbewertung (1).xlsx'
df = pd.read_excel(file_path).assign(
    Bewertungstext=lambda frame: frame['Bewertungstext'].fillna('').astype(str)
)

In [13]:
# Checkpoint identifier passed to both from_pretrained() calls below.
model_name = "cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual"

# Tokenizer and sequence-classification model are loaded from the same
# checkpoint and then wrapped in a single callable pipeline object.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    tokenizer=tokenizer,
    model=model,
)

Device set to use cpu


In [14]:
def analyze_sentiment(text):
    """Classify one review text with the module-level ``sentiment_pipeline``.

    Parameters
    ----------
    text : str
        Review text to classify.

    Returns
    -------
    dict
        ``{'label': str, 'score': float}`` where

        * blank / whitespace-only text yields ``'no info'`` with a NaN score
          (the model is not called, so no sentiment is invented for it);
        * a successful prediction yields the pipeline label, translated via
          ``label_map`` when it is ``LABEL_0``/``LABEL_1``/``LABEL_2`` and
          passed through unchanged otherwise, plus the pipeline's score;
        * any failure yields ``'Hata'`` with score ``0.0`` and emits a
          ``RuntimeWarning`` instead of failing silently.
    """
    import warnings

    # An empty review carries no sentiment; skip the model entirely.
    if isinstance(text, str) and not text.strip():
        return {'label': 'no info', 'score': float('nan')}

    # NOTE(review): this mapping only applies when the checkpoint emits generic
    # LABEL_n names; if it already emits e.g. 'positive', that string is
    # returned as-is - confirm against the model config.
    label_map = {'LABEL_0': 'Negatif', 'LABEL_1': 'Nötr', 'LABEL_2': 'Pozitif'}

    try:
        # Truncate at the token level rather than slicing characters: a
        # 512-character slice throws away most of a long review and still does
        # not guarantee the tokenised input fits the model's limit.
        result = sentiment_pipeline(text, truncation=True, max_length=512)[0]
        label = label_map.get(result['label'], result['label'])
        return {'label': label, 'score': float(result['score'])}
    except Exception as exc:
        # Best effort: keep processing the remaining rows, but surface the problem.
        warnings.warn(
            f"Sentiment analysis failed ({type(exc).__name__}: {exc}); returning 'Hata'.",
            RuntimeWarning,
        )
        return {'label': 'Hata', 'score': 0.0}

# Run the classifier once per review, keep the raw result dicts, and split
# their two fields out into flat columns.
sentiment_results = df['Bewertungstext'].map(analyze_sentiment)
df['sentiment'] = sentiment_results
df['sentiment_label'] = sentiment_results.map(lambda res: res['label'])
df['sentiment_score'] = sentiment_results.map(lambda res: res['score'])

In [15]:
def rating_to_sentiment(rating):
    """Map a numeric rating to a sentiment bucket.

    Parameters
    ----------
    rating : number
        Rating value to classify.

    Returns
    -------
    str
        ``"Pozitif"`` for ``rating >= 4``, ``"Nötr"`` for ``rating == 3``,
        ``"Negatif"`` for ``rating < 3``, and ``"no info"`` for anything else:
        NaN, values strictly between 3 and 4, or values that cannot be
        compared with a number (e.g. ``None`` or a string).
    """
    try:
        if rating >= 4:
            return "Pozitif"
        if rating == 3:
            return "Nötr"
        if rating < 3:
            return "Negatif"
    except Exception:
        # Non-comparable input. Catch Exception rather than using a bare
        # except so KeyboardInterrupt / SystemExit still propagate.
        return "no info"
    # No comparison matched: NaN, or a value strictly between 3 and 4.
    return "no info"

# Bucket every rating in 'Bewertung' into a sentiment name, element by element.
df['rating_sentiment'] = df['Bewertung'].map(rating_to_sentiment)

In [7]:
# Lower-cased labels that count as positive / negative. Both the English
# spellings and the Turkish names from analyze_sentiment's label_map are
# accepted, so the AI statistics work whichever form the label column holds.
POSITIVE_LABELS = {'positive', 'pozitif'}
NEGATIVE_LABELS = {'negative', 'negatif'}


def _most_confident_review(rows):
    """Return the 'Bewertungstext' of the row with the highest 'sentiment_score'.

    Rows with a NaN score are ignored; returns "" when no scored row remains.
    """
    scored = rows.dropna(subset=['sentiment_score'])
    if scored.empty:
        return ""
    # Positional lookup, so a non-unique index cannot select several rows.
    best_pos = scored['sentiment_score'].to_numpy(dtype=float).argmax()
    return scored['Bewertungstext'].iloc[best_pos]


# Build one summary record per product ('Artikel').
summary = []
for artikel, group in df.groupby('Artikel'):
    total = len(group)
    ai_label = group['sentiment_label'].astype(str).str.lower()
    rating_label = group['rating_sentiment'].astype(str).str.lower()

    ai_pos = group[ai_label.isin(POSITIVE_LABELS)]
    ai_neg = group[ai_label.isin(NEGATIVE_LABELS)]
    rating_pos = group[rating_label == 'pozitif']

    ai_happy_pct = round(100 * len(ai_pos) / total, 2) if total else 0
    rating_happy_pct = round(100 * len(rating_pos) / total, 2) if total else 0
    ort_puan = round(group['Bewertung'].mean(), 2) if total else 0

    # 'sentiment_score' is the confidence of the predicted label, not a
    # polarity value, so the extremes must be searched within the rows of the
    # matching label: the most confident positive review and the most
    # confident negative review. Either is "" when the product has none.
    en_pozitif_yorum = _most_confident_review(ai_pos)
    en_negatif_yorum = _most_confident_review(ai_neg)

    summary.append({
        'Ürün': str(artikel),
        'Yorum Sayısı': total,
        'AI Mutluluk %': ai_happy_pct,
        'Puan Mutluluk %': rating_happy_pct,
        'Ortalama Puan': ort_puan,
        'En Pozitif Yorum': en_pozitif_yorum,
        'En Negatif Yorum': en_negatif_yorum
    })

# Persist the per-review frame and the per-product summary.
summary_df = pd.DataFrame(summary)
df.to_pickle('tum_yorumlar.pkl')
summary_df.to_pickle('urun_ozet.pkl')