In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from tqdm import tqdm

# 📌 GPU kontrolü
device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
print("Kullanılan cihaz:", device)

# 📌 Veri yükle (Etiketlenen data )
df = pd.read_csv("yorumlar_etiketli.csv")

# Label mapping
labels = sorted(df["label"].unique().tolist())  # sıralı olsun diye
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}
df["label_id"] = df["label"].map(label2id)

# Train/test split
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df["raw_answer"].astype(str).tolist(),
    df["label_id"].tolist(),
    test_size=0.2,
    random_state=42
)

# 📌 Tokenizer
model_name = "dbmdz/bert-base-turkish-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Dataset class
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        item = {key: val.squeeze(0) for key, val in encoding.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Dataset & Dataloader
train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
test_dataset = CustomDataset(test_texts, test_labels, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

# 📌 Model
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id
)
model.to(device)

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# 📌 Eğitim döngüsü
epochs = 2
model.train()
for epoch in range(epochs):
    print(f"\n🔹 Epoch {epoch+1}/{epochs}")
    loop = tqdm(train_loader, leave=True)
    for batch in loop:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        loop.set_description(f"Epoch {epoch+1}")
        loop.set_postfix(loss=loss.item())



Kullanılan cihaz: mps


2025-09-26 12:50:27.201880: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔹 Epoch 1/2


Epoch 1: 100%|██████████| 666/666 [24:18<00:00,  2.19s/it, loss=0.0293] 



🔹 Epoch 2/2


Epoch 2: 100%|██████████| 666/666 [23:57<00:00,  2.16s/it, loss=0.00494]


In [3]:
# 📌 Değerlendirme
model.eval()
preds, trues = [], []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)

        preds.extend(predictions.cpu().numpy())
        trues.extend(labels.cpu().numpy())

print("\n📊 Classification Report:")
print(classification_report(
    trues,
    preds,
    labels=list(id2label.keys()),
    target_names=[id2label[i] for i in id2label.keys()]
))




📊 Classification Report:
              precision    recall  f1-score   support

       Diğer       0.98      0.98      0.98       323
   Güler Yüz       0.99      1.00      0.99       252
         Hız       1.00      0.95      0.97        19
  Memnuniyet       0.95      0.99      0.97        91
     Negatif       0.94      0.89      0.91        36
        İlgi       0.99      0.99      0.99       611

    accuracy                           0.98      1332
   macro avg       0.97      0.96      0.97      1332
weighted avg       0.98      0.98      0.98      1332



In [4]:
# 📌 Modeli kaydet
save_path = "kategorimodeli"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print(f"✅ Model kaydedildi: {save_path}")


✅ Model kaydedildi: kategorimodeli


In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_path = "kategorimodeli"
model = AutoModelForSequenceClassification.from_pretrained(model_path)

print("📌 Modeldeki kategori etiketleri:")
print(model.config.id2label)


📌 Modeldeki kategori etiketleri:
{0: 'Diğer', 1: 'Güler Yüz', 2: 'Hız', 3: 'Memnuniyet', 4: 'Negatif', 5: 'İlgi'}


TEST

In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Kaydedilen modeli yükle
model_path = "kategorimodeli"   # sunucuda senin kaydettiğin klasör
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Örnek tahmin
yorum = "Çalışanlar çok ilgisizdi, 15 dakika bekledim."
print(classifier(yorum))


Device set to use mps:0


[{'label': 'İlgi', 'score': 0.9986386895179749}]


✅ Güncellenmiş Python kodu (hem update hem log kaydı) Veritabanına düzenli aktarım için gereklidir.

In [None]:
import torch
import pandas as pd
import mysql.connector
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm

# 📌 GPU kontrolü
device = torch.device("mps" if torch.backends.mps.is_available() else
                      "cuda" if torch.cuda.is_available() else "cpu")
print("Kullanılan cihaz:", device)

# 📌 Eğitilmiş modeli yükle
model_path = "kategorimodeli"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.to(device)
model.eval()

# 📌 Veritabanı bağlantısı
conn = mysql.connector.connect(
    user="****",
    password="****",
    host="****",
    database="****"
)
cursor = conn.cursor(dictionary=True)

# 📌 Kategorisi boş olanları çek
cursor.execute("SELECT transaction_id, cevap FROM yorumlar WHERE  IS NULL")
rows = cursor.fetchall()
df = pd.DataFrame(rows)

def predict_batch(texts, batch_size=128):
    preds = []
    for i in tqdm(range(0, len(texts), batch_size), desc="🔮 Tahmin ediliyor"):
        batch_texts = texts[i:i+batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt",
                           truncation=True, padding=True, max_length=128).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            batch_preds = torch.argmax(outputs.logits, dim=-1).cpu().numpy()
            preds.extend([model.config.id2label[int(p)] if int(p) in model.config.id2label else str(p)
                          for p in batch_preds])
    return preds

# 📊 Tahmin et
if not df.empty:
    df["cevap_kategorisi"] = predict_batch(df["cevap"].astype(str).tolist())

    # 📤 UPDATE işlemi
    update_data = list(zip(df["cevap_kategorisi"], df["transaction_id"]))
    cursor.executemany(
        "UPDATE nlp_answer SET cevap_kategorisi = %s WHERE transaction_id = %s",
        update_data
    )
    conn.commit()

    # 📌 Log tablosuna kayıt
    cursor.execute(
        "INSERT INTO nlp_logs (processed_count, success_count, fail_count) VALUES (%s, %s, %s)",
        (len(df), len(df), 0)   # burada tümü başarılı kabul ettik
    )
    conn.commit()

    print(f"🚀 {len(df)} yorum kategorize edildi ve log kaydı eklendi.")
else:
    print("ℹ️ Etiketlenecek yeni yorum yok.")

cursor.close()
conn.close()
