In [7]:
import os
import re
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# === 1. Metin temizleme fonksiyonu ===
def temizle_metin(text):
    text = text.lower()                                # Küçük harfe çevir
    text = re.sub(r'[^\w\s]', '', text)                # Noktalama işaretlerini kaldır
    text = re.sub(r'\d+', '', text)                    # Sayıları kaldır
    text = text.replace("i̇", "i")                      # Türkçede bozulmuş karakter düzeltme
    text = re.sub(r'\s+', ' ', text).strip()           # Fazla boşlukları sil
    return text

# === 2. Veri okuma ===
main_folder = r'C:\Users\YUCE037\Downloads\AAydintasbas'

texts = []
labels = []

for author_folder in os.listdir(main_folder):
    author_path = os.path.join(main_folder, author_folder)
    if os.path.isdir(author_path):
        for txt_file in os.listdir(author_path):
            file_path = os.path.join(author_path, txt_file)
            if file_path.endswith(".txt"):
                with open(file_path, "r", encoding="utf-8") as f:
                    raw_text = f.read()
                    cleaned = temizle_metin(raw_text)
                    texts.append(cleaned)
                    labels.append(author_folder)

# === 3. TF-IDF vektörleştirme ===
vectorizer = TfidfVectorizer(ngram_range=(1,2), max_features=5000, stop_words='english')
X = vectorizer.fit_transform(texts)

# === 4. Label encoding ===
encoder = LabelEncoder()
labels_encoded = encoder.fit_transform(labels)

# === 5. Eğitim/test bölme ===
X_train, X_test, y_train, y_test = train_test_split(X, labels_encoded, test_size=0.2, random_state=42)

# === 6. SVM modeli eğit ===
model = SVC(kernel='linear', C=1)
model.fit(X_train, y_train)

# === 7. Tahmin ve doğruluk hesaplama ===
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2%}")

# === 8. Raporu yazdır ve CSV olarak kaydet ===
report = classification_report(y_test, y_pred, target_names=encoder.classes_)
print(report)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2%}")


Accuracy: 80.80%
              precision    recall  f1-score   support

      AAltan       0.88      1.00      0.93         7
AAydintasbas       0.83      0.77      0.80        13
      AHakan       1.00      1.00      1.00         9
 ATuranAlkan       1.00      0.86      0.92         7
    AYArslan       1.00      1.00      1.00         6
     BCoskun       0.67      1.00      0.80         6
     CCandar       0.88      1.00      0.93         7
    COzdemir       1.00      0.91      0.95        11
  DCundioglu       1.00      0.69      0.82        13
  DUAribogan       1.00      0.78      0.88         9
      EArdic       1.00      1.00      1.00         9
      ECakir       0.60      0.86      0.71         7
    GGokturk       0.50      0.88      0.64         8
   HBabaoglu       1.00      0.47      0.64        15
      HCemal       0.80      0.50      0.62         8
       HUluc       0.60      1.00      0.75         3
  IKucukkaya       0.75      0.86      0.80         7
    MABira

In [None]:

from sklearn.naive_bayes import MultinomialNB

# === Naive Bayes modeli eğitimi ===
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)

# === Naive Bayes testi ve değerlendirme ===
nb_pred = nb_model.predict(X_test)
print("=== Naive Bayes Classification Report ===")
print(classification_report(y_test, nb_pred))
