Veri Hazırlığı ve Temizleme

In [1]:
import os
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from langdetect import detect, DetectorFactory
import nltk

# NLTK ve LangDetect ayarları
nltk.download('stopwords')
nltk.download('punkt')
DetectorFactory.seed = 0  # LangDetect için deterministik sonuç

# Stopwords listesi (desteklenen diller için)
stopwords_dict = {
    "en": set(stopwords.words('english')),
    "fr": set(stopwords.words('french')),
    "it": set(stopwords.words('italian')),
    "es": set(stopwords.words('spanish')),
    "pl": {
        "i", "w", "na", "do", "po", "z", "za", "że", "nie", "tak", "jak", "czy", "ale", "lub",
        "od", "dla", "być", "przez", "ze", "ten", "to", "te", "może", "też", "tylko", "jeszcze",
        "już", "więc", "oni", "ona", "ono", "on", "ja", "ty", "my", "wy", "ich", "go", "mu", "jej",
        "jaki", "kiedy", "gdzie", "dlaczego", "co", "kto", "kogo", "czego", "komu", "kim", "czym",
        "jaka", "która", "który", "którzy", "które", "ich", "ich", "są", "był", "była", "było"
    }
}

# Dil bazlı metin temizleme fonksiyonu
def clean_text_by_language(text, lang):
    # Noktalama işaretlerini ve özel karakterleri kaldır
    text = re.sub(r'[^\w\s]', '', text)
    # Sayıları kaldır
    text = re.sub(r'\d+', '', text)
    # Küçük harfe çevir
    text = text.lower()
    # Tokenize et
    tokens = word_tokenize(text)
    # Stopwords kaldır
    if lang in stopwords_dict:
        tokens = [word for word in tokens if word not in stopwords_dict[lang]]
    return ' '.join(tokens)

# Eğitim veri setinin yolu
articles = []  # Makale verilerini depolamak için liste
for j in range(1, 21):
    if j < 10:
        training_dataset_path = f'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem0000{j}'
    else:
        training_dataset_path = f'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem000{j}'

    # Tüm problem klasörlerini listeleme
    problems = [os.path.join(training_dataset_path, d) for d in os.listdir(training_dataset_path) if os.path.isdir(os.path.join(training_dataset_path, d))]
    print(f"Problem Klasörleri: {problems}")  # Tüm klasörleri göster

    # Problem klasörlerinden dosyaları okuma
    for i in range(0, len(problems)):
        problem_path = problems[i]
        files = os.listdir(problem_path)
        print(f"Problemdeki Dosyalar: {files}")

        # Makale içeriği, yazar ve dil bilgisi
        for file_name in files:
            file_path = os.path.join(problem_path, file_name)
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            
            # Dil algılama
            try:
                language = detect(content)
            except Exception:
                language = "unknown"  # Dil algılanamazsa "unknown" olarak işaretle
            
            # Desteklenen dillerde temizleme işlemi
            if language in stopwords_dict:
                cleaned_content = clean_text_by_language(content, language)
            else:
                cleaned_content = "unsupported_language"

            # Yazar bilgisini almak
            author = problem_path.split(os.sep)[-1]  # Dosya yolundan yazar ismini almak
            
            # Veri setine ekle
            articles.append({"file_name": file_name, "content": cleaned_content, "author": author, "language": language})

# Veri çerçevesine dönüştürme
df = pd.DataFrame(articles)

# Bilinmeyen veya desteklenmeyen dilleri temizleme
df = df[(df['language'] != 'unknown') & (df['content'] != "unsupported_language")]

# Örnek temizlenmiş metin gösterme
print(f"Örnek Temizlenmiş Metin: {df['content'].iloc[0][:100]}")  # İlk 100 karakteri göster

# Verinin ilk 5 satırını göster
print(df.head())

[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
[nltk_data] Error loading punkt: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


Problem Klasörleri: ['pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00001', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00002', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00003', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00004', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00005', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00006', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00007', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00008', 'pan19-cross-domain-authorship-attribution-training-dataset-2019-01-23/problem00001\\candidate00009', 'pan19-cross-domain-authorship-attribution-training-dataset-2

Word2Vec ile WordEmbedding

In [2]:
df.head()

Unnamed: 0,file_name,content,author,language
0,known00001.txt,speak anyone saw funeral said words thats went...,candidate00001,en
1,known00002.txt,could tell completely naked wrists twisted beh...,candidate00001,en
2,known00003.txt,many monitors glowing chest wand nowhere seen ...,candidate00001,en
3,known00004.txt,white horse voice sounded young course time tr...,candidate00001,en
4,known00005.txt,bit lip breathes coming quicker anticipation l...,candidate00001,en


In [3]:
from gensim.models import Word2Vec
import numpy as np
from nltk.tokenize import word_tokenize

# Word2Vec için veri hazırlama
def prepare_sentences(df):
    sentences = []
    for content in df['content']:
        tokens = word_tokenize(content)
        sentences.append(tokens)
    return sentences

# Word2Vec modelini eğitme
def train_word2vec(sentences, vector_size=100, window=5, min_count=2, workers=4):
    model = Word2Vec(sentences, vector_size=vector_size, window=window, min_count=min_count, workers=workers)
    return model

# Veri setinden cümleleri hazırlama
sentences = prepare_sentences(df)

# Word2Vec modelini eğitme
word2vec_model = train_word2vec(sentences)

# Örnek: "example" kelimesinin vektörünü alma
if "example" in word2vec_model.wv:
    example_vector = word2vec_model.wv["example"]
    print(f"'example' kelimesinin embedding vektörü:\n{example_vector}")

# Makale bazında embedding hesaplama (ortalama vektör)
def get_document_embedding(tokens, model):
    embeddings = [model.wv[word] for word in tokens if word in model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(model.vector_size)

# Veri setindeki her makale için embedding hesaplama
df['embedding'] = df['content'].apply(lambda x: get_document_embedding(word_tokenize(x), word2vec_model))

# İlk makalenin embedding vektör boyutunu göster
print(f"İlk makale embedding vektörü boyutu: {df['embedding'][0].shape}")


'example' kelimesinin embedding vektörü:
[-0.05821775  0.04408797 -0.01841447  0.06328914  0.05307474 -0.10869525
  0.06246477  0.09954587 -0.09403481  0.04183749 -0.0486215  -0.15894957
  0.01639337  0.04376337  0.10434577 -0.00376748  0.00658012 -0.05046147
  0.08086132 -0.06292623  0.05247228 -0.01794806 -0.07442671 -0.00919024
 -0.00135582  0.01254781 -0.04575526 -0.00101882 -0.00968162  0.01360416
  0.03627067  0.01298887  0.04640528  0.04372231 -0.02944582  0.09741972
 -0.00899204 -0.05744597 -0.05791776 -0.0657212   0.07649823 -0.13805863
 -0.01742668  0.03560346  0.04597436 -0.06055414 -0.02498817  0.05777322
  0.05806503  0.02822231 -0.0036599   0.01485876  0.08051632 -0.00272405
 -0.06920693  0.00734441 -0.02827662 -0.05062358  0.00022376  0.09270012
  0.03850315  0.03004441 -0.02967604  0.02028426  0.05846172  0.09788375
 -0.01633471  0.083006    0.04558882  0.07384809 -0.08013186 -0.03971261
  0.01042549 -0.02344354  0.12244113  0.15292548  0.00464651  0.06083092
  0.019667

Random Forest

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import numpy as np

# Veri setindeki 'embedding' sütunlarını numpy dizisine çevirme
X = np.vstack(df['embedding'].values)
y = df['author']  # Burada etiketlerin 'label' sütununda olduğunu varsayıyoruz

# Eğitim ve test setine ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Random Forest modelini eğitme
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Test setinde tahmin yapma
y_pred = clf.predict(X_test)

# Sonuçları değerlendirme
print("Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred))


Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.00      0.00      0.00        39
candidate00002       0.10      0.03      0.04        37
candidate00003       0.00      0.00      0.00        51
candidate00004       0.00      0.00      0.00        40
candidate00005       0.00      0.00      0.00        40
candidate00006       0.00      0.00      0.00        23
candidate00007       0.17      0.02      0.04        48
candidate00008       0.00      0.00      0.00        39
candidate00009       0.00      0.00      0.00        41
       unknown       0.78      0.97      0.87      1236

      accuracy                           0.75      1594
     macro avg       0.10      0.10      0.09      1594
  weighted avg       0.61      0.75      0.67      1594



Logistic Regression

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Logistic Regression modelini eğitme
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train, y_train)

# Test setinde tahmin yapma
y_pred_logreg = logreg.predict(X_test)

# Sonuçları değerlendirme
print("Logistic Regression Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_logreg))


Logistic Regression Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.00      0.00      0.00        39
candidate00002       0.00      0.00      0.00        37
candidate00003       0.00      0.00      0.00        51
candidate00004       0.00      0.00      0.00        40
candidate00005       0.00      0.00      0.00        40
candidate00006       0.00      0.00      0.00        23
candidate00007       0.00      0.00      0.00        48
candidate00008       0.00      0.00      0.00        39
candidate00009       0.00      0.00      0.00        41
       unknown       0.78      1.00      0.87      1236

      accuracy                           0.78      1594
     macro avg       0.08      0.10      0.09      1594
  weighted avg       0.60      0.78      0.68      1594



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Support Vector Machine (SVM)

In [6]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# SVM modelini eğitme
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)

# Test setinde tahmin yapma
y_pred_svm = svm.predict(X_test)

# Sonuçları değerlendirme
print("Support Vector Machine Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_svm))


Support Vector Machine Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.00      0.00      0.00        39
candidate00002       0.00      0.00      0.00        37
candidate00003       0.00      0.00      0.00        51
candidate00004       0.00      0.00      0.00        40
candidate00005       0.00      0.00      0.00        40
candidate00006       0.00      0.00      0.00        23
candidate00007       0.00      0.00      0.00        48
candidate00008       0.00      0.00      0.00        39
candidate00009       0.00      0.00      0.00        41
       unknown       0.78      1.00      0.87      1236

      accuracy                           0.78      1594
     macro avg       0.08      0.10      0.09      1594
  weighted avg       0.60      0.78      0.68      1594



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


K-Nearest Neighbors (KNN)

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# KNN modelini eğitme
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Test setinde tahmin yapma
y_pred_knn = knn.predict(X_test)

# Sonuçları değerlendirme
print("K-Nearest Neighbors Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_knn))


K-Nearest Neighbors Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.05      0.03      0.03        39
candidate00002       0.00      0.00      0.00        37
candidate00003       0.00      0.00      0.00        51
candidate00004       0.06      0.03      0.04        40
candidate00005       0.00      0.00      0.00        40
candidate00006       0.18      0.09      0.12        23
candidate00007       0.00      0.00      0.00        48
candidate00008       0.11      0.05      0.07        39
candidate00009       0.06      0.02      0.04        41
       unknown       0.80      0.95      0.87      1236

      accuracy                           0.74      1594
     macro avg       0.13      0.12      0.12      1594
  weighted avg       0.63      0.74      0.68      1594



Gradient Boosting (GBM)

In [8]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report

# Gradient Boosting modelini eğitme
gbm = GradientBoostingClassifier(random_state=42)
gbm.fit(X_train, y_train)

# Test setinde tahmin yapma
y_pred_gbm = gbm.predict(X_test)

# Sonuçları değerlendirme
print("Gradient Boosting Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_gbm))


Gradient Boosting Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.07      0.03      0.04        39
candidate00002       0.08      0.03      0.04        37
candidate00003       0.00      0.00      0.00        51
candidate00004       0.00      0.00      0.00        40
candidate00005       0.11      0.03      0.04        40
candidate00006       0.00      0.00      0.00        23
candidate00007       0.00      0.00      0.00        48
candidate00008       0.00      0.00      0.00        39
candidate00009       0.22      0.05      0.08        41
       unknown       0.78      0.95      0.86      1236

      accuracy                           0.74      1594
     macro avg       0.13      0.11      0.11      1594
  weighted avg       0.62      0.74      0.67      1594



XGBoost

In [9]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import xgboost as xgb
import pandas as pd
from nltk.tokenize import word_tokenize

# Word2Vec modelini ve embedding işlemini tekrar eden kısımlar
# (Sizden sağlanan kodlar bu kısımda yer alıyor)
from gensim.models import Word2Vec
import numpy as np

def prepare_sentences(df):
    sentences = []
    for content in df['content']:
        tokens = word_tokenize(content)
        sentences.append(tokens)
    return sentences

def train_word2vec(sentences, vector_size=100, window=5, min_count=2, workers=4):
    model = Word2Vec(sentences, vector_size=vector_size, window=window, min_count=min_count, workers=workers)
    return model

def get_document_embedding(tokens, model):
    embeddings = [model.wv[word] for word in tokens if word in model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(model.vector_size)

# Veri hazırlığı
sentences = prepare_sentences(df)
word2vec_model = train_word2vec(sentences)

# Makale bazında embedding hesaplama
df['embedding'] = df['content'].apply(lambda x: get_document_embedding(word_tokenize(x), word2vec_model))

# X ve y oluşturma
X = np.array(df['embedding'].tolist())
y = df['author']  # Hedef değişken

# Veriyi eğitim ve test setlerine ayırma
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LabelEncoder ile hedef değişkeni sayısallaştırma
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# XGBoost modelini eğitme
xgb_model = xgb.XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train_encoded)

# Test setinde tahmin yapma
y_pred_xgb = xgb_model.predict(X_test)

# Tahmin edilen sınıfları orijinal etiketlere geri çevirme
y_pred_xgb_decoded = label_encoder.inverse_transform(y_pred_xgb)

# Sonuçları değerlendirme
print("XGBoost Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_xgb_decoded))


XGBoost Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.14      0.04      0.06        28
candidate00002       0.00      0.00      0.00        19
candidate00003       0.25      0.03      0.06        31
candidate00004       0.11      0.04      0.05        28
candidate00005       0.33      0.04      0.07        27
candidate00006       0.00      0.00      0.00        13
candidate00007       0.00      0.00      0.00        33
candidate00008       0.25      0.04      0.07        25
candidate00009       0.00      0.00      0.00        30
       unknown       0.79      0.97      0.87       829

      accuracy                           0.76      1063
     macro avg       0.19      0.12      0.12      1063
  weighted avg       0.65      0.76      0.69      1063



Neural Networks (MLPClassifier)

In [10]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Neural Network modelini eğitme
mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, y_train)

# Test setinde tahmin yapma
y_pred_mlp = mlp.predict(X_test)

# Sonuçları değerlendirme
print("Neural Network (MLP) Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_mlp))


Neural Network (MLP) Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.00      0.00      0.00        28
candidate00002       0.00      0.00      0.00        19
candidate00003       0.00      0.00      0.00        31
candidate00004       0.00      0.00      0.00        28
candidate00005       0.00      0.00      0.00        27
candidate00006       0.00      0.00      0.00        13
candidate00007       0.00      0.00      0.00        33
candidate00008       0.00      0.00      0.00        25
candidate00009       0.00      0.00      0.00        30
       unknown       0.78      1.00      0.88       829

      accuracy                           0.78      1063
     macro avg       0.08      0.10      0.09      1063
  weighted avg       0.61      0.78      0.68      1063



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Decision Tree

In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Özellikler (X) ve etiketler (y) belirleme
X = df['embedding'].apply(lambda x: x.flatten()).to_list()  # embedding vektörlerini kullanıyoruz
y = df['author']  # Yazarları etiket olarak kullanıyoruz

# Eğitim ve test veri setlerine ayırma
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Modeli oluşturma (Decision Tree)
dt_model = DecisionTreeClassifier(random_state=42)

# Modeli eğitme
dt_model.fit(X_train, y_train)

# Test veri seti ile tahmin yapma
y_pred = dt_model.predict(X_test)

# Sonuçları değerlendirme
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Modeli Başarı Oranı: {accuracy * 100:.2f}%")

# Detaylı sınıflandırma raporu
print("Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred))

Decision Tree Modeli Başarı Oranı: 60.66%
Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.07      0.10      0.08        39
candidate00002       0.04      0.05      0.04        37
candidate00003       0.04      0.04      0.04        51
candidate00004       0.05      0.07      0.06        40
candidate00005       0.07      0.07      0.07        40
candidate00006       0.03      0.09      0.05        23
candidate00007       0.07      0.06      0.06        48
candidate00008       0.10      0.15      0.12        39
candidate00009       0.07      0.07      0.07        41
       unknown       0.83      0.76      0.79      1236

      accuracy                           0.61      1594
     macro avg       0.14      0.15      0.14      1594
  weighted avg       0.66      0.61      0.63      1594



Naive Bayes

In [12]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import numpy as np
from nltk.tokenize import word_tokenize

# Word2Vec ve embedding işlemleri
from gensim.models import Word2Vec

def prepare_sentences(df):
    sentences = []
    for content in df['content']:
        tokens = word_tokenize(content)
        sentences.append(tokens)
    return sentences

def train_word2vec(sentences, vector_size=100, window=5, min_count=2, workers=4):
    model = Word2Vec(sentences, vector_size=vector_size, window=window, min_count=min_count, workers=workers)
    return model

def get_document_embedding(tokens, model):
    embeddings = [model.wv[word] for word in tokens if word in model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(model.vector_size)

# Veri hazırlığı
sentences = prepare_sentences(df)
word2vec_model = train_word2vec(sentences)

# Makale bazında embedding hesaplama
df['embedding'] = df['content'].apply(lambda x: get_document_embedding(word_tokenize(x), word2vec_model))

# X ve y oluşturma
X = np.array(df['embedding'].tolist())
y = df['author']  # Hedef değişken

# Embedding vektörlerini normalleştirme (pozitif değerler gereklidir)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Veriyi eğitim ve test setlerine ayırma
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LabelEncoder ile hedef değişkeni sayısallaştırma
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Naive Bayes modelini eğitme
nb = MultinomialNB()
nb.fit(X_train, y_train_encoded)

# Test setinde tahmin yapma
y_pred_nb = nb.predict(X_test)

# Tahmin edilen sınıfları orijinal etiketlere geri çevirme
y_pred_nb_decoded = label_encoder.inverse_transform(y_pred_nb)

# Sonuçları değerlendirme
print("Naive Bayes Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred_nb_decoded))


Naive Bayes Sınıflandırma Raporu:
                precision    recall  f1-score   support

candidate00001       0.00      0.00      0.00        28
candidate00002       0.00      0.00      0.00        19
candidate00003       0.00      0.00      0.00        31
candidate00004       0.00      0.00      0.00        28
candidate00005       0.00      0.00      0.00        27
candidate00006       0.00      0.00      0.00        13
candidate00007       0.00      0.00      0.00        33
candidate00008       0.00      0.00      0.00        25
candidate00009       0.00      0.00      0.00        30
       unknown       0.78      1.00      0.88       829

      accuracy                           0.78      1063
     macro avg       0.08      0.10      0.09      1063
  weighted avg       0.61      0.78      0.68      1063



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
