In [48]:
%pip install numpy transformers pandas torch scikit-learn pyarrow accelerate transformers[torch] transformers[sentencepiece] ipywidgets tqdm datetime imblearn sentencepiece papermill xlsxwriter

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [49]:
import os
import time
from datetime import datetime
import string
import random
import logging
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizerFast, BertForSequenceClassification, DistilBertTokenizerFast, DistilBertForSequenceClassification, AlbertTokenizerFast, AlbertForSequenceClassification, RobertaTokenizerFast, RobertaForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from collections import defaultdict

In [50]:
log_filename = f'logs/log_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.log'
logging.basicConfig(filename=log_filename, filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO)

In [51]:
def load_category_data(dataset: str, category: str):
    df = pd.read_feather(os.path.join(os.path.realpath('.'), f'../data/{category}/{dataset}.feather'))
    # add a column for the category
    df['category'] = category
    df['dataset'] = dataset
    return df

In [19]:
# load the testing datasets
testing_datasets = {
  'crime': ['FA-KES-Dataset', 'snope'],
  'health': ['covid_claims', 'covid_fake_news_dataset', 'covid_FNIR'],
  'politics': ['fake_news_dataset', 'isot_dataset', 'liar_dataset', 'pheme', 'politifact_dataset'],
  'science': ['climate_dataset', 'isot_multipurpose_small'],
  'social_media': ['gossipcop', 'isot_dataset']
}

data = pd.concat([load_category_data(dataset, category) for category, datasets in testing_datasets.items() for dataset in datasets])
print(data.groupby('category').count())

                text   label  metadata  dataset  title  author
category                                                      
crime           1119    1119      1119     1119      0       0
health         13521   13528     13528    13528   5939       0
politics      106071  106110    106110   106110      0    6424
science         2907    2907      2907     2907      0       0
social_media   67038   67038     67038    67038      0       0


In [20]:
# keep only the text and category columns
data = data[['text', 'category', 'label', 'dataset']]
data.dropna(inplace=True)
print(data.groupby('category').count())
logging.info(f'Number of samples: {len(data)}')
logging.info(data.groupby('category').count())

                text   label  dataset
category                             
crime           1119    1119     1119
health         13521   13521    13521
politics      106071  106071   106071
science         2907    2907     2907
social_media   67038   67038    67038


In [21]:
categories = ['crime', 'health', 'politics', 'science', 'social_media']
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Domain-specific Detection

In [22]:
classification_tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
classification_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=len(categories))
classification_model_state_dir = 'models/classification/distilbert-base-uncased-2023-12-13_17-42-42.pt'
classification_model.load_state_dict(torch.load(classification_model_state_dir, map_location=device))
classification_model.to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [23]:
detection_models = {
  'crime': {
    'model': DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2),
    'tokenizer': DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased'),
    'state_dir': 'models/detection/bert_model_wPwIZtuKPF.pt'
  },
  'health': {
    'model': DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2),
    'tokenizer': DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased'),
    'state_dir': 'models/detection/bert_model_VTSrphxrbr.pt'
  },
  'politics': {
    'model': BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2),
    'tokenizer': BertTokenizerFast.from_pretrained('bert-base-uncased'),
    'state_dir': 'models/detection/bert_model_DMaHx8fEE3.pt'
  },
  'science': {
    'model': BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2),
    'tokenizer': BertTokenizerFast.from_pretrained('bert-base-cased'),
    'state_dir': 'models/detection/bert_model_tV8F6Cjsy0.pt'
  },
  'social_media': {
    'model': DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2),
    'tokenizer': DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased'),
    'state_dir': 'models/detection/bert_model_mLXTbOJuAN.pt'
  }
}

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
S

In [24]:
# load the detection models
for category, model in detection_models.items():
  print(f'Loading {category} model')
  if model is not None:
    model['model'].load_state_dict(torch.load(model['state_dir'], map_location=device))
    model['model'].to(device)

Loading crime model
Loading health model
Loading politics model
Loading science model
Loading social_media model


In [25]:
def get_category(input: str):
    # tokenize the input
    input = classification_tokenizer(input, return_tensors='pt', padding=True, truncation=True)
    input = input.to(device)
    # get the prediction
    output = classification_model(**input)
    # get the prediction
    prediction = torch.argmax(output.logits, dim=1).item()
    return categories[prediction]

In [26]:
def get_detection(input: str, category: str):
    if detection_models[category] is None:
        return 0
    # tokenize the input
    input = detection_models[category]['tokenizer'](input, return_tensors='pt', padding=True, truncation=True)
    input = input.to(device)
    # get the prediction
    output = detection_models[category]['model'](**input)
    # get the prediction
    prediction = torch.argmax(output.logits, dim=1).item()
    return prediction

In [27]:
def get_prediction(input: str):
    category = get_category(input)
    detection = get_detection(input, category)
    return category, detection

In [28]:
# get the predictions 
results = []
start_time = time.time()
for index, row in data.iterrows():
    category, detection = get_prediction(row['text'])
    results.append({
        'text': row['text'],
        'category': row['category'],
        'label': row['label'],
        'dataset': row['dataset'],
        'category_prediction': category,
        'detection_prediction': detection
    })
print(f'Elapsed time: {time.time() - start_time}')
results = pd.DataFrame(results)

Elapsed time: 7406.596935033798


In [29]:
# write the results to a csv
results.to_csv('results.csv', index=False)
# drop the text column
results.drop(columns=['text'], inplace=True)

# Baseline Detection

In [30]:
# load the baseline detection model
baseline_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
baseline_model_state_dir = 'models/detection-baseline/bert-base-uncased-2023-12-21_20-36-38.pt'
baseline_model.load_state_dict(torch.load(baseline_model_state_dir, map_location=device))
baseline_model.to(device)
baseline_tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [31]:
# get the predictions
baseline_results = []
start_time = time.time()

for index, row in data.iterrows():
    # tokenize the input
    input = baseline_tokenizer(row['text'], return_tensors='pt', padding=True, truncation=True)
    input = input.to(device)
    # get the prediction
    output = baseline_model(**input)
    # get the prediction
    overall_prediction = torch.argmax(output.logits, dim=1).item()
    baseline_results.append({
        'text': row['text'],
        'category': row['category'],
        'label': row['label'],
        'dataset': row['dataset'],
        'prediction': overall_prediction
    })

In [32]:
# write the results to a csv
baseline_results = pd.DataFrame(baseline_results)
baseline_results.to_csv('baseline_results.csv', index=False)
# drop the text column
baseline_results.drop(columns=['text'], inplace=True)

# Metrics

## Domain-specific detection

In [89]:
# Category detection metrics
category_detection_prediction = precision_score(results['category'], results['category_prediction'], average='micro')
category_detection_recall = recall_score(results['category'], results['category_prediction'], average='micro')
category_detection_f1 = f1_score(results['category'], results['category_prediction'], average='micro')
category_detection_accuracy = accuracy_score(results['category'], results['category_prediction'])
category_detection_gmean = np.sqrt(category_detection_prediction * category_detection_recall)

print(f'Category precision: {category_detection_prediction}')
print(f'Category recall: {category_detection_recall}')
print(f'Category f-score: {category_detection_f1}')
print(f'Category accuracy: {category_detection_accuracy}')
print(f'Category g-mean: {category_detection_gmean}')

Category precision: 0.7284218697549514
Category recall: 0.7284218697549514
Category f-score: 0.7284218697549514
Category accuracy: 0.7284218697549514
Category g-mean: 0.7284218697549514


In [90]:
# Category detection metrics by category
category_detection_metrics = defaultdict(dict)
for category in categories:
    category_results = results[results['category'] == category]
    category_detection_metrics[category]['precision'] = precision_score(category_results['category'], category_results['category_prediction'], average='macro')
    category_detection_metrics[category]['recall'] = recall_score(category_results['category'], category_results['category_prediction'], average='macro')
    category_detection_metrics[category]['f1'] = f1_score(category_results['category'], category_results['category_prediction'], average='macro')
    category_detection_metrics[category]['accuracy'] = accuracy_score(category_results['category'], category_results['category_prediction'])
    category_detection_metrics[category]['gmean'] = np.sqrt(category_detection_metrics[category]['precision'] * category_detection_metrics[category]['recall'])

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [91]:
# Category detection metrics by dataset
dataset_detection_metrics = defaultdict(dict)
for dataset in data['dataset'].unique():
    dataset_results = results[results['dataset'] == dataset]
    dataset_detection_metrics[dataset]['precision'] = precision_score(dataset_results['category'], dataset_results['category_prediction'], average='macro')
    dataset_detection_metrics[dataset]['recall'] = recall_score(dataset_results['category'], dataset_results['category_prediction'], average='macro')
    dataset_detection_metrics[dataset]['f1'] = f1_score(dataset_results['category'], dataset_results['category_prediction'], average='macro')
    dataset_detection_metrics[dataset]['accuracy'] = accuracy_score(dataset_results['category'], dataset_results['category_prediction'])
    dataset_detection_metrics[dataset]['gmean'] = np.sqrt(dataset_detection_metrics[dataset]['precision'] * dataset_detection_metrics[dataset]['recall'])

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [92]:
dataset_detection_metrics

defaultdict(dict,
            {'FA-KES-Dataset': {'precision': 1.0,
              'recall': 1.0,
              'f1': 1.0,
              'accuracy': 1.0,
              'gmean': 1.0},
             'snope': {'precision': 0.3333333333333333,
              'recall': 0.326984126984127,
              'f1': 0.3301282051282051,
              'accuracy': 0.9809523809523809,
              'gmean': 0.330143467290675},
             'covid_claims': {'precision': 0.2,
              'recall': 0.19466950959488274,
              'f1': 0.19729875742841707,
              'accuracy': 0.9733475479744137,
              'gmean': 0.19731675529203432},
             'covid_fake_news_dataset': {'precision': 0.3333333333333333,
              'recall': 0.3255316875066795,
              'f1': 0.3293863206271965,
              'accuracy': 0.9765950625200385,
              'gmean': 0.3294094147140098},
             'covid_FNIR': {'precision': 0.2,
              'recall': 0.1908539799683711,
              'f1': 0.19531

In [93]:
# calculate the precision, f-score, accuracy, recall, and g-mean
overall_prediction = precision_score(results['label'], results['detection_prediction'])
overall_recall = recall_score(results['label'], results['detection_prediction'])
overall_f1 = f1_score(results['label'], results['detection_prediction'])
overall_accuracy = accuracy_score(results['label'], results['detection_prediction'])
overall_gmean = np.sqrt(overall_prediction * overall_recall)

# print
print(f'Overall precision: {overall_prediction}')
print(f'Overall recall: {overall_recall}')
print(f'Overall f-score: {overall_f1}')
print(f'Overall accuracy: {overall_accuracy}')
print(f'Overall g-mean: {overall_gmean}')

Overall precision: 0.9093540885577334
Overall recall: 0.6408861338120032
Overall f-score: 0.7518737001818111
Overall accuracy: 0.8010028533064787
Overall g-mean: 0.7634084267820888


In [94]:
# get the metrics for each category
overall_category_metrics = defaultdict(list)
for category in categories:
    category_results = results[results['category'] == category]
    detection_prediction = precision_score(category_results['label'], category_results['detection_prediction'])
    detection_recall = recall_score(category_results['label'], category_results['detection_prediction'])
    detection_f1 = f1_score(category_results['label'], category_results['detection_prediction'])
    detection_accuracy = accuracy_score(category_results['label'], category_results['detection_prediction'])
    detection_gmean = np.sqrt(detection_prediction * detection_recall)

    overall_category_metrics[category].append({
        'precision': detection_prediction,
        'recall': detection_recall,
        'f1': detection_f1,
        'accuracy': detection_accuracy,
        'gmean': detection_gmean
    })

    # print
    print(f'Category: {category}')
    print(f'Precision: {detection_prediction}')
    print(f'Recall: {detection_recall}')
    print(f'F-score: {detection_f1}')
    print(f'Accuracy: {detection_accuracy}')
    print(f'G-mean: {detection_gmean}')
    print()

Category: crime
Precision: 0.8362068965517241
Recall: 0.33856893542757416
F-score: 0.4819875776397516
Accuracy: 0.6273458445040214
G-mean: 0.5320842778758952

Category: health
Precision: 0.4735125260960334
Recall: 0.5631595282433272
F-score: 0.5144598809186277
Accuracy: 0.49338066711042083
G-mean: 0.5163943171778214

Category: politics
Precision: 0.9692012260423206
Recall: 0.4987742070354815
F-score: 0.6586112574970514
Accuracy: 0.7434925662999312
G-mean: 0.6952787735700515

Category: science
Precision: 0.8969404186795491
Recall: 0.8890662410215483
F-score: 0.8929859719438877
Accuracy: 0.9081527347781218
G-mean: 0.8929946508550431

Category: social_media
Precision: 0.9742554452511483
Recall: 0.9131023468962645
F-score: 0.9426881720430108
Accuracy: 0.9522957128792625
G-mean: 0.9431834039757531



In [95]:
# Get the metrics for each dataset
overall_dataset_metrics = defaultdict(list)
for dataset in data['dataset'].unique():
    dataset_results = results[results['dataset'] == dataset]
    detection_prediction = precision_score(dataset_results['label'], dataset_results['detection_prediction'])
    detection_recall = recall_score(dataset_results['label'], dataset_results['detection_prediction'])
    detection_f1 = f1_score(dataset_results['label'], dataset_results['detection_prediction'])
    detection_accuracy = accuracy_score(dataset_results['label'], dataset_results['detection_prediction'])
    detection_gmean = np.sqrt(detection_prediction * detection_recall)

    overall_dataset_metrics[dataset].append({
        'precision': detection_prediction,
        'recall': detection_recall,
        'f1': detection_f1,
        'accuracy': detection_accuracy,
        'gmean': detection_gmean
    })

    print(f'Dataset: {dataset}')
    print(f'Precision: {detection_prediction}')
    print(f'Recall: {detection_recall}')
    print(f'F-score: {detection_f1}')
    print(f'Accuracy: {detection_accuracy}')
    print(f'G-mean: {detection_gmean}')
    print()

Dataset: FA-KES-Dataset
Precision: 0.75
Recall: 0.007936507936507936
F-score: 0.015706806282722512
Accuracy: 0.5323383084577115
G-mean: 0.07715167498104596

Dataset: snope
Precision: 0.8377192982456141
Recall: 0.9794871794871794
F-score: 0.9030732860520094
Accuracy: 0.8698412698412699
G-mean: 0.9058340425489515

Dataset: covid_claims
Precision: 0.9568699938385705
Recall: 0.9761156505342552
F-score: 0.9663970130678282
Accuracy: 0.9616204690831557
G-mean: 0.9664449164398585

Dataset: covid_fake_news_dataset
Precision: 0.7900232018561485
Recall: 0.6436672967863895
F-score: 0.7093750000000001
Accuracy: 0.8210965052901571
G-mean: 0.7131003426848673

Dataset: covid_FNIR
Precision: 0.2693570187294845
Recall: 0.3675889328063241
F-score: 0.3108981502117228
Accuracy: 0.18502899314707433
G-mean: 0.3146627703727663

Dataset: fake_news_dataset
Precision: 0.8394648829431438
Recall: 0.14517061885482938
F-score: 0.24753451676528598
Accuracy: 0.5589807812725784
G-mean: 0.3490925902446987

Dataset: isot

# Baseline

In [96]:
# calculate the precision, f-score, accuracy, recall, and g-mean
baseline_prediction = precision_score(baseline_results['label'], baseline_results['prediction'])
baseline_recall = recall_score(baseline_results['label'], baseline_results['prediction'])
baseline_f1 = f1_score(baseline_results['label'], baseline_results['prediction'])
baseline_accuracy = accuracy_score(baseline_results['label'], baseline_results['prediction'])
baseline_gmean = np.sqrt(baseline_prediction * baseline_recall)

# print
print(f'Overall precision: {baseline_prediction}')
print(f'Overall recall: {baseline_recall}')
print(f'Overall f-score: {baseline_f1}')
print(f'Overall accuracy: {baseline_accuracy}')
print(f'Overall g-mean: {baseline_gmean}')

Overall precision: 0.5733203679772765
Overall recall: 0.4995819071722431
Overall f-score: 0.5339171879654453
Overall accuracy: 0.5896693521315878
Overall g-mean: 0.5351826630738145


In [97]:
# get the metrics for each category
baseline_category_metrics = defaultdict(list)
for category in categories:
    category_results = baseline_results[baseline_results['category'] == category]
    detection_prediction = precision_score(category_results['label'], category_results['prediction'])
    detection_recall = recall_score(category_results['label'], category_results['prediction'])
    detection_f1 = f1_score(category_results['label'], category_results['prediction'])
    detection_accuracy = accuracy_score(category_results['label'], category_results['prediction'])
    detection_gmean = np.sqrt(detection_prediction * detection_recall)

    baseline_category_metrics[category].append({
        'precision': detection_prediction,
        'recall': detection_recall,
        'f1': detection_f1,
        'accuracy': detection_accuracy,
        'gmean': detection_gmean
    })

    print(f'Category: {category}')
    print(f'Precision: {detection_prediction}')
    print(f'Recall: {detection_recall}')
    print(f'F-score: {detection_f1}')
    print(f'Accuracy: {detection_accuracy}')
    print(f'G-mean: {detection_gmean}')
    print()

Category: crime
Precision: 0.5257985257985258
Recall: 0.37347294938917974
F-score: 0.43673469387755104
Accuracy: 0.5067024128686327
G-mean: 0.4431382698599368

Category: health
Precision: 0.856718082140866
Recall: 0.7153941651148356
F-score: 0.7797040169133193
Accuracy: 0.8073367354485615
G-mean: 0.782873627804608

Category: politics
Precision: 0.5626445325404691
Recall: 0.5178737718314678
F-score: 0.5393316246251892
Accuracy: 0.5611335803376983
G-mean: 0.5397951891848294

Category: science
Precision: 0.5005324813631523
Recall: 0.3750997605746209
F-score: 0.42883211678832117
Accuracy: 0.5693154454764362
G-mean: 0.4333008353547675

Category: social_media
Precision: 0.5333072441081833
Recall: 0.4258089154284127
F-score: 0.4735338403922629
Accuracy: 0.5931859542349115
G-mean: 0.4765364405833213



In [98]:
# Get the metrics for each dataset
baseline_dataset_metrics = defaultdict(list)
for dataset in data['dataset'].unique():
    dataset_results = baseline_results[baseline_results['dataset'] == dataset]
    detection_prediction = precision_score(dataset_results['label'], dataset_results['prediction'])
    detection_recall = recall_score(dataset_results['label'], dataset_results['prediction'])
    detection_f1 = f1_score(dataset_results['label'], dataset_results['prediction'])
    detection_accuracy = accuracy_score(dataset_results['label'], dataset_results['prediction'])
    detection_gmean = np.sqrt(detection_prediction * detection_recall)

    baseline_dataset_metrics[dataset].append({
        'precision': detection_prediction,
        'recall': detection_recall,
        'f1': detection_f1,
        'accuracy': detection_accuracy,
        'gmean': detection_gmean
    })

    print(f'Dataset: {dataset}')
    print(f'Precision: {detection_prediction}')
    print(f'Recall: {detection_recall}')
    print(f'F-score: {detection_f1}')
    print(f'Accuracy: {detection_accuracy}')
    print(f'G-mean: {detection_gmean}')
    print()

Dataset: FA-KES-Dataset
Precision: 0.458128078817734
Recall: 0.24603174603174602
F-score: 0.3201376936316695
Accuracy: 0.5087064676616916
G-mean: 0.33572913358494294

Dataset: snope
Precision: 0.5931372549019608
Recall: 0.6205128205128205
F-score: 0.606516290726817
Accuracy: 0.5015873015873016
G-mean: 0.606670644576155

Dataset: covid_claims
Precision: 0.5694945848375451
Recall: 0.3966059082338152
F-score: 0.4675805854020007
Accuracy: 0.48933901918976547
G-mean: 0.4752524771673832

Dataset: covid_fake_news_dataset
Precision: 0.5175600739371534
Recall: 0.2646502835538752
F-score: 0.3502188868042526
Accuracy: 0.6668804103879449
G-mean: 0.3700978523629018

Dataset: covid_FNIR
Precision: 0.9911575562700965
Recall: 0.974703557312253
F-score: 0.9828616978876046
Accuracy: 0.9829994728518714
G-mean: 0.9828961267363824

Dataset: fake_news_dataset
Precision: 0.5765467625899281
Recall: 0.38625409678041256
F-score: 0.46259524359270376
Accuracy: 0.5515630268291508
G-mean: 0.47190417357324105

Datas

# Excel sheet generation

In [99]:
def generate_metrics_worksheet(workbook, sheet, sheet_name, category_metrics, dataset_metrics, 
                              prediction, recall, f1, accuracy, gmean):
    category_rows = len(category_detection_metrics)
    dataset_rows = len(dataset_detection_metrics)
 # add the metrics to the sheet
    sheet.write('A1', 'Category')
    sheet.write('B1', 'Precision')
    sheet.write('C1', 'Recall')
    sheet.write('D1', 'F1')
    sheet.write('E1', 'Accuracy')
    sheet.write('F1', 'G-Mean')

    # add the category metrics
    row = 1
    for category, metrics in category_metrics.items():
        metrics = metrics[0]
        sheet.write(row, 0, category)
        sheet.write(row, 1, metrics['precision'])
        sheet.write(row, 2, metrics['recall'])
        sheet.write(row, 3, metrics['f1'])
        sheet.write(row, 4, metrics['accuracy'])
        sheet.write(row, 5, metrics['gmean'])
        row += 1

    # add the dataset metrics
    sheet.write('H1', 'Dataset')
    sheet.write('I1', 'Precision')
    sheet.write('J1', 'Recall')
    sheet.write('K1', 'F1')
    sheet.write('L1', 'Accuracy')
    sheet.write('M1', 'G-Mean')
    row = 1
    for dataset, metrics in dataset_metrics.items():
        metrics = metrics[0]
        sheet.write(row, 7, dataset)
        sheet.write(row, 8, metrics['precision'])
        sheet.write(row, 9, metrics['recall'])
        sheet.write(row, 10, metrics['f1'])
        sheet.write(row, 11, metrics['accuracy'])
        sheet.write(row, 12, metrics['gmean'])
        row += 1

    # create a chart for the category metrics
    category_metrics_chart = workbook.add_chart({'type': 'column'})
    
    # add the category metrics to the chart
    category_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={sheet_name}!$B$2:$B${category_rows + 1}'
    })

    category_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={sheet_name}!$C$2:$C${category_rows + 1}'
    })

    category_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={sheet_name}!$D$2:$D${category_rows + 1}'
    })

    category_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={sheet_name}!$E$2:$E${category_rows + 1}'
    })

    category_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={sheet_name}!$F$2:$F${category_rows + 1}'
    })

    # between 0 and 1
    category_metrics_chart.set_y_axis({
        'min': 0,
        'max': 1
    })

    # add the chart to the sheet
    sheet.insert_chart('A10', category_metrics_chart)

    # create a chart for the dataset metrics
    dataset_metrics_chart = workbook.add_chart({'type': 'column'})

    # add the dataset metrics to the chart
    dataset_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={sheet_name}!$I$2:$I${dataset_rows + 1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={sheet_name}!$J$2:$J${dataset_rows + 1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={sheet_name}!$K$2:$K${dataset_rows + 1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={sheet_name}!$L$2:$L${dataset_rows + 1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={sheet_name}!$M$2:$M${dataset_rows + 1}'
    })

    # between 0 and 1
    dataset_metrics_chart.set_y_axis({
        'min': 0,
        'max': 1
    })

    # add the chart to the sheet
    sheet.insert_chart('H16', dataset_metrics_chart)

    # add the overall metrics
    sheet.write('H32', 'Overall')
    sheet.write('I32', prediction)
    sheet.write('J32', recall)
    sheet.write('K32', f1)
    sheet.write('L32', accuracy)
    sheet.write('M32', gmean)

    # create a chart for the overall metrics
    overall_metrics_chart = workbook.add_chart({'type': 'column'})

    # add the overall metrics to the chart
    overall_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={sheet_name}!$H$32',
        'values': f'={sheet_name}!$I$32'
    })

    overall_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={sheet_name}!$H$32',
        'values': f'={sheet_name}!$J$32'
    })

    overall_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={sheet_name}!$H$32',
        'values': f'={sheet_name}!$K$32'
    })

    overall_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={sheet_name}!$H$32',
        'values': f'={sheet_name}!$L$32'
    })

    overall_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={sheet_name}!$H$32',
        'values': f'={sheet_name}!$M$32'
    })

    # between 0 and 1
    overall_metrics_chart.set_y_axis({
        'min': 0,
        'max': 1
    })


    # add the chart to the sheet
    sheet.insert_chart('A25', overall_metrics_chart)



In [100]:
def comparison_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name):
    category_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name)
    dataset_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name)
    overall_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name)

def category_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name):
    category_rows = len(category_detection_metrics)
    # create a chart for the category metrics
    category_metrics_chart = workbook.add_chart({'type': 'column'})
    
    # add the category metrics to the chart, with series from both the metrics sheet and the baseline sheet. make ones of the same metric the same color (use material design colors)
    category_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={metrics_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={metrics_sheet_name}!$B$2:$B${category_rows + 1}',
        'fill': {'color': '#2196f3'}
    })
    category_metrics_chart.add_series({
        'name': 'Baseline Precision',
        'categories': f'={baseline_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={baseline_sheet_name}!$B$2:$B${category_rows + 1}',
        'fill': {'color': '#2196f3', 'transparency': 50}
    })

    category_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={metrics_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={metrics_sheet_name}!$C$2:$C${category_rows + 1}',
        'fill': {'color': '#f44336'}
    })
    category_metrics_chart.add_series({
        'name': 'Baseline Recall',
        'categories': f'={baseline_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={baseline_sheet_name}!$C$2:$C${category_rows + 1}',
        'fill': {'color': '#f44336', 'transparency': 50}
    })

    category_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={metrics_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={metrics_sheet_name}!$D$2:$D${category_rows + 1}',
        'fill': {'color': '#4caf50'}
    })
    category_metrics_chart.add_series({
        'name': 'Baseline F1',
        'categories': f'={baseline_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={baseline_sheet_name}!$D$2:$D${category_rows + 1}',
        'fill': {'color': '#4caf50', 'transparency': 50}
    })

    category_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={metrics_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={metrics_sheet_name}!$E$2:$E${category_rows + 1}',
        'fill': {'color': '#9c27b0'}
    })
    category_metrics_chart.add_series({
        'name': 'Baseline Accuracy',
        'categories': f'={baseline_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={baseline_sheet_name}!$E$2:$E${category_rows + 1}',
        'fill': {'color': '#9c27b0', 'transparency': 50}
    })

    category_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={metrics_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={metrics_sheet_name}!$F$2:$F${category_rows + 1}',
        'fill': {'color': '#009688'}
    })
    category_metrics_chart.add_series({
        'name': 'Baseline G-Mean',
        'categories': f'={baseline_sheet_name}!$A$2:$A${category_rows + 1}',
        'values': f'={baseline_sheet_name}!$F$2:$F${category_rows + 1}',
        'fill': {'color': '#009688', 'transparency': 50}
    })

    # make chart bigger
    category_metrics_chart.set_size({'width': 900, 'height': 450})

    # make range between 0 and 1
    category_metrics_chart.set_y_axis({'min': 0, 'max': 1})

    # add the chart to the sheet
    sheet.insert_chart('B2', category_metrics_chart)

    
def dataset_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name):
    dataset_rows = len(dataset_detection_metrics)
    # create a chart for the dataset metrics
    dataset_metrics_chart = workbook.add_chart({'type': 'column'})

    # add the dataset metrics to the chart, with series from both the metrics sheet and the baseline sheet. make ones of the same metric the same color (use material design colors)
    dataset_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={metrics_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={metrics_sheet_name}!$I$2:$I${dataset_rows + 1}',
        'fill': {'color': '#2196f3'}
    })
    dataset_metrics_chart.add_series({
        'name': 'Baseline Precision',
        'categories': f'={baseline_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={baseline_sheet_name}!$I$2:$I${dataset_rows + 1}',
        'fill': {'color': '#2196f3', 'transparency': 50}
    })

    dataset_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={metrics_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={metrics_sheet_name}!$J$2:$J${dataset_rows + 1}',
        'fill': {'color': '#f44336'}
    })
    dataset_metrics_chart.add_series({
        'name': 'Baseline Recall',
        'categories': f'={baseline_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={baseline_sheet_name}!$J$2:$J${dataset_rows + 1}',
        'fill': {'color': '#f44336', 'transparency': 50}
    })

    dataset_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={metrics_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={metrics_sheet_name}!$K$2:$K${dataset_rows + 1}',
        'fill': {'color': '#4caf50'}
    })
    dataset_metrics_chart.add_series({
        'name': 'Baseline F1',
        'categories': f'={baseline_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={baseline_sheet_name}!$K$2:$K${dataset_rows + 1}',
        'fill': {'color': '#4caf50', 'transparency': 50}
    })

    dataset_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={metrics_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={metrics_sheet_name}!$L$2:$L${dataset_rows + 1}',
        'fill': {'color': '#9c27b0'}
    })
    dataset_metrics_chart.add_series({
        'name': 'Baseline Accuracy',
        'categories': f'={baseline_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={baseline_sheet_name}!$L$2:$L${dataset_rows + 1}',
        'fill': {'color': '#9c27b0', 'transparency': 50}
    })

    dataset_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={metrics_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={metrics_sheet_name}!$M$2:$M${dataset_rows + 1}',
        'fill': {'color': '#009688'}
    })
    dataset_metrics_chart.add_series({
        'name': 'Baseline G-Mean',
        'categories': f'={baseline_sheet_name}!$H$2:$H${dataset_rows + 1}',
        'values': f'={baseline_sheet_name}!$M$2:$M${dataset_rows + 1}',
        'fill': {'color': '#009688', 'transparency': 50}
    })

    # make chart bigger
    dataset_metrics_chart.set_size({'width': 900, 'height': 450})

    # make range between 0 and 1
    dataset_metrics_chart.set_y_axis({'min': 0, 'max': 1})

    # add the chart to the sheet
    sheet.insert_chart('B25', dataset_metrics_chart)


def overall_metrics_charts(workbook, sheet, metrics_sheet_name, baseline_sheet_name):
    # create a chart for the overall metrics
    overall_metrics_chart = workbook.add_chart({'type': 'column'})

    # add the overall metrics to the chart, with series from both the metrics sheet and the baseline sheet. make ones of the same metric the same color (use material design colors)
    overall_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={metrics_sheet_name}!$H$32',
        'values': f'={metrics_sheet_name}!$I$32',
        'fill': {'color': '#2196f3'}
    })
    overall_metrics_chart.add_series({
        'name': 'Baseline Precision',
        'categories': f'={baseline_sheet_name}!$H$32',
        'values': f'={baseline_sheet_name}!$I$32',
        'fill': {'color': '#2196f3', 'transparency': 50}
    })

    overall_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={metrics_sheet_name}!$H$32',
        'values': f'={metrics_sheet_name}!$J$32',
        'fill': {'color': '#f44336'}
    })
    overall_metrics_chart.add_series({
        'name': 'Baseline Recall',
        'categories': f'={baseline_sheet_name}!$H$32',
        'values': f'={baseline_sheet_name}!$J$32',
        'fill': {'color': '#f44336', 'transparency': 50}
    })

    overall_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={metrics_sheet_name}!$H$32',
        'values': f'={metrics_sheet_name}!$K$32',
        'fill': {'color': '#4caf50'}
    })
    overall_metrics_chart.add_series({
        'name': 'Baseline F1',
        'categories': f'={baseline_sheet_name}!$H$32',
        'values': f'={baseline_sheet_name}!$K$32',
        'fill': {'color': '#4caf50', 'transparency': 50}
    })

    overall_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={metrics_sheet_name}!$H$32',
        'values': f'={metrics_sheet_name}!$L$32',
        'fill': {'color': '#9c27b0'}
    })
    overall_metrics_chart.add_series({
        'name': 'Baseline Accuracy',
        'categories': f'={baseline_sheet_name}!$H$32',
        'values': f'={baseline_sheet_name}!$L$32',
        'fill': {'color': '#9c27b0', 'transparency': 50}
    })

    overall_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={metrics_sheet_name}!$H$32',
        'values': f'={metrics_sheet_name}!$M$32',
        'fill': {'color': '#009688'}
    })
    overall_metrics_chart.add_series({
        'name': 'Baseline G-Mean',
        'categories': f'={baseline_sheet_name}!$H$32',
        'values': f'={baseline_sheet_name}!$M$32',
        'fill': {'color': '#009688', 'transparency': 50}
    })

    # make chart bigger
    overall_metrics_chart.set_size({'width': 900, 'height': 450})

    # make range between 0 and 1
    overall_metrics_chart.set_y_axis({'min': 0, 'max': 1})

    # add the chart to the sheet
    sheet.insert_chart('Q2', overall_metrics_chart)
    

In [101]:
def set_column_widths(sheet):
    sheet.set_column('A:A', 20)
    sheet.set_column('B:B', 12)
    sheet.set_column('C:C', 12)
    sheet.set_column('D:D', 12)
    sheet.set_column('E:E', 12)
    sheet.set_column('F:F', 12)
    sheet.set_column('H:H', 20)
    sheet.set_column('I:I', 12)
    sheet.set_column('J:J', 12)
    sheet.set_column('K:K', 12)
    sheet.set_column('L:L', 12)
    sheet.set_column('M:M', 12)

In [102]:
def category_detection_charts(workbook, sheet, sheet_name, category_detection_metrics, dataset_detection_metrics, prediction, recall, f1, accuracy, gmean):
    category_rows = len(category_detection_metrics)
    dataset_rows = len(dataset_detection_metrics)
    # add the metrics to the sheet
    sheet.write('A1', 'Category')
    sheet.write('B1', 'Precision')
    sheet.write('C1', 'Recall')
    sheet.write('D1', 'F1')
    sheet.write('E1', 'Accuracy')
    sheet.write('F1', 'G-Mean')

    # add the category metrics
    row = 1
    for category, metrics in category_detection_metrics.items():
        sheet.write(row, 0, category)
        sheet.write(row, 1, metrics['precision'])
        sheet.write(row, 2, metrics['recall'])
        sheet.write(row, 3, metrics['f1'])
        sheet.write(row, 4, metrics['accuracy'])
        sheet.write(row, 5, metrics['gmean'])
        row += 1

    # add the dataset metrics
    sheet.write('H1', 'Dataset')
    sheet.write('I1', 'Precision')
    sheet.write('J1', 'Recall')
    sheet.write('K1', 'F1')
    sheet.write('L1', 'Accuracy')
    sheet.write('M1', 'G-Mean')
    row = 1
    for dataset, metrics in dataset_detection_metrics.items():
        sheet.write(row, 7, dataset)
        sheet.write(row, 8, metrics['precision'])
        sheet.write(row, 9, metrics['recall'])
        sheet.write(row, 10, metrics['f1'])
        sheet.write(row, 11, metrics['accuracy'])
        sheet.write(row, 12, metrics['gmean'])
        row += 1

    # add the overall metrics
    sheet.write('A8', 'Overall')
    sheet.write('B8', prediction)
    sheet.write('C8', recall)
    sheet.write('D8', f1)
    sheet.write('E8', accuracy)
    sheet.write('F8', gmean)

    # create a chart for the category metrics
    category_metrics_chart = workbook.add_chart({'type': 'column'})
    
    # add the category metrics to the chart
    category_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={sheet_name}!$A$2:$A${category_rows+1}',
        'values': f'={sheet_name}!$B$2:$B${category_rows+1}'
    })

    category_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={sheet_name}!$A$2:$A${category_rows+1}',
        'values': f'={sheet_name}!$C$2:$C${category_rows+1}'
    })

    category_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={sheet_name}!$A$2:$A${category_rows+1}',
        'values': f'={sheet_name}!$D$2:$D${category_rows+1}'
    })

    category_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={sheet_name}!$A$2:$A${category_rows+1}',
        'values': f'={sheet_name}!$E$2:$E${category_rows+1}'
    })

    category_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={sheet_name}!$A$2:$A${category_rows+1}',
        'values': f'={sheet_name}!$F$2:$F${category_rows+1}'
    })

    # make chart bigger
    category_metrics_chart.set_size({'width': 600, 'height': 450})

    # make the sheet between 0 and 1
    category_metrics_chart.set_y_axis({'min': 0, 'max': 1})

    # add the chart to the sheet
    sheet.insert_chart('A10', category_metrics_chart)

    # create a chart for the dataset metrics
    dataset_metrics_chart = workbook.add_chart({'type': 'column'})

    # add the dataset metrics to the chart
    dataset_metrics_chart.add_series({
        'name': 'Precision',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows+1}',
        'values': f'={sheet_name}!$I$2:$I${dataset_rows+1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'Recall',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows+1}',
        'values': f'={sheet_name}!$J$2:$J${dataset_rows+1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'F1',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows+1}',
        'values': f'={sheet_name}!$K$2:$K${dataset_rows+1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'Accuracy',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows+1}',
        'values': f'={sheet_name}!$L$2:$L${dataset_rows+1}'
    })

    dataset_metrics_chart.add_series({
        'name': 'G-Mean',
        'categories': f'={sheet_name}!$H$2:$H${dataset_rows+1}',
        'values': f'={sheet_name}!$M$2:$M${dataset_rows+1}'
    })

    # make chart bigger
    dataset_metrics_chart.set_size({'width': 600, 'height': 450})

    # make the sheet between 0 and 1
    dataset_metrics_chart.set_y_axis({'min': 0, 'max': 1})

    # add the chart to the sheet
    sheet.insert_chart('A33', dataset_metrics_chart)
    


In [103]:
# write the results to sheets in an excel file
with pd.ExcelWriter('results.xlsx', engine='xlsxwriter') as writer:
    results.to_excel(writer, index=False, sheet_name='Domain specific results')
    baseline_results.to_excel(writer, index=False, sheet_name='Baseline results')

    workbook = writer.book

    # create a sheet for the metrics graphs
    metrics_sheet = workbook.add_worksheet('Domain_Specific_Metrics')

    generate_metrics_worksheet(workbook, metrics_sheet, 'Domain_Specific_Metrics', overall_category_metrics, overall_dataset_metrics, 
                              overall_prediction, overall_recall, overall_f1, overall_accuracy, overall_gmean)
    set_column_widths(metrics_sheet)

    
    # create a sheet for the baseline metrics graphs
    baseline_metrics_sheet = workbook.add_worksheet('Baseline_metrics')

    generate_metrics_worksheet(workbook, baseline_metrics_sheet, 'Baseline_metrics', baseline_category_metrics, baseline_dataset_metrics,
                                baseline_prediction, baseline_recall, baseline_f1, baseline_accuracy, baseline_gmean)
    set_column_widths(baseline_metrics_sheet)

    # create a sheet for comparing the metrics
    compare_metrics_sheet = workbook.add_worksheet('Compare_metrics')

    # add charts for the category metrics
    comparison_metrics_charts(workbook, compare_metrics_sheet, 'Domain_Specific_Metrics', 'Baseline_metrics')

    # create a sheet for the category detection metrics
    category_detection_sheet = workbook.add_worksheet('Category_detection')

    category_detection_charts(workbook, category_detection_sheet, 'Category_detection', category_detection_metrics, dataset_detection_metrics,
                                category_detection_prediction, category_detection_recall, category_detection_f1, category_detection_accuracy, category_detection_gmean)
    
    set_column_widths(category_detection_sheet)