# F1 scores of embeddings from fine-tuning

This notebook compares the saved test-set predictions of the fine-tuned BERT models (multilingual BERT and DeepPavlov RuBERT) with the gold labels and writes one CSV summary per model containing the F1_micro, F1_weighted, and F1_macro scores.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import json

def parse_test_data(data_path):
    """Read the gold test labels stored under each sub-directory of *data_path*.

    Every immediate sub-directory is scanned for files whose name contains
    ``'test.gold'``. Each such file is expected to hold one integer label per
    line; a byte-order mark and blank lines are ignored.

    Args:
        data_path: Directory whose immediate sub-directories hold the files.

    Returns:
        A ``(PRED, directories)`` tuple. ``PRED`` maps
        ``'<file name>_<directory name>'`` to the list of integer labels read
        from that file; files without any label are not registered.
        ``directories`` is the alphabetically sorted list of sub-directory
        names.

    Raises:
        ValueError: If a non-blank line cannot be parsed as an integer.
    """
    directories = sorted(
        d for d in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, d))
    )
    PRED = {}
    for folder in directories:
        folder_path = os.path.join(data_path, folder)
        for word_file in os.listdir(folder_path):
            if 'test.gold' not in word_file:
                continue
            labels = []
            with open(os.path.join(folder_path, word_file), "r", encoding="utf-8") as f:
                for line in f:
                    # Drop a possible BOM first, then surrounding whitespace.
                    cleaned = line.strip('\ufeff').strip()
                    # A blank (e.g. trailing) line would make int() raise.
                    if cleaned:
                        labels.append(int(cleaned))
            # Register once per file, and only when it actually had labels.
            if labels:
                PRED[word_file + '_' + folder] = labels
    return PRED, directories
# Location of the merged WSD data set on the mounted Google Drive.
gold_path = '/content/drive/MyDrive/RD_project/MERGED_DATA/WSD_full'
# `t` maps '<file name>_<directory name>' to the gold labels; `words` is the
# sorted list of sub-directory names. Both are read again by later cells.
t, words = parse_test_data(gold_path)
# Print every parsed label list for a quick visual check.
for k, v in t.items():
  print(f'{k}:{v}')

In [None]:
import os
import json


def parse_out_data(data_path):
    """Read the predicted labels stored under the sub-directories of *data_path*.

    Every file whose name does not contain ``'_acc'`` is expected to hold one
    integer label per line; blank lines are ignored.

    Args:
        data_path: Directory whose immediate sub-directories hold the files.

    Returns:
        A dict mapping ``'<directory name>_<file name>'`` to the list of
        integer labels read from that file. Files without any label are not
        registered.

    Raises:
        ValueError: If a non-blank line cannot be parsed as an integer.
    """
    directories = [
        d for d in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, d))
    ]
    PRED = {}
    # NOTE(review): the first sub-directory returned by os.listdir is skipped.
    # os.listdir gives no ordering guarantee, so which directory that is
    # depends on the file system - confirm what was meant to be excluded.
    for folder in directories[1:]:
        folder_path = os.path.join(data_path, folder)
        for word_file in os.listdir(folder_path):
            if '_acc' in word_file:
                continue
            # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM.
            with open(os.path.join(folder_path, word_file), "r", encoding="utf-8-sig") as f:
                # int() ignores the trailing newline; blank lines would make
                # it raise, so they are filtered out.
                labels = [int(line) for line in f if line.strip()]
            # Register once per file, and only when it actually had labels.
            if labels:
                PRED[folder + '_' + word_file] = labels
    return PRED
# Location of the saved model outputs on the mounted Google Drive.
data_path = '/content/drive/MyDrive/RD_project/output'

# `d` maps '<directory name>_<file name>' to the predicted labels; it is read
# again by the summary cell.
d = parse_out_data(data_path)
# Print every parsed prediction list for a quick visual check.
for k, v in d.items():
  print(f'{k}:{v}')


In [None]:
import os
import json

# Module-level placeholder; it is rebound to the result of parse_classes().
w_classes = {}


def parse_classes(path):
    """Load the ``classes_map.txt`` mapping of every sub-directory of *path*.

    Each non-blank line of ``classes_map.txt`` is parsed as a JSON object; when
    the file has several such lines the last one wins. An extra
    ``"ALL": "ALL"`` entry is appended to the mapping.

    Args:
        path: Directory whose immediate sub-directories may each contain a
            ``classes_map.txt`` file.

    Returns:
        A new dict mapping each sub-directory name to its class mapping.
        Sub-directories without a ``classes_map.txt`` file are omitted.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Build a fresh dict per call so that results never leak between calls
    # through shared module-level state.
    classes_by_word = {}
    for w_folder in os.listdir(path):
        folder_path = os.path.join(path, w_folder)
        if not os.path.isdir(folder_path):
            continue
        file_path = os.path.join(folder_path, 'classes_map.txt')
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                # json.loads raises on an empty string, e.g. a trailing line.
                if not line.strip():
                    continue
                classes = json.loads(line)
                classes["ALL"] = 'ALL'
                classes_by_word[w_folder] = classes
    return classes_by_word

# The class maps are read from the same data-set directory as the gold labels.
classes_path = '/content/drive/MyDrive/RD_project/MERGED_DATA/WSD_full'
# Maps each sub-directory name to its class mapping (plus an 'ALL' entry).
w_classes = parse_classes(classes_path)
print(w_classes)

In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score

# Destination CSV files, one per fine-tuned model.
ru_summary_path = '/content/drive/MyDrive/RD_project/results/WSD_full/ft/DeepPavlov/summary_f1.csv'
m_summary_path = '/content/drive/MyDrive/RD_project/results/WSD_full/ft/mBERT/summary_f1.csv'

ru_directory = os.path.dirname(ru_summary_path)
m_directory = os.path.dirname(m_summary_path)

# Create the output directories. exist_ok=True makes the call a no-op when the
# directory is already there, without a separate (race-prone) existence check.
os.makedirs(ru_directory, exist_ok=True)
os.makedirs(m_directory, exist_ok=True)

def gold_info(GOLD_test):
    """Group the positions of the gold labels by sense id.

    Sense ids are taken to be ``0 .. k-1`` where ``k`` is the number of
    distinct values in *GOLD_test*.

    Args:
        GOLD_test: Sequence of gold sense labels.

    Returns:
        A ``(gold_by_senses, all_preds)`` tuple: ``gold_by_senses`` maps
        ``str(sense_id)`` to the number of instances carrying that label, and
        ``all_preds`` maps ``'all_<sense_id>'`` to the list of their positions.
    """
    gold_by_senses = {}
    all_preds = {}
    sense_total = len(set(GOLD_test))
    for sense_id in range(sense_total):
        positions = [
            idx for idx, label in enumerate(GOLD_test) if label == sense_id
        ]
        gold_by_senses[str(sense_id)] = len(positions)
        all_preds['all_%d' % sense_id] = positions
    return gold_by_senses, all_preds


def correct_pred(GOLD_test, PRED, all_preds):
    """Count, per sense, the predictions that agree with the gold label.

    Args:
        GOLD_test: Sequence of gold sense labels.
        PRED: Sequence of predicted sense labels, aligned with *GOLD_test*.
        all_preds: Dict mapping ``'all_<sense_id>'`` to the positions of the
            instances whose gold label is that sense.

    Returns:
        A dict mapping ``str(sense_id)`` to the number of correct predictions
        among that sense's positions. Senses without any correct prediction
        are omitted.
    """
    hits_by_sense = {}
    for n in range(len(set(GOLD_test))):
        # A set gives O(1) membership tests; scanning the position list for
        # every prediction made the whole count quadratic.
        positions = set(all_preds['all_%d' % n])
        hits = sum(
            1 for i, pred in enumerate(PRED)
            if i in positions and pred == GOLD_test[i]
        )
        if hits:
            hits_by_sense[str(n)] = hits
    return hits_by_sense


def accuracies(GOLD_BY_SENSES, correct):
    """Compute the per-sense and the overall accuracy.

    Args:
        GOLD_BY_SENSES: Dict mapping a sense key to its number of gold
            instances.
        correct: Dict mapping a sense key to its number of correct
            predictions; senses without a correct prediction may be missing.

    Returns:
        A dict with one entry per key of *GOLD_BY_SENSES* (accuracy rounded to
        three decimals, or ``0`` when the sense is missing from *correct*),
        followed by an ``'ALL'`` entry holding the overall accuracy.

    Raises:
        ZeroDivisionError: If the instance counts sum to zero.
    """
    senses_accs = {}
    # Iterate over the argument itself, not over a module-level variable that
    # merely happens to share its name in the calling script.
    for sense, gold_count in GOLD_BY_SENSES.items():
        if sense in correct:
            senses_accs[sense] = round(correct[sense] / gold_count, 3)
        else:
            senses_accs[sense] = 0
    overall = sum(correct.values()) / sum(GOLD_BY_SENSES.values())
    senses_accs['ALL'] = round(overall, 3)
    return senses_accs


# Column-oriented tables, one per model. Every processed word contributes one
# row per sense followed by one aggregate row:
#   * 'f1_micro'    - per-sense accuracy; aggregate row: overall accuracy.
#   * 'f1_weighted' - per-sense accuracy again; aggregate row: weighted F1.
#   * 'f1_macro'    - per-class F1; aggregate row: mean of the per-class F1.
r_summary = {
    'word': [],
    'sense': [],
    'number of instances': [],
    'f1_micro': [],
    'f1_weighted': [],
    'f1_macro': []
}
m_summary = {
    'word': [],
    'sense': [],
    'number of instances': [],
    'f1_micro': [],
    'f1_weighted': [],
    'f1_macro': []
}
for word in words:
    gold_key = 'test.gold.txt_%s' % word
    m_key = 'multilingual_%s.txt' % word
    ru_key = 'rubert_%s.txt' % word

    # The gold labels and BOTH prediction lists are indexed below, so all three
    # keys are required. The former `a and b or c` test was evaluated as
    # `(a and b) or c` and let words through that then raised KeyError.
    if gold_key not in t or m_key not in d or ru_key not in d:
        continue

    GOLD_test = t[gold_key]
    PRED_M = d[m_key]
    PRED_RU = d[ru_key]
    gold_by_senses, all_preds = gold_info(GOLD_test)
    m = correct_pred(GOLD_test, PRED_M, all_preds)
    r = correct_pred(GOLD_test, PRED_RU, all_preds)
    ru_accuracy = accuracies(gold_by_senses, r)
    m_accuracy = accuracies(gold_by_senses, m)
    # average=None yields one F1 value per class instead of a single score.
    ru_f1_macro = np.round(f1_score(GOLD_test, PRED_RU, average=None), 3)
    m_f1_macro = np.round(f1_score(GOLD_test, PRED_M, average=None), 3)
    ru_f1_weighted = f1_score(GOLD_test, PRED_RU, average='weighted')
    m_f1_weighted = f1_score(GOLD_test, PRED_M, average='weighted')

    # Per-sense instance counts plus their total for the aggregate row.
    instance_counts = list(gold_by_senses.values()) + [sum(gold_by_senses.values())]

    r_summary['word'].extend([word] * len(ru_accuracy))
    # NOTE(review): this table labels senses with the names from w_classes,
    # while the multilingual table below uses the numeric accuracy keys -
    # confirm that the difference is intended.
    r_summary['sense'].extend(list(w_classes[word].values()))
    r_summary['number of instances'].extend(instance_counts)
    r_summary['f1_micro'].extend(list(ru_accuracy.values()))
    r_summary['f1_weighted'].extend(list(ru_accuracy.values())[:-1] + [round(ru_f1_weighted, 3)])
    r_summary['f1_macro'].extend(list(ru_f1_macro) + [sum(ru_f1_macro / len(ru_f1_macro))])

    m_summary['word'].extend([word] * len(m_accuracy))
    m_summary['sense'].extend(list(m_accuracy.keys()))
    m_summary['number of instances'].extend(instance_counts)
    m_summary['f1_micro'].extend(list(m_accuracy.values()))
    m_summary['f1_weighted'].extend(list(m_accuracy.values())[:-1] + [round(m_f1_weighted, 3)])
    m_summary['f1_macro'].extend(list(m_f1_macro) + [sum(m_f1_macro / len(m_f1_macro))])


# All columns of a table must have the same length for DataFrame construction.
r_df = pd.DataFrame(r_summary)
m_df = pd.DataFrame(m_summary)

r_df.to_csv(ru_summary_path, index=False)
m_df.to_csv(m_summary_path, index=False)

