# F1 scores of embeddings from fine-tuning

 This notebook compares the predictions of the fine-tuned BERT models against the gold test data and generates CSV files with F1 scores (F1_micro, F1_weighted, and F1_macro), as well as TXT files with the predictions from the three independent rounds for each word, for further analysis.

In [None]:
# Mount Google Drive at /content/drive; the data paths used below live under it.
# NOTE: google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Parsing the gold dataset, the fine-tuned models' predictions, and the sense maps

In [None]:
import os
import json

def parse_test_data(data_path):
    """Collect gold labels from every sub-directory of *data_path*.

    Each immediate sub-directory is scanned for files whose name contains
    ``'test.gold'``. Such a file is read as one integer label per line.

    Args:
        data_path: Directory whose sub-directories are scanned.

    Returns:
        A ``(labels, directories)`` tuple. ``labels`` maps
        ``'<file name>_<folder name>'`` to the list of integer labels read
        from that file; a file that yields no labels gets no entry.
        ``directories`` is the sorted list of sub-directory names.

    Raises:
        ValueError: If a non-blank line cannot be parsed as an integer.
    """
    directories = sorted(
        d for d in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, d))
    )
    PRED = {}
    for folder in directories:
        folder_path = os.path.join(data_path, folder)
        for word_file in os.listdir(folder_path):
            if 'test.gold' not in word_file:
                continue
            with open(os.path.join(folder_path, word_file), "r", encoding="utf-8") as f:
                # Strip a byte-order mark together with surrounding whitespace,
                # then skip blank lines so a trailing empty line does not make
                # int() raise.
                cleaned = (line.strip('\ufeff \t\r\n') for line in f)
                labels = [int(text) for text in cleaned if text]
            # Store once per file instead of once per line; files without any
            # label are left out, as before.
            if labels:
                PRED[word_file + '_' + folder] = labels
    return PRED, directories
# Folder scanned for gold labels (presumably one sub-directory per target word — confirm).
gold_path = '/content/drive/MyDrive/RD_project/MERGED_DATA/WSD_full'
# t: gold labels keyed by '<file name>_<folder name>'; words: sorted sub-directory names.
# Both are read as globals by later cells.
t, words = parse_test_data(gold_path)
# Debug helper: uncomment to print every gold label list.
# for k, v in t.items():
#   print(f'{k}:{v}')

In [None]:
import os
import json


def parse_out_data(data_path):
    """Load predicted labels from every sub-directory of *data_path*.

    Every file inside an immediate sub-directory is read as one integer
    label per line, except files whose name contains ``'_acc'``, which are
    skipped.

    Args:
        data_path: Directory whose sub-directories are scanned.

    Returns:
        A dict mapping ``'<folder name>_<file name>'`` to the list of integer
        labels read from that file; a file that yields no labels gets no
        entry.

    Raises:
        ValueError: If a non-blank line cannot be parsed as an integer.
    """
    PRED = {}
    for folder in os.listdir(data_path):
        folder_path = os.path.join(data_path, folder)
        if not os.path.isdir(folder_path):
            continue
        for word_file in os.listdir(folder_path):
            if '_acc' in word_file:
                continue
            # Explicit encoding and BOM stripping keep this reader consistent
            # with parse_test_data.
            with open(os.path.join(folder_path, word_file), "r", encoding="utf-8") as f:
                cleaned = (line.strip('\ufeff \t\r\n') for line in f)
                # Blank lines are skipped so a trailing empty line does not
                # make int() raise.
                labels = [int(text) for text in cleaned if text]
            # Store once per file instead of once per line.
            if labels:
                PRED[folder + '_' + word_file] = labels
    return PRED
# Output folders holding the model predictions, one folder per fine-tuning round.
data_path = '/content/drive/MyDrive/RD_project/output'
data_path_2 = '/content/drive/MyDrive/RD_project/output2'
data_path_3 = '/content/drive/MyDrive/RD_project/output3'


# d1..d3: predictions per round, keyed by '<folder name>_<file name>'.
d1 = parse_out_data(data_path)
d2 = parse_out_data(data_path_2)
d3 = parse_out_data(data_path_3)


In [None]:
import os
import json

# Module-level registry of sense maps; parse_classes() fills it in place.
w_classes = {}


def parse_classes(path):
    """Read the ``classes_map.txt`` sense map of every sub-directory of *path*.

    Each non-blank line of a ``classes_map.txt`` file is parsed as a JSON
    object; when a file has several such lines the last one wins. An extra
    ``"ALL": "ALL"`` entry is added to every map.

    Args:
        path: Directory whose immediate sub-directories are scanned.

    Returns:
        The module-level ``w_classes`` dict, updated in place, mapping each
        sub-directory name to its sense map. Entries from earlier calls are
        kept.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    for w_folder in os.listdir(path):
        folder_path = os.path.join(path, w_folder)
        if not os.path.isdir(folder_path):
            continue
        if 'classes_map.txt' not in os.listdir(folder_path):
            continue
        file_path = os.path.join(folder_path, 'classes_map.txt')
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                # A byte-order mark or an empty trailing line would make
                # json.loads raise, so strip the former and skip the latter.
                text = line.strip('\ufeff \t\r\n')
                if not text:
                    continue
                classes = json.loads(text)
                classes["ALL"] = 'ALL'
                w_classes[w_folder] = classes
    return w_classes

# Same folder as the gold data; each sub-directory may hold a classes_map.txt.
classes_path = '/content/drive/MyDrive/RD_project/MERGED_DATA/WSD_full'
# w_classes: sub-directory name -> sense map (with the extra "ALL" entry).
w_classes = parse_classes(classes_path)
print(w_classes)

# Calculating F1 scores for all fine-tuned models

In [None]:
from inspect import modulesbyfile
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.metrics import f1_score, precision_score, recall_score
import os

def gold_info(GOLD_test):
    """Group the positions of the gold labels by sense id.

    Sense ids are taken to be ``0 .. k-1`` where ``k`` is the number of
    distinct values in *GOLD_test*; labels outside that range are not
    counted.

    Args:
        GOLD_test: Sequence of gold labels.

    Returns:
        A ``(counts, positions)`` tuple. ``counts`` maps ``str(sense id)`` to
        the number of items carrying that label; ``positions`` maps
        ``'all_<sense id>'`` to the ascending list of their indices.
    """
    sense_ids = range(len(set(GOLD_test)))
    positions = {
        f'all_{sense_id}': [idx for idx, label in enumerate(GOLD_test) if label == sense_id]
        for sense_id in sense_ids
    }
    counts = {str(sense_id): len(positions[f'all_{sense_id}']) for sense_id in sense_ids}
    return counts, positions

def correct_pred(GOLD_test, PRED, all_preds):
    """Count the correct predictions for each gold sense.

    Args:
        GOLD_test: Sequence of gold labels.
        PRED: Sequence of predicted labels, compared position by position
            with *GOLD_test*.
        all_preds: Dict mapping ``'all_<sense id>'`` to the indices of the
            items whose gold label is that sense (as returned by
            ``gold_info``).

    Returns:
        A dict mapping ``str(sense id)`` to the number of indices listed for
        that sense where the prediction equals the gold label. Indices that
        fall outside *PRED* are ignored.
    """
    n_pred = len(PRED)
    hits = {}
    for n in range(len(set(GOLD_test))):
        # Walk the sense's own index list instead of testing every position of
        # PRED for membership in that list (quadratic in the original). set()
        # keeps the "count each position once" semantics.
        hits[str(n)] = sum(
            1
            for i in set(all_preds['all_%d' % n])
            if 0 <= i < n_pred and PRED[i] == GOLD_test[i]
        )
    return hits

def accuracies(GOLD_BY_SENSES, correct):
    """Compute per-sense and overall accuracy, rounded to three decimals.

    Args:
        GOLD_BY_SENSES: Dict mapping a sense key to its number of gold items.
        correct: Dict mapping a sense key to its number of correct
            predictions.

    Returns:
        A dict with one entry per key of *GOLD_BY_SENSES* holding
        ``correct / gold count``, plus an ``'ALL'`` entry holding the total
        correct count divided by the total gold count. A sense that is
        missing from *correct* or has no gold items gets ``0``; ``'ALL'`` is
        ``0`` when there are no gold items at all.
    """
    senses_accs = {}
    for s, gold_count in GOLD_BY_SENSES.items():
        # A zero gold count used to raise ZeroDivisionError; treat it like a
        # sense without results, which already maps to 0.
        if s in correct and gold_count:
            senses_accs[s] = round(correct[s] / gold_count, 3)
        else:
            senses_accs[s] = 0
    gold_total = sum(GOLD_BY_SENSES.values())
    if gold_total:
        senses_accs['ALL'] = round(sum(correct.values()) / gold_total, 3)
    else:
        senses_accs['ALL'] = 0
    return senses_accs

def process_data(data, gold_test, w_classes, result_path, summary_path,
                 *, model=None, word_list=None):
    """Score the fine-tuned models per word and write the result files.

    For every word the predictions of each round in *data* are compared with
    the gold labels. The per-round scores are averaged over the rounds that
    actually contain predictions for the word; words without gold labels or
    without any usable round are skipped.

    Summary columns (one row per sense plus a final ``ALL`` row per word and
    model):
        * ``f1_micro``: per-sense accuracy; overall accuracy in the ALL row.
        * ``f1_weighted``: the same per-sense values; weighted F1 in the ALL
          row.
        * ``f1_macro``: per-class F1; their mean in the ALL row.

    Args:
        data: List of prediction dicts, one per round, keyed by
            ``'<prefix>_<word>.txt'`` with prefix ``rubert``,
            ``multilingual`` or ``other_model``.
        gold_test: Dict of gold labels keyed by ``'test.gold.txt_<word>'``.
        w_classes: Dict mapping a word to its sense map; the values become
            the ``sense`` column.
        result_path: Text file the raw predictions are appended to: a
            ``word,<word>,results:`` line followed by one line per round.
        summary_path: CSV file the summary table is written to.
        model: ``'ru'``, ``'m'`` or ``'b'`` to report a single model. With
            the default ``None`` all three models are stacked in the summary
            and the ``'ru'`` predictions are dumped (legacy behaviour).
        word_list: Words to process; defaults to the module-level ``words``.

    Raises:
        ValueError: If *model* is not ``None`` and not a known model code.
    """
    prefixes = {'ru': 'rubert', 'm': 'multilingual', 'b': 'other_model'}
    if model is None:
        selected, dumped = list(prefixes), 'ru'
    elif model in prefixes:
        selected, dumped = [model], model
    else:
        raise ValueError(f"unknown model {model!r}; expected one of {sorted(prefixes)}")
    if word_list is None:
        word_list = words  # module-level list produced by parse_test_data

    summary = {
        'word': [],
        'sense': [],
        'number of instances': [],
        'f1_micro': [],
        'f1_weighted': [],
        'f1_macro': [],
    }

    for word in word_list:
        gold_key = f'test.gold.txt_{word}'
        if gold_key not in gold_test:
            continue
        gold_test_word = gold_test[gold_key]
        gold_by_senses, all_preds = gold_info(gold_test_word)
        # Fixing the label set keeps the per-class F1 array the same length in
        # every round, whatever labels a model happened to predict.
        sense_labels = list(range(len(gold_by_senses)))
        pred_keys = {name: f'{prefixes[name]}_{word}.txt' for name in selected}

        f1_scores = {name: 0 for name in selected}
        weighted_f1 = {name: 0 for name in selected}
        accuracy = {name: [] for name in selected}
        predictions = {name: [] for name in selected}

        for d in data:
            # Use a round only when it has predictions for every selected model.
            if any(key not in d for key in pred_keys.values()):
                continue
            for name in selected:
                pred = d[pred_keys[name]]
                hits = correct_pred(gold_test_word, pred, all_preds)
                accuracy[name].append(accuracies(gold_by_senses, hits))
                f1_scores[name] += np.round(
                    f1_score(gold_test_word, pred, labels=sense_labels, average=None), 3)
                weighted_f1[name] += f1_score(gold_test_word, pred, average='weighted')
                predictions[name].append(pred)

        n_rounds = len(predictions[dumped])
        if n_rounds == 0:
            # No round covers this word: nothing to average or report.
            continue

        instance_counts = list(gold_by_senses.values()) + [sum(gold_by_senses.values())]
        for name in selected:
            rounds = accuracy[name]
            avg_accuracy = {
                key: round(sum(r[key] for r in rounds) / n_rounds, 3)
                for key in rounds[0]
            }
            per_sense = list(avg_accuracy.values())
            per_class_f1 = np.round(f1_scores[name] / n_rounds, 3)

            summary['word'].extend([word] * len(avg_accuracy))
            summary['sense'].extend(list(w_classes[word].values()))
            summary['number of instances'].extend(instance_counts)
            summary['f1_micro'].extend(per_sense)
            summary['f1_weighted'].extend(
                per_sense[:-1] + [round(weighted_f1[name] / n_rounds, 3)])
            summary['f1_macro'].extend(
                list(per_class_f1) + [sum(per_class_f1) / len(per_class_f1)])

        # NOTE: append mode means re-running the cell adds to an existing file.
        with open(result_path, 'a', encoding='utf-8') as file:
            file.write(f'word,{word},results:\n')
            for prediction in predictions[dumped]:
                file.write(f'{prediction}\n')

    df = pd.DataFrame(summary)
    df.to_csv(summary_path, index=False)


# Define file paths
file_paths = {
    'ru_summary_path': '/content/drive/MyDrive/RD_project/results/WSD_full/ft/DeepPavlov/summary_f1_all.csv',
    'm_summary_path': '/content/drive/MyDrive/RD_project/results/WSD_full/ft/mBERT/summary_f1_all.csv',
    'b_summary_path': '/content/drive/MyDrive/RD_project/results/WSD_full/ft/bBERT/summary_f1_all.csv',
    'ru_result_path': '/content/drive/MyDrive/RD_project/results/WSD_full/ft/DeepPavlov/all_results.txt',
    'm_result_path': '/content/drive/MyDrive/RD_project/results/WSD_full/ft/mBERT/all_results.txt',
    'b_result_path': '/content/drive/MyDrive/RD_project/results/WSD_full/ft/bBERT/all_results.txt'
}

# Create directories if they don't exist
for file_path in file_paths.values():
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

# One run per model, so each output folder holds that model's scores and
# predictions rather than three identical copies.
for model_key in ('ru', 'm', 'b'):
    process_data(
        [d1, d2, d3], t, w_classes,
        file_paths[f'{model_key}_result_path'],
        file_paths[f'{model_key}_summary_path'],
        model=model_key,
        word_list=words,
    )