In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
import ast

In [3]:
csv_files = [
    'forms/TAR v.1.csv',
    'forms/TAR v.2.csv',
    'forms/TAR v.3.csv',
    'forms/TAR v.4.csv',
    'forms/TAR v.5.csv',
    'forms/TAR v.6.csv',
    'forms/TAR v.7.csv',
    'forms/TAR v.8.csv'
]

dataframes = []

for file in csv_files:
    df = pd.read_csv(file, header=None)
    df.columns = df.iloc[0].astype(str)
    df = df[1:]  
    dataframes.append(df)


In [4]:
percentage_results = {}
columns_of_interest = ['Spol', 'Dob', 'Stupanj obrazovanja', 'Radni status', 'Područje struke']

combined_df = pd.concat([df[columns_of_interest] for df in dataframes])

for column in columns_of_interest:
    if column in combined_df.columns:
        value_counts = combined_df[column].value_counts(normalize=True) * 100
        percentage_results[column] = value_counts

for column, percentages in percentage_results.items():
    print(f"Percentages for {column}:")
    print(percentages)
    print("\n")

Percentages for Spol:
Spol
Ženski                 55.844156
Muški                  41.558442
Ne želim odgovoriti     2.597403
Name: proportion, dtype: float64


Percentages for Dob:
Dob
18-24    40.259740
25-35    28.571429
55-64    12.987013
45-54    11.688312
35-44     5.194805
>64       1.298701
Name: proportion, dtype: float64


Percentages for Stupanj obrazovanja:
Stupanj obrazovanja
Visoka stručna sprema (diplomski studij, magisterij)    45.454545
Srednja stručna sprema                                  25.974026
Viša stručna sprema (preddiplomski studij)              23.376623
Specijalistički i postdiplomski studij                   3.896104
Niža stručna sprema                                      1.298701
Name: proportion, dtype: float64


Percentages for Radni status:
Radni status
Zaposlen        53.246753
Student         42.857143
Nezaposlen       2.597403
Umirovljenik     1.298701
Name: proportion, dtype: float64


Percentages for Područje struke:
Područje struke
Tehničke zna

In [5]:
df_spol = pd.DataFrame((combined_df['Spol'].value_counts(normalize=True) * 100).reset_index())
df_dob = pd.DataFrame((combined_df['Dob'].value_counts(normalize=True) * 100).reset_index())
df_obrazovanje = pd.DataFrame((combined_df['Stupanj obrazovanja'].value_counts(normalize=True) * 100).reset_index())
df_radni_status = pd.DataFrame((combined_df['Radni status'].value_counts(normalize=True) * 100).reset_index())
df_struka = pd.DataFrame((combined_df['Područje struke'].value_counts(normalize=True) * 100).reset_index())

df_spol.to_csv('annotator_data/gender.csv', index=False)
df_dob.to_csv('annotator_data/age.csv', index=False)
df_obrazovanje.to_csv('annotator_data/education.csv', index=False)
df_radni_status.to_csv('annotator_data/working_status.csv', index=False)
df_struka.to_csv('annotator_data/field.csv', index=False)

In [6]:
for df in dataframes:
    df.replace({"Različiti autori": 1, "Isti autori": 0}, inplace=True)
    print(df.shape)

(10, 33)
(13, 36)
(9, 39)
(14, 33)
(8, 43)
(7, 35)
(7, 36)
(9, 35)


In [7]:
filtered_dataframes = []
for df in dataframes:
    filtered_df = df.filter(regex="^Usporedi")
    filtered_dataframes.append(filtered_df)

In [8]:
csv_file_path = 'csvs\dataset_validation_allLevels.csv' 
df = pd.read_csv(csv_file_path)

In [9]:
filtered_dataframes[0]

Unnamed: 0,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragraf 1 i 2],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragraf 2 i 3],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 3 i 4],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 4 i 5],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 5 i 6],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 1 i 2],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 2 i 3],Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 3 i 4].1,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 1 i 2].1,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 2 i 3].1,...,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 5 i 6].1,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 1 i 2].2,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 2 i 3].2,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 3 i 4].2,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 1 i 2].3,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 2 i 3].3,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 3 i 4].3,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 4 i 5].1,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 5 i 6].2,Usporedi paragrafe. Je li ih napisala ista osoba ili se radi o različitim autorima? [Paragrafi 6 i 7]
1,1,0,1,1,0,1,0,1,0,1,...,0,0,0,1,0,0,0,0,0,0
2,1,1,0,1,0,1,1,1,1,1,...,0,0,1,1,1,0,0,1,1,1
3,0,1,0,0,1,1,1,1,0,1,...,1,1,0,1,1,0,1,1,1,1
4,1,1,0,1,0,1,0,1,0,1,...,0,0,0,0,0,1,0,0,1,0
5,1,1,0,0,1,1,1,1,0,1,...,1,0,1,1,1,0,1,1,0,1
6,1,1,0,1,0,0,1,0,0,0,...,1,0,0,0,1,0,1,1,0,1
7,1,1,1,0,0,1,1,1,1,0,...,1,1,1,0,1,0,0,0,1,1
8,1,1,0,0,1,1,1,0,0,1,...,1,1,0,1,0,0,0,0,1,0
9,1,1,0,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
10,1,0,0,1,1,1,1,0,1,1,...,1,1,1,0,0,0,0,1,1,0


In [10]:
annotator_id = 'hard-252'
annotator_change = df.loc[df['id'] == annotator_id, 'change'].values
annotator_change = ast.literal_eval(annotator_change[0])

In [11]:
annotator_change

[0, 1, 1, 0, 0]

In [12]:
annotator_agreement = []
for filtered_df in filtered_dataframes:
    first_5 = filtered_df.iloc[:, :5]
    annotator_agreement.append(first_5)

annotator_agreement = pd.concat(annotator_agreement, ignore_index=True)
row_concatenated_values = [row.values for _, row in annotator_agreement.iterrows()]
annotator_f1 = []
for i, el in enumerate(row_concatenated_values):
    current_f1 = f1_score(annotator_change, row_concatenated_values[i])
    annotator_f1.append(current_f1)

avg_annotator_f1 = np.mean(annotator_f1)
print("Annotator F1 score: ", avg_annotator_f1)

Annotator F1 score:  0.45355596784168206


In [13]:
import numpy as np
from sklearn.metrics import cohen_kappa_score
from itertools import combinations

# Convert the list to a numpy array for easier manipulation
data = np.array(row_concatenated_values)  # Shape should be (77, 5)

# Initialize variables to store kappa values
num_annotators = data.shape[0]
kappa_values = {}

# Iterate over all pairs of annotators
for i in range(num_annotators):
    for j in range(i+1, num_annotators):
        kappa = cohen_kappa_score(data[j], data[i])
        kappa_values[(i,j)] = kappa

In [14]:
the_same = True
for key, value in kappa_values.items():
    if np.isnan(value):
        annotator_1 = data[key[0]]
        annotator_2 = data[key[1]]
        if (not np.array_equal(annotator_1, annotator_2)):
            the_same = False

if the_same:
    print('All NaN are actually perfect agreement')
# everywhere there is NaN there is complete agreement so cohen's kappa = 1 (checked by hand)

All NaN are actually perfect agreement


In [15]:
values = []
for key, value in kappa_values.items():
    if np.isnan(value):
        kappa_values[key] = 1
        values.append(1)
    else:
        values.append(value)

np.mean(values)

0.037881249799749266

In [16]:
#v1
medium_534 = []
medium_677 = []
hard_51 = []
medium_84 = []

#v2
medium_492 = []
medium_564 = []
hard_72 = []
hard_656 = []

#v3
hard_370 = []
medium_17 = []
hard_501 = []
hard_784 = []

#v4
medium_659 = []
hard_555 = []
hard_3 = []
hard_624 = []

#v5
medium_445 = []
hard_808 = []
medium_85 = []
medium_746 = []

#v6
hard_547 = []
hard_862 = []
medium_192 = []
hard_24 = []

#v7
medium_31 = []
medium_277 = []
hard_121 = []
medium_889 = []

#v8
hard_118 = []
hard_762 = []
hard_693 = []
medium_253 = []

In [17]:
for i, filtered_df in enumerate(filtered_dataframes):
    if i == 0:  # prva anketa
        f = filtered_df.iloc[:, 5:8]
        medium_534.append(f)

        f = filtered_df.iloc[:, 8:13]
        medium_677.append(f)

        f = filtered_df.iloc[:, 13:16]
        hard_51.append(f)

        f = filtered_df.iloc[:, 16:22]
        medium_84.append(f)

    if i == 1:  # druga anketa
        f = filtered_df.iloc[:, 5:11]
        medium_492.append(f)

        f = filtered_df.iloc[:, 11:16]
        medium_564.append(f)

        f = filtered_df.iloc[:, 16:21]
        hard_72.append(f)

        f = filtered_df.iloc[:, 21:25]
        hard_656.append(f)

    if i == 2:  # treca anketa
        f = filtered_df.iloc[:, 5:10]
        hard_370.append(f)

        f = filtered_df.iloc[:, 10:17]
        medium_17.append(f)

        f = filtered_df.iloc[:, 17:24]
        hard_501.append(f)

        f = filtered_df.iloc[:, 24:28]
        hard_784.append(f)

    if i == 3:  # cetvrta anketa
        f = filtered_df.iloc[:, 5:8]
        medium_659.append(f)

        f = filtered_df.iloc[:, 8:13]
        hard_555.append(f)

        f = filtered_df.iloc[:, 13:18]
        hard_3.append(f)

        f = filtered_df.iloc[:, 18:22]
        hard_624.append(f)

    if i == 4:  # peta anketa
        f = filtered_df.iloc[:, 5:13]
        medium_445.append(f)

        f = filtered_df.iloc[:, 13:18]
        hard_808.append(f)

        f = filtered_df.iloc[:, 18:27]
        medium_85.append(f)

        f = filtered_df.iloc[:, 27:32]
        medium_746.append(f)

    if i == 5:  # sesta anketa
        f = filtered_df.iloc[:, 5:9]
        hard_547.append(f)

        f = filtered_df.iloc[:, 9:14]
        hard_862.append(f)

        f = filtered_df.iloc[:, 14:18]
        medium_192.append(f)

        f = filtered_df.iloc[:, 18:24]
        hard_24.append(f)

    if i == 6:  # sedma anketa
        f = filtered_df.iloc[:, 5:12]
        medium_31.append(f)

        f = filtered_df.iloc[:, 12:16]
        medium_277.append(f)

        f = filtered_df.iloc[:, 16:21]
        hard_121.append(f)

        f = filtered_df.iloc[:, 21:25]
        medium_889.append(f)

    if i == 7:  # osma anketa
        f = filtered_df.iloc[:, 5:10]
        hard_118.append(f)

        f = filtered_df.iloc[:, 10:15]
        hard_762.append(f)

        f = filtered_df.iloc[:, 15:19]
        hard_693.append(f)

        f = filtered_df.iloc[:, 19:24]
        medium_253.append(f)

medium_534 = np.vstack(medium_534)
medium_677 = np.vstack(medium_677)
hard_51 = np.vstack(hard_51)
medium_84 = np.vstack(medium_84)

medium_492 = np.vstack(medium_492)
medium_564 = np.vstack(medium_564)
hard_72 = np.vstack(hard_72)
hard_656 = np.vstack(hard_656)

hard_370 = np.vstack(hard_370)
medium_17 = np.vstack(medium_17)
hard_501 = np.vstack(hard_501)
hard_784 = np.vstack(hard_784)

medium_659 = np.vstack(medium_659)
hard_555 = np.vstack(hard_555)
hard_3 = np.vstack(hard_3)
hard_624 = np.vstack(hard_624)

medium_445 = np.vstack(medium_445)
hard_808 = np.vstack(hard_808)
medium_85 = np.vstack(medium_85)
medium_746 = np.vstack(medium_746)

hard_547 = np.vstack(hard_547)
hard_862 = np.vstack(hard_862)
medium_192 = np.vstack(medium_192)
hard_24 = np.vstack(hard_24)

medium_31 = np.vstack(medium_31)
medium_277 = np.vstack(medium_277)
hard_121 = np.vstack(hard_121)
medium_889 = np.vstack(medium_889)

hard_118 = np.vstack(hard_118)
hard_762 = np.vstack(hard_762)
hard_693 = np.vstack(hard_693)
medium_253 = np.vstack(medium_253)

In [18]:
predictions_v1 = [np.vstack(medium_534), np.vstack(medium_677), np.vstack(hard_51), np.vstack(medium_84)]
predictions_v2 = [np.vstack(medium_492), np.vstack(medium_564), np.vstack(hard_72), np.vstack(hard_656)]
predictions_v3 = [np.vstack(hard_370), np.vstack(medium_17), np.vstack(hard_501), np.vstack(hard_784)]
predictions_v4 = [np.vstack(medium_659), np.vstack(hard_555), np.vstack(hard_3), np.vstack(hard_624)]
predictions_v5 = [np.vstack(medium_445), np.vstack(hard_808), np.vstack(medium_85), np.vstack(medium_746)]
predictions_v6 = [np.vstack(hard_547), np.vstack(hard_862), np.vstack(medium_192), np.vstack(hard_24)]
predictions_v7 = [np.vstack(medium_31), np.vstack(medium_277), np.vstack(hard_121), np.vstack(medium_889)]
predictions_v8 = [np.vstack(hard_118), np.vstack(hard_762), np.vstack(hard_693), np.vstack(medium_253)]

In [19]:
predictions_v1[0]

array([[1, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 1],
       [1, 1, 1],
       [0, 1, 0],
       [1, 1, 1],
       [1, 1, 0],
       [0, 1, 0],
       [1, 1, 0]], dtype=int64)

In [20]:
ids_to_extract = [
    'medium-534', 'medium-677', 'hard-51', 'medium-84',
    'medium-492', 'medium-564', 'hard-72', 'hard-656',
    'hard-370', 'medium-17', 'hard-501', 'hard-656',
    'medium-659', 'hard-555', 'hard-3', 'hard-624',
    'medium-445', 'hard-808', 'medium-85', 'medium-746',
    'hard-547', 'hard-862', 'medium-192', 'hard-24',
    'medium-31', 'medium-277', 'hard-121', 'medium-889',
    'hard-118', 'hard-762', 'hard-693', 'medium-253'
]

In [21]:
changes = df[df['id'].isin(ids_to_extract)].set_index('id')['change'].to_dict()
changes = {key: ast.literal_eval(value) if isinstance(value, str) else value for key, value in changes.items()}

for id, change in changes.items():
    if(id == "medium-277"):
        changes[id] = np.delete(change,1) #jer nemamo jedan paragraf
    print(f"{id}: {change}")

medium-17: [1, 0, 1, 1, 1, 1, 1]
medium-31: [1, 0, 0, 0, 1, 1, 1]
medium-84: [0, 0, 0, 0, 1, 1]
medium-85: [1, 1, 0, 0, 1, 1, 1, 1, 1]
medium-192: [0, 0, 1, 0]
medium-253: [1, 1, 1, 1, 0]
medium-277: [0, 1, 1, 1, 0]
medium-445: [1, 0, 0, 0, 0, 1, 1, 1]
medium-492: [1, 1, 0, 0, 1, 1]
medium-534: [1, 1, 1]
medium-564: [0, 0, 1, 1, 0]
medium-659: [1, 1, 1]
medium-677: [1, 1, 1, 1, 0]
medium-746: [1, 1, 1, 1, 1]
medium-889: [1, 0, 0, 1]
hard-3: [1, 0, 1, 1, 1]
hard-24: [0, 0, 1, 0, 1, 1]
hard-51: [0, 0, 1]
hard-72: [0, 0, 1, 0, 1]
hard-118: [0, 1, 1, 0, 0]
hard-121: [0, 1, 1, 1, 0]
hard-370: [0, 1, 1, 0, 0]
hard-501: [0, 1, 1, 0, 1, 0, 0]
hard-547: [1, 1, 0, 1]
hard-555: [0, 1, 0, 0, 0]
hard-624: [1, 1, 0, 1]
hard-656: [0, 1, 0, 1]
hard-693: [1, 0, 0, 1]
hard-762: [1, 0, 0, 0, 0]
hard-808: [0, 0, 1, 1, 1]
hard-862: [0, 1, 1, 0, 1]


In [22]:
pd.DataFrame({'text-id': changes.keys(), 'changes': changes.values()})

Unnamed: 0,text-id,changes
0,medium-17,"[1, 0, 1, 1, 1, 1, 1]"
1,medium-31,"[1, 0, 0, 0, 1, 1, 1]"
2,medium-84,"[0, 0, 0, 0, 1, 1]"
3,medium-85,"[1, 1, 0, 0, 1, 1, 1, 1, 1]"
4,medium-192,"[0, 0, 1, 0]"
5,medium-253,"[1, 1, 1, 1, 0]"
6,medium-277,"[0, 1, 1, 0]"
7,medium-445,"[1, 0, 0, 0, 0, 1, 1, 1]"
8,medium-492,"[1, 1, 0, 0, 1, 1]"
9,medium-534,"[1, 1, 1]"


In [23]:
all_predictions = [
    predictions_v1, predictions_v2, predictions_v3, predictions_v4,
    predictions_v5, predictions_v6, predictions_v7, predictions_v8
]

all_ids = [
    ['medium-534', 'medium-677', 'hard-51', 'medium-84'],
    ['medium-492', 'medium-564', 'hard-72', 'hard-656'],
    ['hard-370', 'medium-17', 'hard-501', 'hard-656'],
    ['medium-659', 'hard-555', 'hard-3', 'hard-624'],
    ['medium-445', 'hard-808', 'medium-85', 'medium-746'],
    ['hard-547', 'hard-862', 'medium-192', 'hard-24'],
    ['medium-31', 'medium-277', 'hard-121', 'medium-889'],
    ['hard-118', 'hard-762', 'hard-693', 'medium-253']
]

In [24]:
predictions_dataframes = []

for version_idx, (predictions, ids) in enumerate(zip(all_predictions, all_ids), start=1):
    print(f"Version {version_idx}")

    dict_predictions = {
        'text-id': [],
        'annotator_label': []
    }
    for id_idx, (pred_set, id) in enumerate(zip(predictions, ids)):
        print(f"ID: {id}")

        ver_predictions = []
        for pred_idx, pred in enumerate(pred_set):
            print(f"Prediction {pred_idx + 1}: {pred}")
            ver_predictions.append(pred)

        dict_predictions['text-id'].append(id)
        dict_predictions['annotator_label'].append(ver_predictions)
    print("\n")

    df_version = pd.DataFrame(dict_predictions)
    predictions_dataframes.append(df_version)

Version 1
ID: medium-534
Prediction 1: [1 0 1]
Prediction 2: [1 1 1]
Prediction 3: [1 1 1]
Prediction 4: [1 0 1]
Prediction 5: [1 1 1]
Prediction 6: [0 1 0]
Prediction 7: [1 1 1]
Prediction 8: [1 1 0]
Prediction 9: [0 1 0]
Prediction 10: [1 1 0]
ID: medium-677
Prediction 1: [0 1 1 1 0]
Prediction 2: [1 1 1 0 0]
Prediction 3: [0 1 1 0 1]
Prediction 4: [0 1 0 0 0]
Prediction 5: [0 1 1 0 1]
Prediction 6: [0 0 1 1 1]
Prediction 7: [1 0 1 0 1]
Prediction 8: [0 1 0 0 1]
Prediction 9: [1 0 1 0 0]
Prediction 10: [1 1 0 0 1]
ID: hard-51
Prediction 1: [0 0 1]
Prediction 2: [0 1 1]
Prediction 3: [1 0 1]
Prediction 4: [0 0 0]
Prediction 5: [0 1 1]
Prediction 6: [0 0 0]
Prediction 7: [1 1 0]
Prediction 8: [1 0 1]
Prediction 9: [0 0 0]
Prediction 10: [1 1 0]
ID: medium-84
Prediction 1: [0 0 0 0 0 0]
Prediction 2: [1 0 0 1 1 1]
Prediction 3: [1 0 1 1 1 1]
Prediction 4: [0 1 0 0 1 0]
Prediction 5: [1 0 1 1 0 1]
Prediction 6: [1 0 1 1 0 1]
Prediction 7: [1 0 0 0 1 1]
Prediction 8: [0 0 0 0 1 0]
Predict

In [25]:
for i in range(8):
    df = predictions_dataframes[i]
    df.to_csv(f'annotator_labels/ver_{i+1}.csv', index=False)

In [26]:
all_f1_scores = {}

for version_idx, (predictions, ids) in enumerate(zip(all_predictions, all_ids), start=1):
    version_f1_scores = []
    print(f"Version {version_idx} Results:")
    texts = {}
    for pred_set, id in zip(predictions, ids):
        correct_change = np.array(changes[id], dtype=int)

        print(f"\nID: {id}")
        texts[id] = []
        for pred_idx, pred in enumerate(pred_set):
            if len(pred) == len(correct_change):  
                f1 = f1_score(correct_change, pred)
                texts[id].append(f1)
                version_f1_scores.append(f1)
                print(f"    F1 Score: {f1:.4f}")
    # Average the F1 scores for this version
    
    all_f1_scores[version_idx] = texts
    
    if version_f1_scores:
        average_f1 = np.mean(version_f1_scores)
        print(f"\nAverage F1 Score for Version {version_idx}: {average_f1:.4f}\n")

Version 1 Results:

ID: medium-534
    F1 Score: 0.8000
    F1 Score: 1.0000
    F1 Score: 1.0000
    F1 Score: 0.8000
    F1 Score: 1.0000
    F1 Score: 0.5000
    F1 Score: 1.0000
    F1 Score: 0.8000
    F1 Score: 0.5000
    F1 Score: 0.8000

ID: medium-677
    F1 Score: 0.8571
    F1 Score: 0.8571
    F1 Score: 0.5714
    F1 Score: 0.4000
    F1 Score: 0.5714
    F1 Score: 0.5714
    F1 Score: 0.5714
    F1 Score: 0.3333
    F1 Score: 0.6667
    F1 Score: 0.5714

ID: hard-51
    F1 Score: 1.0000
    F1 Score: 0.6667
    F1 Score: 0.6667
    F1 Score: 0.0000
    F1 Score: 0.6667
    F1 Score: 0.0000
    F1 Score: 0.0000
    F1 Score: 0.6667
    F1 Score: 0.0000
    F1 Score: 0.0000

ID: medium-84
    F1 Score: 0.0000
    F1 Score: 0.6667
    F1 Score: 0.5714
    F1 Score: 0.5000
    F1 Score: 0.3333
    F1 Score: 0.3333
    F1 Score: 0.8000
    F1 Score: 0.6667
    F1 Score: 0.0000
    F1 Score: 0.5000

Average F1 Score for Version 1: 0.5552

Version 2 Results:

ID: medium-492
    F

In [27]:
all_f1_scores[1]

{'medium-534': [0.8, 1.0, 1.0, 0.8, 1.0, 0.5, 1.0, 0.8, 0.5, 0.8],
 'medium-677': [0.8571428571428571,
  0.8571428571428571,
  0.5714285714285714,
  0.4,
  0.5714285714285714,
  0.5714285714285714,
  0.5714285714285714,
  0.3333333333333333,
  0.6666666666666666,
  0.5714285714285714],
 'hard-51': [1.0,
  0.6666666666666666,
  0.6666666666666666,
  0.0,
  0.6666666666666666,
  0.0,
  0.0,
  0.6666666666666666,
  0.0,
  0.0],
 'medium-84': [0.0,
  0.6666666666666666,
  0.5714285714285714,
  0.5,
  0.3333333333333333,
  0.3333333333333333,
  0.8,
  0.6666666666666666,
  0.0,
  0.5]}

In [28]:
# List to collect rows
rows = []

# Populate the list with rows
for version, texts in all_f1_scores.items():
    version_row = []
    for text_id, f1_scores in texts.items():
        # Prepare a dictionary to represent a row
        row = {'text-id': text_id, f'f1': f1_scores}
        version_row.append(row)
        
    rows.append(version_row)

In [29]:
df_version1 = pd.DataFrame(rows[0])
df_version2 = pd.DataFrame(rows[1])
df_version3 = pd.DataFrame(rows[2])
df_version4 = pd.DataFrame(rows[3])
df_version5 = pd.DataFrame(rows[4])
df_version6 = pd.DataFrame(rows[5])
df_version7 = pd.DataFrame(rows[6])
df_version8 = pd.DataFrame(rows[7])

In [30]:
# df_version1['overall'] = df_version1['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version1['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version1 = pd.concat([df_version1, overall_row], ignore_index=True)


# df_version2['overall'] = df_version2['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version2['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version2 = pd.concat([df_version2, overall_row], ignore_index=True)


# df_version3['overall'] = df_version3['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version3['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version3 = pd.concat([df_version3, overall_row], ignore_index=True)


# df_version4['overall'] = df_version4['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version4['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version4 = pd.concat([df_version4, overall_row], ignore_index=True)


# df_version5['overall'] = df_version5['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version5['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version5 = pd.concat([df_version5, overall_row], ignore_index=True)


# df_version6['overall'] = df_version6['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version6['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version6 = pd.concat([df_version6, overall_row], ignore_index=True)


# df_version7['overall'] = df_version7['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version7['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version7 = pd.concat([df_version7, overall_row], ignore_index=True)


# df_version8['overall'] = df_version8['f1'].apply(lambda x: np.mean(x))
# overall_mean = df_version8['overall'].mean()
# overall_row = pd.DataFrame({'text-id': ['overall'], 'f1': [overall_mean]})
# df_version8 = pd.concat([df_version8, overall_row], ignore_index=True)


In [31]:
# df_version1.to_csv('f1_scores/ver_1.csv', index=False)
# df_version2.to_csv('f1_scores/ver_2.csv', index=False)
# df_version3.to_csv('f1_scores/ver_3.csv', index=False)
# df_version4.to_csv('f1_scores/ver_4.csv', index=False)
# df_version5.to_csv('f1_scores/ver_5.csv', index=False)
# df_version6.to_csv('f1_scores/ver_6.csv', index=False)
# df_version7.to_csv('f1_scores/ver_7.csv', index=False)
# df_version8.to_csv('f1_scores/ver_8.csv', index=False)