## Dependencies and imports

In [10]:
import json

import numpy as np
import pandas as pd

import matplotlib
import matplotlib as mpl
from matplotlib import pyplot as plt

In [11]:
FULL_TEXTS_PREFIXES = ['original', 'human1', 'human2', 'gpt3_5', 'gpt4', 'gemini', 'llama3', 'phi3', 'semplit_mt5', 'semplit_umt5', 'semplit_gpt2_small_italian']
FULL_TEXTS_NAMES = ['Original', 'Human1', 'Human2', 'GPT-3.5-Turbo', 'GPT-4', 'Gemini 1.5 Flash', 'Llama3', 'Phi-3', 'mt5', 'umT5', 'gpt2-italian']

In [12]:
def merge_sets(sets):
  merged = set()
  for s in sets:
    merged = merged.union(s)
  return merged

## Load datasets

In [13]:
dfs_maps = dict()
jsons_maps = dict()
for PREFIXES in FULL_TEXTS_PREFIXES:
  print(PREFIXES)
  tmp_df = pd.read_csv(f'./texts_with_metrics/{PREFIXES}.csv', encoding='utf-8')
  tmp_json = json.load(open(f'./texts_with_metrics/{PREFIXES}.json', 'r', encoding='utf-8'))
  print(tmp_df.shape)
  dfs_maps[PREFIXES] = tmp_df
  jsons_maps[PREFIXES] = tmp_json

original
(619, 37)
human1
(619, 44)
human2
(619, 44)
gpt3_5
(619, 44)
gpt4
(619, 44)
gemini
(619, 44)
llama3
(619, 44)
phi3
(619, 44)
semplit_mt5
(619, 44)
semplit_umt5
(619, 44)
semplit_gpt2_small_italian
(619, 44)


# Basic

In [14]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'Tokens': df[f'{TEXT_PREFIX}_n_tokens'].sum(),
    'Tokens (con punteg.)': df[f'{TEXT_PREFIX}_n_tokens_all'].sum(),
    'Caratteri': df[f'{TEXT_PREFIX}_n_chars'].sum(),
    'Caratteri (con punt)': df[f'{TEXT_PREFIX}_n_chars_all'].sum(),
    'Sillabe': df[f'{TEXT_PREFIX}_n_syllables'].sum(),
    'Frasi': df[f'{TEXT_PREFIX}_n_sentences'].sum(),
    'Types': len(merge_sets([set(j[f'{TEXT_PREFIX}_tokens']) for j in jsons_maps[TEXT_PREFIX]])),
    'Lemmi': len(merge_sets([set(j[f'{TEXT_PREFIX}_lemmas']) for j in jsons_maps[TEXT_PREFIX]])),
  })

d = pd.DataFrame(d)
d.head(10)

Unnamed: 0,Reviewer,Tokens,Tokens (con punteg.),Caratteri,Caratteri (con punt),Sillabe,Frasi,Types,Lemmi
0,original,33295,37429,191925,196071,79438,1314,5622,4096
1,human1,34135,38193,181872,185945,76008,1506,5270,3640
2,human2,29755,33933,166464,170654,69169,1744,5143,3693
3,gpt3_5,30032,33836,169761,173567,70641,1515,5054,3560
4,gpt4,31722,36017,175147,179442,73110,1840,4930,3376
5,gemini,39255,46577,217602,225499,91026,2225,4925,3519
6,llama3,36035,41655,199251,204884,83598,1944,5246,3735
7,phi3,36056,41748,199362,205060,83673,1900,5269,3758
8,semplit_mt5,27853,31163,151195,154506,63362,1417,4232,2944
9,semplit_umt5,26998,29684,147628,150314,61788,1167,4177,3024


# Pos

In [15]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'Altro':                      df[f'{TEXT_PREFIX}_n_other'].sum(),
    'Nomi':                       df[f'{TEXT_PREFIX}_n_nouns'].sum(),
    'Verbi':                      df[f'{TEXT_PREFIX}_n_verbs'].sum(),
    'Numeri':                     df[f'{TEXT_PREFIX}_n_number'].sum(),
    'Simboli':                    df[f'{TEXT_PREFIX}_n_symbols'].sum(),
    'Avverbi':                    df[f'{TEXT_PREFIX}_n_adverbs'].sum(),
    'Articoli':                   df[f'{TEXT_PREFIX}_n_articles'].sum(),
    'Pronomi':                    df[f'{TEXT_PREFIX}_n_pronouns'].sum(),
    'Particelle':                 df[f'{TEXT_PREFIX}_n_particles'].sum(),
    'Agettivi':                   df[f'{TEXT_PREFIX}_n_adjectives'].sum(),
    'Preposizioni':               df[f'{TEXT_PREFIX}_n_prepositions'].sum(),
    'Nomi propri':                df[f'{TEXT_PREFIX}_n_proper_nouns'].sum(),
    'Punteggiatura':              df[f'{TEXT_PREFIX}_n_punctuations'].sum(),
    'Interiezioni':               df[f'{TEXT_PREFIX}_n_interjections'].sum(),
    'Cong. coord.':               df[f'{TEXT_PREFIX}_n_coordinating_conjunctions'].sum(),
    'Cong. sub.':                 df[f'{TEXT_PREFIX}_n_subordinating_conjunctions'].sum(),
  })

d = pd.DataFrame(d)
d.head(10)

Unnamed: 0,Reviewer,Altro,Nomi,Verbi,Numeri,Simboli,Avverbi,Articoli,Pronomi,Particelle,Agettivi,Preposizioni,Nomi propri,Punteggiatura,Interiezioni,Cong. coord.,Cong. sub.
0,original,26,10206,3496,739,32,786,3313,658,0,3630,7786,1055,4089,0,1445,168
1,human1,22,9493,5047,763,34,944,4039,1266,0,2858,6584,1163,4013,0,1543,424
2,human2,22,8891,3975,707,33,796,3838,650,0,2740,5620,1021,4135,0,1196,309
3,gpt3_5,16,8395,4390,700,32,784,3775,750,0,2911,5708,961,3775,1,1257,381
4,gpt4,9,8484,5142,780,36,1035,4400,917,0,2794,5327,954,4264,0,1378,497
5,gemini,17,11143,5835,514,36,852,5544,923,0,3554,7398,1670,6966,1,1528,596
6,llama3,9,10736,4839,743,36,797,4830,640,0,3272,7016,979,5582,1,1807,368
7,phi3,9,10811,4829,734,34,797,4779,621,0,3289,7043,996,5615,0,1805,386
8,semplit_mt5,10,7467,4575,602,31,747,3746,842,0,2233,4950,983,3282,0,1240,455
9,semplit_umt5,20,7251,4494,588,38,778,3525,747,0,2243,4799,771,2698,0,1282,450


In [16]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'Altro':                      df[f'{TEXT_PREFIX}_n_other'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Nomi':                       df[f'{TEXT_PREFIX}_n_nouns'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Verbi':                      df[f'{TEXT_PREFIX}_n_verbs'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Numeri':                     df[f'{TEXT_PREFIX}_n_number'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Simboli':                    df[f'{TEXT_PREFIX}_n_symbols'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Avverbi':                    df[f'{TEXT_PREFIX}_n_adverbs'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Articoli':                   df[f'{TEXT_PREFIX}_n_articles'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Pronomi':                    df[f'{TEXT_PREFIX}_n_pronouns'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Particelle':                 df[f'{TEXT_PREFIX}_n_particles'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Agettivi':                   df[f'{TEXT_PREFIX}_n_adjectives'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Preposizioni':               df[f'{TEXT_PREFIX}_n_prepositions'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Nomi propri':                df[f'{TEXT_PREFIX}_n_proper_nouns'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Punteggiatura':              df[f'{TEXT_PREFIX}_n_punctuations'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Interiezioni':               df[f'{TEXT_PREFIX}_n_interjections'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Cong. coordinati':           df[f'{TEXT_PREFIX}_n_coordinating_conjunctions'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
    'Cong. subordiante':          df[f'{TEXT_PREFIX}_n_subordinating_conjunctions'].sum() / df[f'{TEXT_PREFIX}_n_tokens_all'].sum() * 100,
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,Altro,Nomi,Verbi,Numeri,Simboli,Avverbi,Articoli,Pronomi,Particelle,Agettivi,Preposizioni,Nomi propri,Punteggiatura,Interiezioni,Cong. coordinati,Cong. subordiante
0,original,0.069465,27.267627,9.340351,1.974405,0.085495,2.099976,8.851425,1.757995,0.0,9.698362,20.802052,2.81867,10.924684,0.0,3.860643,0.44885
1,human1,0.057602,24.85534,13.214463,1.997748,0.089022,2.471657,10.575236,3.314744,0.0,7.483047,17.238761,3.045061,10.507161,0.0,4.040007,1.110151
2,human2,0.064834,26.201633,11.71426,2.083518,0.09725,2.345799,11.310524,1.915539,0.0,8.074736,16.562049,3.00887,12.185778,0.0,3.524593,0.910618
3,gpt3_5,0.047287,24.810852,12.974347,2.068802,0.094574,2.317059,11.156756,2.216574,0.0,8.603263,16.869606,2.84017,11.156756,0.002955,3.714978,1.12602
4,gpt4,0.024988,23.555543,14.276592,2.165644,0.099953,2.873643,12.216453,2.54602,0.0,7.757448,14.790238,2.648749,11.838854,0.0,3.825971,1.379904
5,gemini,0.036499,23.923825,12.527642,1.103549,0.077291,1.829229,11.902871,1.981665,0.0,7.630376,15.883376,3.585461,14.95588,0.002147,3.280589,1.279602
6,llama3,0.021606,25.773617,11.616853,1.783699,0.086424,1.913336,11.595247,1.53643,0.0,7.854999,16.843116,2.350258,13.400552,0.002401,4.338015,0.883447
7,phi3,0.021558,25.895851,11.567021,1.758168,0.081441,1.909073,11.447255,1.487496,0.0,7.878222,16.870269,2.385743,13.449746,0.0,4.32356,0.924595
8,semplit_mt5,0.032089,23.961108,14.680872,1.931778,0.099477,2.397073,12.020666,2.701922,0.0,7.165549,15.884222,3.154382,10.53172,0.0,3.979078,1.460065
9,semplit_umt5,0.067376,24.427301,15.139469,1.980865,0.128015,2.620941,11.875084,2.516507,0.0,7.556259,16.166959,2.597359,9.089072,0.0,4.318825,1.515968


## Verbs

In [17]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'Verbi attivi': df[f'{TEXT_PREFIX}_n_active_verbs'].sum(),
    'Verbi passivi':  df[f'{TEXT_PREFIX}_n_passive_verbs'].sum()
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,Verbi attivi,Verbi passivi
0,original,2684,812
1,human1,4124,923
2,human2,3184,791
3,gpt3_5,3662,728
4,gpt4,4450,692
5,gemini,4813,1022
6,llama3,4056,783
7,phi3,4020,809
8,semplit_mt5,3599,976
9,semplit_umt5,3480,1014


In [18]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'Verbi attivi': df[f'{TEXT_PREFIX}_n_active_verbs'].sum() / df[f'{TEXT_PREFIX}_n_verbs'].sum() * 100,
    'Verbi passivi':  df[f'{TEXT_PREFIX}_n_passive_verbs'].sum() / df[f'{TEXT_PREFIX}_n_verbs'].sum() * 100,
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,Verbi attivi,Verbi passivi
0,original,76.773455,23.226545
1,human1,81.711908,18.288092
2,human2,80.100629,19.899371
3,gpt3_5,83.416856,16.583144
4,gpt4,86.542201,13.457799
5,gemini,82.485004,17.514996
6,llama3,83.818971,16.181029
7,phi3,83.247049,16.752951
8,semplit_mt5,78.666667,21.333333
9,semplit_umt5,77.436582,22.563418


## VdB

In [19]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'ALL': df[f'{TEXT_PREFIX}_n_vdb'].sum(),
    'FO':  df[f'{TEXT_PREFIX}_n_vdb_fo'].sum(),
    'AU': df[f'{TEXT_PREFIX}_n_vdb_au'].sum(),
    'AD': df[f'{TEXT_PREFIX}_n_vdb_ad'].sum(),
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,ALL,FO,AU,AD
0,original,24185,20113,4205,3823
1,human1,27155,24097,3174,3553
2,human2,22780,19561,3196,2794
3,gpt3_5,23293,20166,3148,2850
4,gpt4,25513,22549,2987,2931
5,gemini,30887,27294,3577,3895
6,llama3,28829,25200,3655,3898
7,phi3,28849,25203,3670,3966
8,semplit_mt5,22401,19997,2404,2668
9,semplit_umt5,21682,19461,2225,2651


In [20]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'ALL': df[f'{TEXT_PREFIX}_n_vdb'].sum() / df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100,
    'FO':  df[f'{TEXT_PREFIX}_n_vdb_fo'].sum() / df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100,
    'AU': df[f'{TEXT_PREFIX}_n_vdb_au'].sum() / df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100,
    'AD': df[f'{TEXT_PREFIX}_n_vdb_ad'].sum() / df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100,
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,ALL,FO,AU,AD
0,original,72.638534,60.40847,12.629524,11.482205
1,human1,79.55178,70.593233,9.298374,10.408671
2,human2,76.558562,65.740212,10.741052,9.390018
3,gpt3_5,77.560602,67.148375,10.482152,9.489877
4,gpt4,80.426833,71.08316,9.416178,9.239644
5,gemini,78.68297,69.529996,9.112215,9.922303
6,llama3,80.002775,69.932011,10.142917,10.817261
7,phi3,80.011649,69.899601,10.178611,10.999556
8,semplit_mt5,80.425807,71.79478,8.631027,9.57886
9,semplit_umt5,80.309653,72.083117,8.241351,9.819246


# Readability

In [21]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  d.append({
    'Reviewer': TEXT_PREFIX,
    'ttr': round(df[f'{TEXT_PREFIX}_ttr'].mean(), 2),
    'gulpease_index': round(df[f'{TEXT_PREFIX}_gulpease'].mean(), 2),
    'flesch_vacca': round(df[f'{TEXT_PREFIX}_flesch_vacca'].mean(), 2),
    'lexical_density': round(df[f'{TEXT_PREFIX}_lexical_density'].mean(), 2)
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,ttr,gulpease_index,flesch_vacca,lexical_density
0,original,86.15,44.31,19.97,0.55
1,human1,84.56,49.72,34.23,0.54
2,human2,86.3,50.64,33.63,0.56
3,gpt3_5,87.51,48.49,30.33,0.56
4,gpt4,86.64,51.34,36.75,0.56
5,gemini,78.78,50.28,33.95,0.55
6,llama3,82.26,50.26,34.09,0.55
7,phi3,82.09,50.16,33.75,0.55
8,semplit_mt5,84.9,50.36,35.26,0.55
9,semplit_umt5,85.87,48.81,31.86,0.55


## Similarity

In [22]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  if TEXT_PREFIX == 'original':
    continue
  d.append({
    'Reviewer': f'Original vs {TEXT_PREFIX}',
    'semantic_similarity': round(df[f'{TEXT_PREFIX}_semantic_similarity'].mean(), 2)
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,semantic_similarity
0,Original vs human1,83.91
1,Original vs human2,87.87
2,Original vs gpt3_5,81.58
3,Original vs gpt4,80.02
4,Original vs gemini,79.56
5,Original vs llama3,79.04
6,Original vs phi3,79.17
7,Original vs semplit_mt5,76.01
8,Original vs semplit_umt5,75.45
9,Original vs semplit_gpt2_small_italian,72.54


# Diff

In [23]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  if TEXT_PREFIX == 'original':
    continue
  d.append({
    'Reviewer': f'Original vs {TEXT_PREFIX}',
    'editdistance': df[f'{TEXT_PREFIX}_editdistance'].sum(),
    'added_tokens': df[f'{TEXT_PREFIX}_n_added_tokens'].sum(),
    'added_vdb_tokens': df[f'{TEXT_PREFIX}_n_added_vdb_tokens'].sum(),
    '%_added_vdb_tokens': round(df[f'{TEXT_PREFIX}_n_added_vdb_tokens'].sum() / df[f'{TEXT_PREFIX}_n_added_tokens'].sum() * 100, 2),
    'deleted_tokens': df[f'{TEXT_PREFIX}_n_deleted_tokens'].sum(),
    'deleted_vdb_tokens': df[f'{TEXT_PREFIX}_n_deleted_vdb_tokens'].sum(),
    '%_deleted_vdb_tokens': round(df[f'{TEXT_PREFIX}_n_deleted_vdb_tokens'].sum() / df[f'{TEXT_PREFIX}_n_deleted_tokens'].sum() * 100, 2),
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,editdistance,added_tokens,added_vdb_tokens,%_added_vdb_tokens,deleted_tokens,deleted_vdb_tokens,%_deleted_vdb_tokens
0,Original vs human1,67468,10256,9044,88.18,9838,6629,67.38
1,Original vs human2,63642,6627,5776,87.16,9552,6778,70.96
2,Original vs gpt3_5,93928,10835,9047,83.5,13332,9506,71.3
3,Original vs gpt4,105741,13788,11952,86.68,14864,10456,70.34
4,Original vs gemini,128313,16931,13875,81.95,13145,9042,68.79
5,Original vs llama3,140701,16461,13806,83.87,14736,10190,69.15
6,Original vs phi3,141033,16373,13720,83.8,14748,10200,69.16
7,Original vs semplit_mt5,107741,11230,9806,87.32,16408,11363,69.25
8,Original vs semplit_umt5,108230,11270,9587,85.07,16913,11663,68.96
9,Original vs semplit_gpt2_small_italian,118999,13484,11782,87.38,17947,12528,69.81


In [24]:
d = []
for TEXT_PREFIX, df in dfs_maps.items():
  if TEXT_PREFIX == 'original':
    continue
  d.append({
    'Reviewer': f'Original vs {TEXT_PREFIX}',
    'editdistance': round(df[f'{TEXT_PREFIX}_editdistance'].sum() /  df[f'{TEXT_PREFIX}_n_chars'].sum() * 100, 2),
    'added_tokens': round(df[f'{TEXT_PREFIX}_n_added_tokens'].sum() /  df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100, 2),
    'added_vdb_tokens': round(df[f'{TEXT_PREFIX}_n_added_vdb_tokens'].sum() /  df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100, 2),
    'deleted_tokens': round(df[f'{TEXT_PREFIX}_n_deleted_tokens'].sum() /  df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100, 2),
    'deleted_vdb_tokens': round(df[f'{TEXT_PREFIX}_n_deleted_vdb_tokens'].sum() /  df[f'{TEXT_PREFIX}_n_tokens'].sum() * 100, 2),
  })

d = pd.DataFrame(d)
d.head(20)

Unnamed: 0,Reviewer,editdistance,added_tokens,added_vdb_tokens,deleted_tokens,deleted_vdb_tokens
0,Original vs human1,37.1,30.05,26.49,28.82,19.42
1,Original vs human2,38.23,22.27,19.41,32.1,22.78
2,Original vs gpt3_5,55.33,36.08,30.12,44.39,31.65
3,Original vs gpt4,60.37,43.47,37.68,46.86,32.96
4,Original vs gemini,58.97,43.13,35.35,33.49,23.03
5,Original vs llama3,70.61,45.68,38.31,40.89,28.28
6,Original vs phi3,70.74,45.41,38.05,40.9,28.29
7,Original vs semplit_mt5,71.26,40.32,35.21,58.91,40.8
8,Original vs semplit_umt5,73.31,41.74,35.51,62.65,43.2
9,Original vs semplit_gpt2_small_italian,77.47,47.18,41.22,62.79,43.83
