In [1]:
# %pip install spacy
# %pip install textstat
# %pip install -U sentence-transformers
# %pip install spacy nltk

# Import Libraries

In [1]:
import os
import glob
import csv
import numpy as np
import string
import spacy
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
from spacy.lang.en.stop_words import STOP_WORDS
from nltk.stem import PorterStemmer
from textstat import flesch_reading_ease
from textstat import gulpease_index
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


  from tqdm.autonotebook import tqdm, trange


In [2]:
nlp = English()
# Create a blank Tokenizer with just the English vocab
tokenizer = Tokenizer(nlp.vocab)

In [3]:
PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
RESULT_PATH = PROJECT_PATH + '/results/LLM Reports/SegResNet'

# Functions

## TTR

In [5]:
def compute_TypeTokenRatio(content):
    tokenized_content = tokenizer(content)
    tokenized_text = [i.text for i in tokenized_content]
    types = len(set(tokenized_text))
    tokens = len(tokenized_text)
    type_token_ratio = types / tokens
    return type_token_ratio

## MAAS Index

In [6]:
def compute_Maas(content):
    tokenized_content = tokenizer(content)
    tokenized_text = [i.text for i in tokenized_content]
    types = len(set(tokenized_text))
    tokens = len(tokenized_text)
    maas = (np.log(tokens) - np.log(types)) / np.log(tokens)**2
    return maas

## Flesh Reading Ease (English)

In [7]:
def compute_FleschReadingEase(text):
    return flesch_reading_ease(text)

## Coherence Score

In [8]:
def remove_special_characters(text):
    cleaned_text = text.replace('-', '').replace('\n', '').replace('*', '').replace('#','')
    return cleaned_text

In [9]:
def compute_CoherenceScore(text):
    text = remove_special_characters(text)
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    sentences = text.split('.')
    embeddings = model.encode(sentences)
    similarities = [cosine_similarity([embeddings[i]], [embeddings[i+1]])[0][0] for i in range(len(embeddings)-1)]
    avg_similarity = sum(similarities) / len(similarities)
    return avg_similarity

## Coverage Score Embedding

In [10]:
def compute_CoverageScoreEmbedding(text, reference_text):
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    reference_text = reference_text.replace('.\n\n', '. ').replace('\n\t', '. ').replace('\n\n', '.').replace('\n', '.').split('. ')
    
    reference_embeddings = model.encode(reference_text)
    text_embeddings = model.encode([text])

    similarities = cosine_similarity(text_embeddings, reference_embeddings)
    coverage = sum(similarities[0]) / len(reference_text)
    return coverage

## Coverage Token Score


In [11]:
def remove_stopwords_punctuation(text):
    # Load the English model
    nlp = nlp = English()
    # Process the text with spaCy
    doc = nlp(text)
    # Filter out stopwords and punctuation
    cleaned_tokens = [token.text for token in doc if not token.is_stop and token.text not in string.punctuation]

    # Join the tokens back into a single string
    cleaned_text = ' '.join(cleaned_tokens)
    cleaned_text = cleaned_text.replace('.', '')
    return cleaned_text

In [12]:
def stem_tokens(tokens):
    stemmer = PorterStemmer()
    return [stemmer.stem(token) for token in tokens]

In [13]:
def remove_numbers(text):
    return ''.join([i for i in text if not i.isdigit()])

In [14]:
def compute_CoverageScoreToken(text, reference_text):
    text = remove_special_characters(text)
    text = remove_stopwords_punctuation(text)
    text = remove_numbers(text)
    text = text.lower()
    reference_text = remove_special_characters(reference_text)
    reference_text = remove_stopwords_punctuation(reference_text)
    reference_text = remove_numbers(reference_text)
    reference_text = reference_text.lower()


    tokenized_text = tokenizer(text)
    tokenized_text = [i.text for i in tokenized_text]

    tokenized_reference_text = tokenizer(reference_text)
    tokenized_reference_text = [i.text for i in tokenized_reference_text]
    
    stemmed_text = stem_tokens(tokenized_text)
    stemmed_reference_text = stem_tokens(tokenized_reference_text)

    set_tokenized_text = set(stemmed_text)
    set_tokenized_reference_text = set(stemmed_reference_text)

    intersection = set_tokenized_text.intersection(set_tokenized_reference_text)
    union = set_tokenized_text.union(set_tokenized_reference_text)
    coverage = len(intersection) / len(union)
    
    
    return coverage

# Prompt Path

In [15]:
PATH_PROMPT1 = PROJECT_PATH + '/notebooks/Explainability/Prompt/PROMPT1.txt'
PATH_PROMPT2 = PROJECT_PATH + '/notebooks/Explainability/Prompt/PROMPT2.txt'

# GEMMA

In [16]:
md_files = glob.glob(RESULT_PATH+'/gemma/'+'*.md')
md_files

['d:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/gemma\\gemma-7b-it_response_PROMPT1.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/gemma\\gemma-7b-it_response_PROMPT1_IT.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/gemma\\gemma-7b-it_response_PROMPT2.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/gemma\\gemma-7b-it_response_PROMPT2_IT.md']

### PROMPT 1 - ENGLISH

In [18]:
path_prompt_1_response = md_files[0]

with open(path_prompt_1_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT1, 'r') as file:
    prompt = file.read()

type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})
    
# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

# Write metrics to a CSV file
csv_file = 'gemma-7b-it_response_PROMPT1_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.5512367491166078
Maas Index: 0.01868750806911318
Flesch Reading Ease: 36.96
Coherence Score: 0.30417519638484175
Coverage Score: 0.5443924963474274
Coverage Score Token: 0.24761904761904763
Metrics saved to gemma-7b-it_response_PROMPT1_metrics.csv


### PROMPT 2 - ENGLISH

In [17]:
path_prompt_2_response = md_files[2]

with open(path_prompt_2_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT2, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})

# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

csv_file = 'gemma-7b-it_response_PROMPT2_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.5333333333333333
Maas Index: 0.018692193319293227
Flesch Reading Ease: 17.6
Coherence Score: 0.38541838950638113
Coverage Score: 0.5945862299866147
Coverage Score Token: 0.2302158273381295
Metrics saved to gemma-7b-it_response_PROMPT2_metrics.csv


# LLAMA3 

In [16]:
md_files = glob.glob(RESULT_PATH+'/llama3/'+'*.md')
md_files

['d:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/llama3\\llama3-70b_response_PROMPT1.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/llama3\\llama3-70b_response_PROMPT1_IT.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/llama3\\llama3-70b_response_PROMPT2.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/llama3\\llama3-70b_response_PROMPT2_IT.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/

## LLAMA 3 8B

### PROMPT 1 - ENGLISH

In [17]:
path_prompt_1_response = md_files[4]

with open(path_prompt_1_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT1, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})

# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

csv_file = 'llama3_8b_response_PROMPT1_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.3577371048252912
Maas Index: 0.025107627281440226
Flesch Reading Ease: 39.23
Coherence Score: 0.37004680099875426
Coverage Score: 0.6857005755106608
Coverage Score Token: 0.319672131147541
Metrics saved to llama3_8b_response_PROMPT1_metrics.csv


### PROMPT 2 - ENGLISH

In [17]:
path_prompt_2_response = md_files[6]

with open(path_prompt_2_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT2, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})
    
# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

# Write metrics to a CSV file
csv_file = 'llama3_8b_response_PROMPT2_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.38489208633093525
Maas Index: 0.023898418268874996
Flesch Reading Ease: 39.43
Coherence Score: 0.4888792307795705
Coverage Score: 0.5760741978883743
Coverage Score Token: 0.2638888888888889
Metrics saved to llama3_8b_response_PROMPT2_metrics.csv


## LLAMA 3 70B

### PROMPT 1 - ENGLISH

In [17]:
path_prompt_1_response = md_files[0]

with open(path_prompt_1_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT1, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})
    
# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

# Write metrics to a CSV file
csv_file = 'llama3_70b_response_PROMPT1_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.3903420523138833
Maas Index: 0.024405060585031196
Flesch Reading Ease: 40.65
Coherence Score: 0.3603054212931639
Coverage Score: 0.6515092055002848
Coverage Score Token: 0.3305785123966942
Metrics saved to llama3_70b_response_PROMPT1_metrics.csv


### PROMPT 2 - ENGLISH

In [17]:
path_prompt_2_response = md_files[2]

with open(path_prompt_2_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT2, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})
    
# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

# Write metrics to a CSV file
csv_file = 'llama3_70b_response_PROMPT2_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.38085106382978723
Maas Index: 0.02550041839325452
Flesch Reading Ease: 41.77
Coherence Score: 0.3537936707958579
Coverage Score: 0.5410354302989112
Coverage Score Token: 0.29508196721311475
Metrics saved to llama3_70b_response_PROMPT2_metrics.csv


# MIXTRAL

In [16]:
md_files = glob.glob(RESULT_PATH+'/mixtral/'+'*.md')
md_files

['d:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/mixtral\\mixtral-8x7b_response_PROMPT1.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/mixtral\\mixtral-8x7b_response_PROMPT1_IT.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/mixtral\\mixtral-8x7b_response_PROMPT2.md',
 'd:\\User\\Salvarki\\Desktop\\GitHub Tesi Project\\Visual-and-Textual-Explainability-in-Brain-Multiple-Sclerosis-Detection-with-3D-MRI/results/LLM Reports/SegResNet/mixtral\\mixtral-8x7b_response_PROMPT2_IT.md']

## PROMPT 1 - ENGLISH

In [17]:
path_prompt_1_response = md_files[0]

with open(path_prompt_1_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT1, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})
    
# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

# Write metrics to a CSV file
csv_file = 'mistral_8x7b_response_PROMPT1_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.4488778054862843
Maas Index: 0.02229498116159587
Flesch Reading Ease: 58.79
Coherence Score: 0.3288361108438535
Coverage Score: 0.7037815054257711
Coverage Score Token: 0.3592233009708738
Metrics saved to mistral_8x7b_response_PROMPT1_metrics.csv


## PROMPT 2 - ENGLISH

In [17]:
path_prompt_2_response = md_files[2]

with open(path_prompt_2_response, 'r') as file:
    content = file.read()

with open(PATH_PROMPT2, 'r') as file:
    prompt = file.read()

In [18]:
type_token_ratio = compute_TypeTokenRatio(content)
maas_index = compute_Maas(content)
flesch_reading_ease = compute_FleschReadingEase(content)
coherence_score = compute_CoherenceScore(content)
coverage_score = compute_CoverageScoreEmbedding(content, prompt)
coverage_score_token = compute_CoverageScoreToken(content, prompt)

metric_names = ['Type Token Ratio', 'Maas Index', 'Flesch Reading Ease', 'Coherence Score', 'Coverage Score', 'Coverage Score Token']
metric_values = [type_token_ratio, maas_index, flesch_reading_ease, coherence_score, coverage_score, coverage_score_token]

# Create a list of dictionaries for each metric
metrics_data = []
for name, value in zip(metric_names, metric_values):
    metrics_data.append({'Metric Name': name, 'Metric Value': round(value, 3)})
    
# Print metric name and value
for name, value in zip(metric_names, metric_values):
    print(f"{name}: {value}")

# Write metrics to a CSV file
csv_file = 'mistral_8x7b_response_PROMPT2_metrics.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=['Metric Name', 'Metric Value'])
    writer.writeheader()
    writer.writerows(metrics_data)

print(f"Metrics saved to {csv_file}")



Type Token Ratio: 0.45318352059925093
Maas Index: 0.020065680868509364
Flesch Reading Ease: 41.56
Coherence Score: 0.3339047302407297
Coverage Score: 0.5590565850337347
Coverage Score Token: 0.2682926829268293
Metrics saved to mistral_8x7b_response_PROMPT2_metrics.csv
