# Visualization of Loss and Metrics

In [None]:
import matplotlib.pyplot as plt
import json

def plot_from_json(data):
    """Plot loss curves and evaluation metrics from a trainer-state dict.

    Walks ``data['log_history']`` once. Entries that contain ``'loss'``
    feed the training curve; entries that contain ``'eval_loss'`` feed the
    validation curve and the four metric series. Each metric is read from a
    nested mapping, e.g. ``entry['eval_f1']['f1']``.

    Two figures are shown one after the other: training/validation loss
    against step, then a scatter of accuracy, F1, precision and recall
    against step.

    Args:
        data: Mapping with a ``'log_history'`` iterable of per-step dicts.

    Raises:
        KeyError: If a selected entry lacks ``'step'`` or one of the
            metric keys read above.
    """
    train_x, train_y = [], []
    eval_x, eval_y = [], []
    # One value list per metric; key order is the order the metrics are read.
    metric_series = {'accuracy': [], 'f1': [], 'precision': [], 'recall': []}

    for record in data['log_history']:
        if 'loss' in record:
            train_x.append(record['step'])
            train_y.append(record['loss'])
        if 'eval_loss' not in record:
            continue
        eval_x.append(record['step'])
        eval_y.append(record['eval_loss'])
        for metric, series in metric_series.items():
            series.append(record['eval_' + metric][metric])

    def finish_figure(title, y_label):
        # Decorations shared by both figures, applied to the current figure.
        plt.title(title)
        plt.xlabel('Steps')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True)
        plt.show()

    # Figure 1: training vs. validation loss.
    plt.figure(figsize=(12, 6))
    plt.plot(train_x, train_y, color='red', label='Training Loss')
    plt.plot(eval_x, eval_y, label='Validation Loss', zorder=5)
    finish_figure('Train and Validation Loss', 'Loss')

    # Figure 2: one scatter series per metric; only F1 has an explicit color.
    scatter_styles = (
        ('accuracy', {'label': 'Accuracy', 'marker': 'o'}),
        ('f1', {'color': 'red', 'label': 'F1 Score', 'marker': 'x'}),
        ('precision', {'label': 'Precision', 'marker': '^'}),
        ('recall', {'label': 'Recall', 'marker': 's'}),
    )
    plt.figure(figsize=(12, 6))
    for metric, style in scatter_styles:
        plt.scatter(eval_x, metric_series[metric], **style)
    finish_figure('Evaluation Metrics', 'Metric Value')

# Trainer state saved alongside checkpoint 1075; its 'log_history' list is
# what plot_from_json reads.
data_path = './model/results/checkpoint-1075/trainer_state.json'
# Decode explicitly as UTF-8 (like the other file reads in this notebook)
# instead of relying on the platform's locale-dependent default encoding.
with open(data_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

plot_from_json(data)


# Visualization of Token Classification with Behavioral Characteristics Highlighting

In [None]:
import json

def load_label_to_id(file_path):
    """Return the decoded content of the UTF-8 JSON file at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file; callers in this
        notebook treat it as a label -> id mapping.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)
# Label -> id mapping, loaded once here; later cells invert it (id -> label)
# and iterate its keys to assign default colors.
label_to_id = load_label_to_id('data/label_to_id.json')


In [None]:
from transformers import CamembertTokenizerFast, CamembertForTokenClassification
import torch
from IPython.display import display, HTML
import random

def colorize_text(text, tokens, labels, offset_mapping, color_map):
    """Render tokens as an HTML ``<div>`` of per-word colored ``<span>``s.

    Sub-word tokens are merged back into words. A new word starts when a
    token carries the SentencePiece word-start marker ("▁") or when its
    start offset does not continue from the previous token's end offset.
    A word takes the color of the label of its last sub-token.

    Args:
        text: Original text. Unused; kept so existing callers keep working.
        tokens: Token strings, one per model position.
        labels: Predicted label per token, aligned with ``tokens``.
        offset_mapping: ``(start, end)`` character offsets per token.
        color_map: Label -> CSS declaration (e.g. ``"background-color:#fff"``).
            Labels missing from the map get ``"background-color:none"``.

    Returns:
        An HTML string. Word text is HTML-escaped; words are emitted in
        token order; an empty input yields an empty ``<div>``.
    """
    from html import escape  # local import: keeps this block self-contained

    word_start = "▁"
    no_color = "background-color:none"

    spans = []
    current_word = ""
    current_color = no_color
    last_end = 0

    def render(style, word):
        # Escape the word so characters such as "<" or "&" in the source
        # text cannot break (or inject into) the generated markup.
        return f"<span style='padding: 0 0.3em; {style}'>{escape(word, quote=False)}</span>"

    for token, label, (start, end) in zip(tokens, labels, offset_mapping):
        starts_word = token.startswith(word_start)

        # Zero-width offsets mark tokens that cover no source text; fast
        # tokenizers report special tokens (<s>, </s>, <pad>) this way.
        # Emitting them would put raw "<s>"/"</s>" tags into the HTML.
        if start == end and not starts_word:
            continue

        # Flush on either kind of boundary. Checking the marker as well as
        # the offset gap avoids dropping the pending word when a word-start
        # token is contiguous with the previous token.
        if starts_word or start != last_end:
            if current_word:
                spans.append(render(current_color, current_word))
            current_word = ""

        if starts_word:
            piece = token[len(word_start):]
        elif token.startswith("##"):
            # Remove exactly one WordPiece continuation prefix; lstrip("##")
            # would also eat literal leading '#' characters.
            piece = token[2:]
        else:
            piece = token

        current_word += piece
        current_color = color_map.get(label, no_color)
        last_end = end

    if current_word:
        spans.append(render(current_color, current_word))

    return "<div style='font-size:16px;'>" + "".join(spans) + "</div>"

# Load the token-classification weights from a local directory
# (presumably the best fine-tuned checkpoint — confirm against training output).
model_path = "./model4/best_model"
model = CamembertForTokenClassification.from_pretrained(model_path)
# NOTE(review): the tokenizer is loaded from the 'camembert-base' checkpoint
# name, not from model_path — confirm both share the same vocabulary.
tokenizer = CamembertTokenizerFast.from_pretrained('camembert-base')
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Put the model in evaluation mode before running predictions.
model.eval()

# Pick one test document at random; randint is inclusive on both ends, so the
# index ranges over 0..145.
# NOTE(review): 145 is hard-coded — presumably the highest text_<n>.txt index
# in data/test; confirm.
rand_number = random.randint(0, 145)
# Printed so the sampled document can be identified afterwards.
print(rand_number)
file_path = f'data/test/text_{rand_number}.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()



# Invert label -> id so predicted class ids can be turned back into labels.
id2label = {idx: label for label, idx in label_to_id.items()}

# Tokenize the single document, truncating to at most 512 tokens, and ask for
# the character offsets of every token.
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512, return_offsets_mapping=True)
# Move every returned tensor to the same device as the model.
inputs = {k: v.to(device) for k, v in inputs.items()}
# Take the offsets out of `inputs` before the dict is unpacked into the model
# call below; squeeze() drops size-1 dimensions
# (presumably leaving shape (seq_len, 2) for this one-text batch — TODO confirm).
offset_mapping = inputs.pop('offset_mapping').squeeze()

# Forward pass without gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)

# Highest-scoring class id along the last dimension of the logits.
logits = outputs.logits
predictions = torch.argmax(logits, dim=-1)

# Flatten so tokens and predicted labels are two aligned flat lists.
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"].flatten())
predicted_labels = [id2label[pred.item()] for pred in predictions.flatten()]

print("Tokens:", tokens)
print("Labels:", predicted_labels)

# CSS background declaration for each of the sixteen "_sk<n>" labels. The hex
# codes are listed in label order, so position i (1-based) belongs to "_sk<i>".
color_map = {
    f"_sk{index}": f"background-color:{hex_code}"
    for index, hex_code in enumerate(
        (
            "#8205C4",  # _sk1
            "#7B3761",  # _sk2
            "#01B8AA",  # _sk3
            "#4C5D8A",  # _sk4
            "#480091",  # _sk5
            "#E044A7",  # _sk6
            "#0AAC00",  # _sk7
            "#750985",  # _sk8
            "#499195",  # _sk9
            "#F15628",  # _sk10
            "#998F85",  # _sk11
            "#262A76",  # _sk12
            "#8C0000",  # _sk13
            "#7B3A3A",  # _sk14
            "#0872D7",  # _sk15
            "#154734",  # _sk16
        ),
        start=1,
    )
}



# Every label known to label_to_id that has no dedicated color yet is mapped
# to "no background", so color_map ends up covering all labels.
color_map.update(
    dict.fromkeys(
        (name for name in label_to_id if name not in color_map),
        "background-color:none",
    )
)

# Show the document with each word highlighted by its predicted label.
colored_text = colorize_text(text, tokens, predicted_labels, offset_mapping.tolist(), color_map)
display(HTML(colored_text))

# Legend: one swatch per label whose style does not contain "none".
legend_html = (
    "<div style='font-size:16px;'>Legend:<br>"
    + "".join(
        f"<span style='padding: 0 0.3em; {style}'>{name} </span>"
        for name, style in color_map.items()
        if "none" not in style
    )
    + "</div>"
)
display(HTML(legend_html))


In [None]:
from IPython.display import display, HTML

def calculate_personality_type_proportions(labels):
    """Return the percentage share of each ``_sk1`` .. ``_sk16`` label.

    Only labels named ``_sk1`` through ``_sk16`` are counted; every other
    label is ignored and does not contribute to the total.

    Args:
        labels: Iterable of label strings.

    Returns:
        A dict with all sixteen ``_sk<n>`` keys (in numeric order) mapped to
        their percentage of the counted labels, or an empty dict when none of
        the labels is an ``_sk<n>`` label.
    """
    tally = dict.fromkeys((f"_sk{axis}" for axis in range(1, 17)), 0)
    for tag in labels:
        if tag not in tally:
            continue
        tally[tag] += 1

    matched = sum(tally.values())
    if matched <= 0:
        return {}
    return {axis: (hits / matched) * 100 for axis, hits in tally.items()}

def display_personality_types_html(proportions, color_map):
    """Display label percentages as colored bars, largest share first.

    Each bar is a ``<div>`` whose CSS width is the label's percentage and
    whose style comes from ``color_map`` (falling back to
    ``"background-color:none"``). The result is shown with IPython's
    ``display(HTML(...))``; nothing is returned.

    Args:
        proportions: Mapping of label -> percentage.
        color_map: Mapping of label -> CSS declaration.
    """
    ranked = sorted(proportions.items(), key=lambda item: item[1], reverse=True)

    parts = ["<div style='font-size:16px;'>Pourcentage de chaque axe dans le Texte :<br>"]
    for axis, share in ranked:
        style = color_map.get(axis, "background-color:none")
        parts.append(
            f"<div style='margin: 5px 0; padding: 5px; width: {share}%; {style}'>{axis}: {share:.2f}%</div>"
        )
    parts.append("</div>")

    display(HTML("".join(parts)))

# Percentage share of each "_sk<n>" label among the labels predicted for the
# sampled document (an empty dict when no such label was predicted).
personality_type_proportions = calculate_personality_type_proportions(predicted_labels)

# Show the shares in the notebook as colored bars, using the same color_map
# as the highlighted text.
display_personality_types_html(personality_type_proportions, color_map)



In [None]:
import matplotlib.pyplot as plt
import json
from IPython.display import display, HTML
import os

def generate_html(data, file_name):
    """Write a standalone HTML report: highlighted text plus label shares.

    The page contains the output of ``colorize_text`` followed by one bar
    per label returned by ``calculate_personality_type_proportions``,
    sorted from the largest share to the smallest.

    Args:
        data: Dict with the keys ``'text'``, ``'tokens'``,
            ``'predicted_labels'``, ``'offset_mapping'`` and ``'color_map'``.
            ``'offset_mapping'`` may be any object with a ``tolist()`` method
            (as passed by this notebook) or an already-plain sequence of
            ``(start, end)`` pairs.
        file_name: Destination path. Missing parent directories are created.

    Side effects:
        Writes the file as UTF-8 and prints a confirmation line.
    """
    # Accept both tensor-like offsets and plain lists.
    offsets = data['offset_mapping']
    if hasattr(offsets, 'tolist'):
        offsets = offsets.tolist()

    parts = [
        "<html><head>",
        # The file is written as UTF-8 below; declare it so browsers opening
        # the file from disk decode accented characters correctly.
        "<meta charset='utf-8'>",
        "<style>",
        "body {background-color: #252526; color: white;}",
        "div {font-size:16px; margin-top: 20px;}",
        "span {padding: 0 0.3em;}",
        ".proportion-div {margin: 5px 0; padding: 5px; width: 100%;}",
        "</style></head><body>",
    ]

    # Highlighted text.
    parts.append(
        colorize_text(data['text'], data['tokens'], data['predicted_labels'], offsets, data['color_map'])
    )

    # Label shares, largest first; the inline width overrides the class rule.
    proportions = calculate_personality_type_proportions(data['predicted_labels'])
    sorted_proportions = sorted(proportions.items(), key=lambda x: x[1], reverse=True)

    parts.append("<div>Pourcentage de chaque axe dans le Texte :<br>")
    for label, proportion in sorted_proportions:
        color = data['color_map'].get(label, "background-color:none")
        parts.append(
            f"<div class='proportion-div' style='{color}; margin: 5px 0; padding: 5px; width: {proportion}%;'>{label}: {proportion:.2f}%</div>"
        )
    parts.append("</div></body></html>")

    # Create the target directory on first use instead of failing with
    # FileNotFoundError when it does not exist yet.
    out_dir = os.path.dirname(file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(file_name, 'w', encoding='utf-8') as file:
        file.write("".join(parts))
    print(f"HTML file saved as {file_name}")


# Bundle the values generate_html reads: the source text, the aligned
# tokens/labels, the token offsets and the label -> color mapping.
data = dict(
    text=text,
    tokens=tokens,
    predicted_labels=predicted_labels,
    offset_mapping=offset_mapping,
    color_map=color_map,
)

# One report per sampled document, named after its index.
generate_html(data, f'output/test_{rand_number}.html')
