In [2]:
import os
import json
import re

from rouge import Rouge
rouge_scorer = Rouge()

def rouge_score(generation, reference):
    return rouge_scorer.get_scores(
        hyps=generation,
        refs=reference,
    )[0]["rouge-l"]["f"]
# print(rouge_score(hypothesis, reference))

In [3]:
# List all txt files in subdirectories
txt_files = []
for root, dirs, files in os.walk("."):
    for file in files:
        if file.endswith(".txt"):
             txt_files.append(os.path.join(root, file))

print("Number of txt files: ", txt_files)

Number of txt files:  ['.\\form.txt', '.\\datasets\\results_prompts_0\\0.txt', '.\\datasets\\results_prompts_0\\1.txt', '.\\datasets\\results_prompts_0\\10.txt', '.\\datasets\\results_prompts_0\\11.txt', '.\\datasets\\results_prompts_0\\12.txt', '.\\datasets\\results_prompts_0\\13.txt', '.\\datasets\\results_prompts_0\\14.txt', '.\\datasets\\results_prompts_0\\15.txt', '.\\datasets\\results_prompts_0\\16.txt', '.\\datasets\\results_prompts_0\\17.txt', '.\\datasets\\results_prompts_0\\2.txt', '.\\datasets\\results_prompts_0\\3.txt', '.\\datasets\\results_prompts_0\\4.txt', '.\\datasets\\results_prompts_0\\5.txt', '.\\datasets\\results_prompts_0\\6.txt', '.\\datasets\\results_prompts_0\\7.txt', '.\\datasets\\results_prompts_0\\8.txt', '.\\datasets\\results_prompts_0\\9.txt', '.\\datasets\\results_prompts_1\\0.txt', '.\\datasets\\results_prompts_1\\1.txt', '.\\datasets\\results_prompts_1\\10.txt', '.\\datasets\\results_prompts_1\\11.txt', '.\\datasets\\results_prompts_1\\12.txt', '.\\data

In [4]:
# open the first file

def validate_line(line):
    # Check if the line is a valid json
    try:        
        # Add the brackets to make it a valid json
        line = f"[{line}]"
        
        # Remove the last comma
        line = re.sub(r"\,\s*\]", r"]", line, 1)

        json.loads(line)
        return True
    except:
        return False

def extract_json(filename):
    # print(filename)
    with open(filename, 'r') as f:
        text = f.read()
        text = text[max(text.find('['), 0):]

        # Remove the </s> tag, [ and ] from the text
        text = text.replace(r"</s>","")
        text = text.replace(r"[","")
        text = text.replace(r"]","")
        text = text.replace(r"\n","")
        
        # Remove malformed json elements
        text = re.sub(r"(\}\,\s*\w+\s*\{)", r"},\n{", text)
        
        text = filter(validate_line, text.split("\n"))
        text = "\n".join(text)
        
        # Add the brackets to make it a valid json
        text = f"[{text}]"
        
        # Remove the last comma
        text = re.sub(r"\,\s*\]", r"]", text, 1)
        
        # # Remove potential last malformed json elements
        # text = re.sub(r"(\,\s*\{[\w\s\":'\(\)\_\.\,]*\])", r"]", text)

        return json.loads(text)

# for file in txt_files:
#     print(extract_json(file))
    
def natural_sort(list_to_sort): 
    convert = lambda text: int(text) if text.isdigit() else text.lower() 
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 
    return sorted(list_to_sort.copy(), key=alphanum_key)

In [5]:
TOOLS_FILE = "tools/tools.json"
DATASET_FILES_0 = "datasets/results_prompts_0"
DATASET_FILES_1 = "datasets/results_prompts_1"

In [6]:
from copy import deepcopy

# Open tool json file
with open(TOOLS_FILE, 'r') as json_file:
    tools = json.load(json_file)

def extract_dataset(dataset_files, tools):
    dataset = []

    for file in natural_sort(dataset_files):
        dataset.append(extract_json(file))

    new_dataset = []
    for data, tool in zip(dataset, tools):
        new_entry = tool
        reference = tool['use_cases'][0]['user_request']
        new_data = []
        
        for generation in data:
            scored_generation = generation
            user_request = scored_generation['user_request']
            # print(rouge_score(user_request, reference))
            scored_generation.update({"rouge_score": rouge_score(user_request, reference)})
            new_data.append(scored_generation)

        new_entry.update({"dataset": new_data})
        new_dataset.append(new_entry)

    return deepcopy(new_dataset)

def make_json(data, filename):
    with open(filename, 'w') as outfile:
        json.dump(data, outfile, indent=2)

# print(list(f"{DATASET_FILES_0}/{file}" for file in os.listdir(DATASET_FILES_0)))
dataset_files_0 = [f"{DATASET_FILES_0}/{file}" for file in os.listdir(DATASET_FILES_0)]
dataset_files_1 = [f"{DATASET_FILES_1}/{file}" for file in os.listdir(DATASET_FILES_1)]

dataset_0 = extract_dataset(dataset_files_0, tools)
dataset_1 = extract_dataset(dataset_files_1, tools)

make_json(dataset_0, f"{DATASET_FILES_0}.json")
make_json(dataset_1, f"{DATASET_FILES_1}.json")

print(extract_dataset(dataset_files_1, tools) == extract_dataset(dataset_files_0, tools))


False


True

## Study the datasets with BLUE and ROUGE
1. Load the datasets
2. Compute the BLUE and ROUGE scores
3. Plot the results

In [None]:
# %pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [None]:


# hypothesis = "to make people trustworthy you need to trust them"
# reference = "the way to make people trustworthy is to trust them"



# reference = dataset_0[0]['use_cases'][0]['user_request']
# generation = dataset_0[0]['dataset'][]['user_request']
# print(reference)
# print(generation)

# #  > 0.999999
# print(rouge_score(generation, reference))

0.7058823479584776
Is there a chair in the room?
Is there a sink in the room?
0.8571428521428571


## Study the dataset
1. Load the dataset
2. Compute the number of generation by tool and the mean rouge score (for each tool and for all tools)

In [7]:
def print_dataset_mean_rouge(dataset):
    total_average_rouge_score = 0
    total_number_of_generations = 0

    for data in dataset:    
        classe = data['dataset']
        average_rouge_score = 0

        for generation in classe:
            average_rouge_score += generation['rouge_score']
            
        average_rouge_score /= len(classe)
        total_average_rouge_score += average_rouge_score
        total_number_of_generations += len(classe)
        
        print("Class:", data['tool_name'], "\taverage ROUGE score:", average_rouge_score)

    total_average_rouge_score /= total_number_of_generations
    print("Total average ROUGE score:", total_average_rouge_score)

In [8]:
print_dataset_mean_rouge(dataset_0)

Class: detect_object 	average ROUGE score: 0.38469106173714723
Class: enumerate_objects 	average ROUGE score: 0.3301340027102317
Class: navigation 	average ROUGE score: 0.516528920634861
Class: position 	average ROUGE score: 0.010416666471354169
Class: add_face 	average ROUGE score: 0.05291005201058202
Class: remove_face 	average ROUGE score: 0.06144781029461283
Class: look_for_face 	average ROUGE score: 0.010101009898989903
Class: enumerate_individuals 	average ROUGE score: 0.013935339704892384
Class: age_estimation 	average ROUGE score: 0.15151514904109012
Class: gender_estimation 	average ROUGE score: 0.06593406501567235
Class: emotion_estimation 	average ROUGE score: 0.09256198094665673
Class: colors 	average ROUGE score: 0.32435640436759766
Class: object_color 	average ROUGE score: 0.4510207879933894
Class: ocr 	average ROUGE score: 0.2859504082268971
Class: ocr_objects 	average ROUGE score: 0.2684777295577613
Class: money 	average ROUGE score: 0.17371845877603714
Class: environme

In [9]:
print_dataset_mean_rouge(dataset_1)

Class: detect_object 	average ROUGE score: 0.8560846510871253
Class: enumerate_objects 	average ROUGE score: 0.46048647825218103
Class: navigation 	average ROUGE score: 0.5571747043697814
Class: position 	average ROUGE score: 0.03571428526785715
Class: add_face 	average ROUGE score: 0.4015567715959727
Class: remove_face 	average ROUGE score: 0.4032633982902994
Class: look_for_face 	average ROUGE score: 0.37996031254269425
Class: enumerate_individuals 	average ROUGE score: 0.037037036851851855
Class: age_estimation 	average ROUGE score: 0.36381673412765986
Class: gender_estimation 	average ROUGE score: 0.15120772778003222
Class: emotion_estimation 	average ROUGE score: 0.10148336264821496
Class: colors 	average ROUGE score: 0.60030120898732
Class: object_color 	average ROUGE score: 0.7677018583550037
Class: ocr 	average ROUGE score: 0.7867768545116455
Class: ocr_objects 	average ROUGE score: 0.3409951110237474
Class: money 	average ROUGE score: 0.1476967451334499
Class: environment_desc

### Compute the size of the dataset

In [14]:
def print_dataset_size(dataset):
    total_number_of_elements = 0
    total_per_class = {}

    for data in dataset:    
        classe = data['dataset']
        total_per_class[data['tool_name']] = len(classe)
        total_number_of_elements += len(classe)
    
    print("Total number of elements:", total_number_of_elements)
    print(total_per_class)
    
    # Compute mean
    mean = total_number_of_elements / len(dataset)
    print("Mean:", mean)
    
    # Compute media
    dataset_size = sorted(total_per_class.values())
    if len(dataset_size) % 2 == 0:
        median = (dataset_size[len(dataset_size) // 2] + dataset_size[len(dataset_size) // 2 - 1]) / 2
    else:
        median = dataset_size[len(dataset_size) // 2]
        
    print("Median:", median)

print_dataset_size(dataset_0)
print_dataset_size(dataset_1)

Total number of elements: 510
{'detect_object': 22, 'enumerate_objects': 22, 'navigation': 22, 'position': 24, 'add_face': 21, 'remove_face': 66, 'look_for_face': 22, 'enumerate_individuals': 23, 'age_estimation': 58, 'gender_estimation': 23, 'emotion_estimation': 55, 'colors': 21, 'object_color': 22, 'ocr': 22, 'ocr_objects': 22, 'money': 21, 'environment_description': 22, 'environment_question': 22}
Mean: 28.333333333333332
Median: 22.0
Total number of elements: 455
{'detect_object': 27, 'enumerate_objects': 22, 'navigation': 23, 'position': 21, 'add_face': 24, 'remove_face': 22, 'look_for_face': 24, 'enumerate_individuals': 27, 'age_estimation': 22, 'gender_estimation': 69, 'emotion_estimation': 22, 'colors': 22, 'object_color': 23, 'ocr': 22, 'ocr_objects': 21, 'money': 21, 'environment_description': 22, 'environment_question': 21}
Mean: 25.27777777777778
Median: 22.0


# Select n generation randomly in each class of each dataset

In [205]:
import random
# random.seed(42)
random.seed(None)

def lower_case_first_letter(string):
    return string[0].lower() + string[1:]

def create_an_enumeration(list_of_strings):
    return "\n".join([f"{i+1}. {string}" for i, string in enumerate(list_of_strings)])

def randomly_select_n_generations_for_a_specific_tool(tool, n: int):
    generations = tool['dataset']
    random.shuffle(generations)
    return deepcopy(generations[:n])

def randomly_select_n_generations(dataset, n: int):
    for tool in dataset:
        generations = randomly_select_n_generations_for_a_specific_tool(tool, n)
        list_of_generations = [generation['user_request'] for generation in generations]
        print(f"""The tool '{tool['tool_name']}' should {lower_case_first_letter(tool['description'])}'
{create_an_enumeration(list_of_generations)}""")
        
# randomly_select_n_generations(dataset_0, 5)

def make_description(dataset_a, dataset_b, n: int = 5, filename: str = "form.txt"):
    tools_questions = []
    for tool_a, tool_b in zip(dataset_a, dataset_b):
        random_generations_a = randomly_select_n_generations_for_a_specific_tool(tool_a, n)
        random_generations_b = randomly_select_n_generations_for_a_specific_tool(tool_b, n)
        list_of_generations_a = [generation['user_request'] for generation in random_generations_a]
        list_of_generations_b = [generation['user_request'] for generation in random_generations_b]
        enumerated_generations_a = create_an_enumeration(list_of_generations_a)
        enumerated_generations_b = create_an_enumeration(list_of_generations_b)

        tools_questions.append(f"""The tool '{tool_a['tool_name']}' should {lower_case_first_letter(tool_a['description'])}
In the dataset a, the following user requests were generated for the tool '{tool_a['tool_name']}':

{enumerated_generations_a}

In the dataset b, the following user requests were generated for the tool '{tool_a['tool_name']}':

{enumerated_generations_b}
""")
    with open(filename, 'w') as text_file:
        text_file.write("\n".join(tools_questions))

make_description(dataset_0, dataset_1)