In [8]:
import os
import json
import re

from rouge import Rouge
rouge_scorer = Rouge()

def rouge_score(generation, reference):
    return rouge_scorer.get_scores(
        hyps=generation,
        refs=reference,
    )[0]["rouge-l"]["f"]
# print(rouge_score(hypothesis, reference))

In [9]:
# List all txt files in subdirectories
txt_files = []
for root, dirs, files in os.walk("."):
    for file in files:
        if file.endswith(".txt"):
             txt_files.append(os.path.join(root, file))

print("Number of txt files: ", txt_files)

Number of txt files:  ['.\\datasets\\results_prompts_0\\0.txt', '.\\datasets\\results_prompts_0\\1.txt', '.\\datasets\\results_prompts_0\\10.txt', '.\\datasets\\results_prompts_0\\11.txt', '.\\datasets\\results_prompts_0\\12.txt', '.\\datasets\\results_prompts_0\\13.txt', '.\\datasets\\results_prompts_0\\14.txt', '.\\datasets\\results_prompts_0\\15.txt', '.\\datasets\\results_prompts_0\\16.txt', '.\\datasets\\results_prompts_0\\17.txt', '.\\datasets\\results_prompts_0\\2.txt', '.\\datasets\\results_prompts_0\\3.txt', '.\\datasets\\results_prompts_0\\4.txt', '.\\datasets\\results_prompts_0\\5.txt', '.\\datasets\\results_prompts_0\\6.txt', '.\\datasets\\results_prompts_0\\7.txt', '.\\datasets\\results_prompts_0\\8.txt', '.\\datasets\\results_prompts_0\\9.txt', '.\\datasets\\results_prompts_1\\0.txt', '.\\datasets\\results_prompts_1\\1.txt', '.\\datasets\\results_prompts_1\\10.txt', '.\\datasets\\results_prompts_1\\11.txt', '.\\datasets\\results_prompts_1\\12.txt', '.\\datasets\\results_p

In [25]:
# open the first file

def validate_line(line):
    # Check if the line is a valid json
    try:        
        # Add the brackets to make it a valid json
        line = f"[{line}]"
        
        # Remove the last comma
        line = re.sub(r"\,\s*\]", r"]", line, 1)

        json.loads(line)
        return True
    except:
        return False

def extract_json(filename):
    print(filename)
    with open(filename, 'r') as f:
        text = f.read()
        text = text[max(text.find('['), 0):]

        # Remove the </s> tag, [ and ] from the text
        text = text.replace(r"</s>","")
        text = text.replace(r"[","")
        text = text.replace(r"]","")
        text = text.replace(r"\n","")
        
        # Remove malformed json elements
        text = re.sub(r"(\}\,\s*\w+\s*\{)", r"},\n{", text)
        
        text = filter(validate_line, text.split("\n"))
        text = "\n".join(text)
        
        # Add the brackets to make it a valid json
        text = f"[{text}]"
        
        # Remove the last comma
        text = re.sub(r"\,\s*\]", r"]", text, 1)
        
        # # Remove potential last malformed json elements
        # text = re.sub(r"(\,\s*\{[\w\s\":'\(\)\_\.\,]*\])", r"]", text)

        return json.loads(text)

# for file in txt_files:
#     print(extract_json(file))
    
def natural_sort(l): 
    convert = lambda text: int(text) if text.isdigit() else text.lower() 
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 
    return sorted(l, key=alphanum_key)

# print(natural_sort(txt_files))

In [12]:
TOOLS_FILE = "tools/tools.json"
DATASET_FILES_0 = "datasets/results_prompts_0"
DATASET_FILES_1 = "datasets/results_prompts_1"

In [38]:
# Open tool json file
with open(TOOLS_FILE, 'r') as json_file:
    tools = json.load(json_file)

def extract_dataset(dataset_files, tools):
    dataset = []

    for file in natural_sort(dataset_files):
        dataset.append(extract_json(file))

    new_dataset = []
    for data, tool in zip(dataset, tools):
        new_entry = tool
        reference = tool['use_cases'][0]['user_request']
        new_data = []
        
        for generation in data:
            scored_generation = generation
            user_request = scored_generation['user_request']
            # print(rouge_score(user_request, reference))
            scored_generation.update({"rouge_score": rouge_score(user_request, reference)})
            new_data.append(scored_generation)

        new_entry.update({"dataset": new_data})
        new_dataset.append(new_entry)

    return new_dataset

def make_json(data, filename):
    with open(filename, 'w') as outfile:
        json.dump(data, outfile, indent=2)

# print(list(f"{DATASET_FILES_0}/{file}" for file in os.listdir(DATASET_FILES_0)))
dataset_files_0 = [f"{DATASET_FILES_0}/{file}" for file in os.listdir(DATASET_FILES_0)]
dataset_files_1 = [f"{DATASET_FILES_1}/{file}" for file in os.listdir(DATASET_FILES_1)]

# dataset_0 = extract_dataset(dataset_files_0, tools)
dataset_1 = extract_dataset(dataset_files_1, tools)

# make_json(dataset_0, f"{DATASET_FILES_0}.json")
make_json(dataset_1, f"{DATASET_FILES_1}.json")

# print(extract_dataset(dataset_files_1, tools) == extract_dataset(dataset_files_0, tools))


datasets/results_prompts_1/0.txt
datasets/results_prompts_1/1.txt
datasets/results_prompts_1/2.txt
datasets/results_prompts_1/3.txt
datasets/results_prompts_1/4.txt
datasets/results_prompts_1/5.txt
datasets/results_prompts_1/6.txt
datasets/results_prompts_1/7.txt
datasets/results_prompts_1/8.txt
datasets/results_prompts_1/9.txt
datasets/results_prompts_1/10.txt
datasets/results_prompts_1/11.txt
datasets/results_prompts_1/12.txt
datasets/results_prompts_1/13.txt
datasets/results_prompts_1/14.txt
datasets/results_prompts_1/15.txt
datasets/results_prompts_1/16.txt
datasets/results_prompts_1/17.txt


True

## Study the datasets with BLUE and ROUGE
1. Load the datasets
2. Compute the BLUE and ROUGE scores
3. Plot the results

In [None]:
# %pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [None]:


# hypothesis = "to make people trustworthy you need to trust them"
# reference = "the way to make people trustworthy is to trust them"



# reference = dataset_0[0]['use_cases'][0]['user_request']
# generation = dataset_0[0]['dataset'][]['user_request']
# print(reference)
# print(generation)

# #  > 0.999999
# print(rouge_score(generation, reference))

0.7058823479584776
Is there a chair in the room?
Is there a sink in the room?
0.8571428521428571


## Study the dataset
1. Load the dataset
2. Compute the number of generation by tool and the mean rouge score (for each tool and for all tools)

In [22]:
total_average_rouge_score = 0
total_number_of_generations = 0

for data in dataset_1:    
    print("Classe:", data['tool_name'])
    classe = data['dataset']
    average_rouge_score = 0

    for generation in classe:
        average_rouge_score += generation['rouge_score']
        
    average_rouge_score /= len(classe)
    total_average_rouge_score += average_rouge_score
    total_number_of_generations += len(classe)
    
    print("Average ROUGE score:", average_rouge_score)
    print("\n")

total_average_rouge_score /= total_number_of_generations
print("Total average ROUGE score:", total_average_rouge_score)

Classe: detect_object
Average ROUGE score: 0.8560846510871253


Classe: enumerate_objects
Average ROUGE score: 0.46048647825218103


Classe: navigation
Average ROUGE score: 0.5571747043697814


Classe: position
Average ROUGE score: 0.03571428526785715


Classe: add_face
Average ROUGE score: 0.4015567715959727


Classe: remove_face
Average ROUGE score: 0.4032633982902994


Classe: look_for_face
Average ROUGE score: 0.37996031254269425


Classe: enumerate_individuals
Average ROUGE score: 0.020833333138020837


Classe: age_estimation
Average ROUGE score: 0.36381673412765986


Classe: gender_estimation
Average ROUGE score: 0.15120772778003222


Classe: emotion_estimation
Average ROUGE score: 0.10148336264821496


Classe: colors
Average ROUGE score: 0.60030120898732


Classe: object_color
Average ROUGE score: 0.7677018583550037


Classe: ocr
Average ROUGE score: 0.7867768545116455


Classe: ocr_objects
Average ROUGE score: 0.3409951110237474


Classe: money
Average ROUGE score: 0.1476967451