In [45]:
import json
import re
from typing import Tuple
from typing import List

class OutputCleaner():
    def __init__(self, verbose=False) -> None:
        self.verbose = verbose
  
    def _remove_space_from_dict_keys(self, model_ouput_list: list) -> list:
        """
        Remove the spaces from the keys of a dictionary. E.g., [{"entity ": "value"}] -> [{"entity": "value"}]

        Args:
        model_ouput_list (dict): the list of dictionaries to be cleaned

        return:
        list: the cleaned list of dicts
        """
        out = []
        for dict in model_ouput_list:
            # print('DICT: ', dict)
            out.append({k.replace(' ', ''): v for k, v in dict.items()})
        return out
    
    def _drop_duplicates(self, model_response: list) -> str:
        """
        Drop the duplicates from a list. This is useful when the model output contains the same entity multiple times.

        Args:
        model_response (str): the model response with no duplicates
        """
        # print('DROPPING DUPLICATES: ', model_response)
        try :
            return list({v['entity']:v for v in model_response}.values())
        except Exception as error:
            model_response = self._remove_space_from_dict_keys(model_response)
            print('ERROR: ', model_response)
            return list({v['entity']:v for v in model_response}.values())
        
    def _assess_model_output(self, model_response: str) -> bool:
        """
        Check if the model output is in the right format. If not, return False.
        
        Args:
        model_output (str): the postprocessed model output after beeing passed to _postprocess_model_output()

        return:
        bool: True if the format is correct, False otherwise
        """
        good_format = True
        try :
            res = json.loads(model_response)
            # print( res)
        except:
            good_format = False
        return good_format

            
    def _remove_json_special_chars(self, string):
        """
        Remove the special characters from a string. This is useful when the model output contains special characters that are not allowed in the json format.
        """
        # print('sto pulendo: ', string)
        chars = ['\xa0', '\x80', '\x93', '\U00100000', '\r\n', '\U00100000I', '\\u001d', '\\"']
        if '\u001d' in string:
            print('ECCOLO')
        for char in chars:
            string = string.replace(char, ' ')
        char_no_space = ['\xad']
        for char in char_no_space:
            string = string.replace(char, '')
        string = string.replace('\\u0010', '^')
        return string
    
    def _special_cases_handler(self, model_response: str) -> str:
        """
        Patch a handful of exact, hard-coded substrings in the model output.

        Each replacement targets one literal fragment; text that does not contain
        any of these fragments is returned unchanged.
        Ideally, this function should not be used. It is very specific for encountered issues I could not find a solution to.

        Args:
        model_response (str): the raw model output; `.replace` is called on it directly, so it must not be None

        return:
        str: the model output with the known problematic fragments replaced
        """
        # 1) drop one specific malformed entity dict entirely;
        # 2) replace one specific escaped quote inside a phrase with a space;
        # 3) rewrite one specific dict whose keys are not 'entity' into a dict that repeats the 'entity' key.
        model_response = model_response.replace(""" {"entity":"un\'insufficienza midollare\\" \\"- congenita"},""", "").\
            replace("""l\'aspetto\\"anteriorpuntale""", """l'aspetto anteriorpuntale""").\
            replace('{"jaundice": false, "vomiting": false, "complaints": false, "bleeding": false, "site": [], "complaints": []} ', '{"entity":"jaundice", "entity":"vomiting", "entity":"complaints", "entity":"bleeding", "entity":"site", "entity":"complaints"} ')
        # 4) delete one specific phrase that ends with a backslash followed by a double quote.
        model_response = model_response.replace("""rigonfiamento aneurismatico dell'apice del ventricolo sinistro\\\"""", """""")
        return model_response
    
    def _clean_ground_truth(self, example: dict) -> dict:
        ground_truth = example['ground_truth']
        # print('inner ground truth: ', ground_truth)
        ground_truth = self._remove_json_special_chars(ground_truth)
        ground_truth = ground_truth.replace('</s>', '').replace('<|im_e', '')
        if ground_truth.strip() == ']':
            ground_truth = '[]'
        # print('mid ground truth: ', ground_truth)
        return({'ground_truth': ground_truth})

    def _clean_model_output(self, example: dict,  wrong_keys_to_entity:bool, latest_version:bool=True) -> dict:
        """
        Postprocess the model output to return a json like formatted string that can be used to compute the F1 score.

        Args:
        model_output (str): the model output as it is returned by the model. The processing of the output is done in the function
        wrong_keys_to_entity (bool): if True, the function also extracts the dictionaries with keys different from 'entity', converting the keys into 'entity'. If not, all keys that are not 'entity' are dropped

        return:
        dict: the model response

        """
        def has_unclosed_square_brackets(s:str)  -> bool:
            count = 0
            for char in s:
                if char == '[':
                    count += 1
                elif char == ']':
                    count -= 1
                    if count < 0:
                        return True
            return count > 0
        
        def has_unopen_square_brackets(s:str)  -> bool:
            count = 0
            for char in s:
                if char == '[':
                    count -= 1
                elif char == ']':
                    count += 1
                    if count > 0:
                        return True
            return count > 0
        
        def is_empty_list(string:str)  -> bool:
            if string=='[]':
                return True
            return False
        
        def is_list_of_lists(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, list) for item in tmp):
                    return True
            return False
        
        def is_list_of_dicts(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    return True
            return False
        
        def is_list_of_lists_and_dict(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                found_dict = False
                found_list = False
                for element in tmp:
                    if isinstance(element, list):
                        found_list = True
                    elif isinstance(element, dict):
                        found_dict = True
                    if found_list and found_dict:
                        return True
            return False
        
        def is_list_of_strings(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, str) for item in tmp):
                    return True
            return False

        def is_list_of_empty_dict(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                #print('TMP: ', tmp)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    if all(str(item) == "{}" for item in tmp):
                        return True
            return False

        def is_list_with_one_empty_dict(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list):
                    for item in tmp:
                        if item == {}:
                            return True
            return False
        
        def is_list_of_dicts_with_empty_lists(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        for v in item.values():
                            if v == []:
                                return True
            return False
        
        def is_list_of_dicts_with_one_key_multiple_values(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        if len(item) == 1 and len(item.values()) > 1:
                            return True
            return False

        def is_list_of_dicts_with_multiple_keys_included_entity(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        if len(item) > 1 and 'entity' in item.keys():
                            return True
            return False

        def is_list_of_dict_numeric_values(string:str)  -> bool:
            print('STRING is_list_of_dict_numeric_values: ', string)
            if self._assess_model_output(string):
                tmp = json.loads(string)
                #print('TMP: ', tmp)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        print(print('22 STRING is_list_of_dict_numeric_values: ', item))
                        if len(item.values()) > 0:
                            val = list(item.values())[0] 
                            if isinstance(val, int) or isinstance(val, float):
                                return True
            return False
        
        def is_list_of_dict_bool_values(string:str) -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                #print('TMP: ', tmp)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        if len(item.values()) > 0:
                            val = list(item.values())[0] 
                            if isinstance(val, bool) :
                                return True
            return False
        
        def is_list_of_dicts_none_values(string:str) -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        if len(item.values()) > 0:
                            val = list(item.values())[0] 
                            if val is None:
                                return True
            return False

        def is_list_of_dicts_and_strings(string:str)  -> bool:
            if self._assess_model_output(string):
                #print('ASSESSED')
                tmp = json.loads(string)
                found_dict = False
                found_string = False
                for element in tmp:
                    if isinstance(element, str):
                        found_string = True
                    elif isinstance(element, dict):
                        found_dict = True
                    if found_string and found_dict:
                        return True
            return False
        
        def is_list_of_dicts_and_lists_of_strings(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                # print('TMP: ', tmp)
                if isinstance(tmp, list):
                    if all(isinstance(item, dict) for item in tmp):
                        return False
                    for item in tmp:
                        print('ITEM: ', item)
                        if isinstance(item, dict):
                            
                            if len(item.values()) == 0:
                               return False
                            if item.get('entity') is None:
                                return False
                        elif isinstance(item, list):
                            if len(item) != 1:
                                return False
                            if not isinstance(item[0], str):
                                return False
                        else:
                            return False
                    return True
            return False
        
        def is_list_of_dicts_with_value_list(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        for v in item.values():
                            if isinstance(v, list):
                                return True
            return False
        
        def is_string(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, str):
                    return True
            return False
        
        def is_list_of_strings_representing_dicts(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                # print('TMP: ', tmp)
                if isinstance(tmp, list) and all(isinstance(item, str) for item in tmp):
                    tmp_list = []
                    for item in tmp:
                        print('ITEM: ', item)
                        if self._assess_model_output(item):
                          tmp_list.append(json.loads(item))
                    if all(isinstance(item, dict) for item in tmp_list):
                        return True
            return False
        
        def is_list_of_dicts_of_lists(string:str)  -> bool:
            # print('STRING: ', string)
            if self._assess_model_output(string):
                tmp = json.loads(string)
                # print('TMP: ', tmp)
                if isinstance(tmp, list) and all(isinstance(item, dict) for item in tmp):
                    for item in tmp:
                        # print('item: ',item)
                        tmp2 = list(item.values())[0]
                        if len(tmp2) > 0:
                            if isinstance(list(item.values())[0], list):
                                return True
            return False
        
        def is_numeric(string:str)  -> bool:
            if self._assess_model_output(string):
                tmp = json.loads(string)
                if isinstance(tmp, (int, float)):
                    return True
            return False
        
        def are_entities_extracted_as_dict_keys_instead_of_values(string:str, example:dict) -> bool:
            if is_list_of_dicts(string):
                tmp = json.loads(string)
                keys = [key for item in tmp for key in item.keys()]
                if 'entity' not in keys:
                    if all(entity in example['sentence'] for entity in keys):
                        return True
            return False
        
        
        
        def convert_wrong_keys_into_entity(string:str) -> List[str]:
            if is_list_of_dicts(string):
                tmp = json.loads(string)
                tmp = [str({"entity":v}) for el in tmp for v in el.values()]
                return tmp
            else:
                return []


        def only_dicts_with_key_entity(string:str, wrong_keys_to_entity:bool) -> Tuple[bool, str]:
            """
            Extract only the dictionaries with the key 'entity' in the list of dictionaries in the string
            
            Args:
            string (str): the string to be cleaned
            wrong_keys_to_entity (bool): if True, the function also extracts the dictionaries with keys different from 'entity', converting the keys into 'entity'
            """
            els_between_curly = re.findall(r'\{(.+?)\}', string)
            clean = [el for el in els_between_curly if el.startswith('"entity"') or el.startswith("'entity'")]
            clean = ['{' + el + '}' for el in clean]
            dirty = []
            if wrong_keys_to_entity:
                dirty = [el for el in els_between_curly if (not el.startswith('"entity"')) and (not el.startswith("'entity'"))]
                dirty = ['{' + el + '}' for el in dirty]
                dirty = '[' + ', '.join(dirty) + ']'
                cleaned_dirty = convert_wrong_keys_into_entity(dirty)
                out = '[' + ', '.join(clean) + ', '.join(cleaned_dirty) +  ']'
            else:
                out = '[' + ', '.join(clean) + ']'
            # out = out.replace("{\'", "{\"").replace("\'}", "\"}").replace("\'ent", "\"ent").replace("ty\'", "ty\"").replace(" \'", " \"")
            operations_performed = False
            if len(clean) != len(els_between_curly):
                operations_performed = True
            if is_empty_list(out):
                return operations_performed, '[{"entity":""}]'
            return operations_performed, str(out)
        
        # print('EXAMPLE:  ', example['model_responses'])
        model_output = example['model_responses']
        model_output = self._special_cases_handler(model_output)
        if self.verbose: print('ORIGINAL MODEL OUTPUT:', model_output)
        if self.verbose: print('GROUND TRUTH: ', example['ground_truth'])
        # model_output = self._exceptions_handler(model_output)
    
        if model_output is None or is_empty_list(model_output):
            return {'model_output':'[{"entity":""}]'}
        
        model_output = self._remove_json_special_chars(model_output)
        if self.verbose:print('PULITO: ', model_output)
                
        if are_entities_extracted_as_dict_keys_instead_of_values(model_output, example):
            if self.verbose: print('ENTITIES EXTRACTED AS DICT KEYS INSTEAD OF VALUES')
            tmp = json.loads(model_output)
            tmp = [{"entity":k} for el in tmp for k in el.keys() ]
            tmp = str(tmp)
            return {'model_output':tmp}
        
        if is_list_of_dicts_and_lists_of_strings(model_output):
            if self.verbose: print('is_list_of_dicts_and_lists_of_strings')
            tmp = json.loads(model_output)
            out = []
            for item in tmp:
                if self.verbose: print('ITEM: ', item)
                if isinstance(item, dict):
                    out.append(item)
                elif isinstance(item, list):
                    out.append({"entity":item[0]})
            return {'model_output':str(out)}

        if is_numeric(model_output):
            # print('IS NUMERIC')
            return {'model_output':'[{"entity":""}]'}

        # print('QUI HO QUESTO: ', model_output)
        if is_list_of_strings_representing_dicts(model_output):
            if self.verbose: print('is_list_of_strings_representing_dicts 1')                
            tmp = json.loads(model_output)
            tmp_list = []
            for item in tmp:
                if self._assess_model_output(item):
                  tmp_list.append(json.loads(item))
            if self.verbose: print('TEMPOOOO 2 ',tmp)
            return {'model_output':str(tmp_list)}
        
        if is_list_of_dicts_with_one_key_multiple_values(model_output):
            if self.verbose: print('is_list_of_dicts_with_one_key_multiple_values')
            tmp = json.loads(model_output)
            tmp = [{"entity":v[0]} for el in tmp for v in el.values()]
            return {'model_output':str(tmp)}
       
        if is_list_of_dicts_with_multiple_keys_included_entity(model_output):
            if self.verbose: print('is_list_of_dicts_with_multiple_keys_included_entity')
            tmp = json.loads(model_output)
            out = []
            for item in tmp:
                out.append({"entity":item['entity']})
            return {'model_output':str(out)}
        
        
        if is_list_of_lists_and_dict(model_output):
            if self.verbose: print('is_list_of_lists_and_dict')
            tmp = json.loads(model_output)
            for el in tmp:
                if isinstance(el, list):
                    tmp = str(el)
                    # print('is_list_of_lists_and_dict')
                    return {'model_output':tmp}
                
        if is_list_of_lists(model_output):
            if self.verbose: print('is_list_of_lists')
            tmp = json.loads(model_output)
            tmp2 = str(tmp[0]).replace("'", "\"")
            if is_list_of_dicts_and_strings(tmp2):
                tmp = tmp[0]
                out = [item for item in tmp if isinstance(item, dict)]
                return {'model_output':str(out)} 
            tmp = str(tmp[0])
            return {'model_output':tmp}
        

        if is_list_of_strings(model_output):
            if self.verbose: print('is_list_of_strings')
            tmp = json.loads(model_output)
            tmp = [{"entity":el} for el in tmp]
            tmp = str(tmp)
            # print('is_list_of_strings')
            if self.verbose: print('TEMPOOOO ',tmp)
            return {'model_output': tmp}
        
        if is_string(model_output):
            # model_output = model_output.replace("{\'", "{\"").replace("\'}", "\"}").replace("\'ent", "\"ent").replace("ty\'", "ty\"").replace(" \'", " \"")
            if self.verbose: print('PULO: ', model_output)
            tmp = json.loads(model_output)
            if all(el in tmp for el in ['{', 'entity', '}']):
                return {'model_output':tmp}
            tmp = [{"entity":tmp}]
            tmp = str(tmp)
            #print('is_string')
            return {'model_output':tmp}

        
        if latest_version:
            model_output = self._extract_text_between_curl_brackets(model_output)
            model_output = self._clean_text_between_curl_brackets(model_output)

            # print('QUI HO il SECONDO QUESTO: ', model_output)

            if is_list_of_strings_representing_dicts(model_output):
                if self.verbose: print('is_list_of_strings_representing_dicts 2')                
                tmp = json.loads(model_output)
                tmp_list = []
                for item in tmp:
                    if self._assess_model_output(item):
                        tmp_list.append(json.loads(item))
                return {'model_output':str(tmp_list)}
            
            if is_list_of_dicts_with_one_key_multiple_values(model_output):
                if self.verbose: print('is_list_of_dicts_with_one_key_multiple_values')
                tmp = json.loads(model_output)
                tmp = [{"entity":v[0]} for el in tmp for v in el.values()]
                return {'model_output':str(tmp)}
            
            if is_list_of_dicts_and_lists_of_strings(model_output):
                if self.verbose: print('is_list_of_dicts_and_lists_of_strings')
                tmp = json.loads(model_output)
                out = []
                for item in tmp:
                    # print('ITEM: ', item)
                    if isinstance(item, dict):
                        out.append(item)
                    elif isinstance(item, list):
                        out.append({"entity":item[0]})
                return {'model_output':str(out)}
            
            if self.verbose: print('QUI HO il TEERZO QUESTO: ', model_output)

            if is_list_of_dicts_with_empty_lists(model_output):
                if self.verbose: print('is_list_of_dicts_with_empty_lists')
                tmp = json.loads(model_output)
                tmp = [{"entity":v} for el in tmp for v in el.values() if v != []]
                # print('TMP: ', tmp)
                if is_list_of_dicts_with_value_list(str(tmp)):# .replace("'", "\"")):
                    if self.verbose: print('is_list_of_dicts_with_value_list')
                    tmp = [{"entity":v} for el in tmp for v in el.values() if not isinstance(v, list)]
                    tmp2 = [{"entity":v[0]} for el in tmp for v in el.values() if isinstance(v, list)]
                    # print('returning this: ', {'model_output ':str(tmp2)}  )
                    return {'model_output':str(tmp2)}
                # print('returning this: ', {'model_output ':str(tmp)}  )

                return {'model_output':str(tmp)}
            
            if self.verbose: print('QUI HO il QUARTO QUESTO:', model_output)

            if is_list_of_dicts_with_value_list(model_output):
                if self.verbose: print('is_list_of_dicts_with_value_list')
                tmp = json.loads(model_output)
                tmp = [{"entity":v} for el in tmp for v in el.values() if not isinstance(v, list)]
                tmp2 = [{"entity":v[0]} for el in tmp for v in el.values() if isinstance(v, list)]
                return {'model_output':str(tmp)}

            if is_list_of_dict_numeric_values(model_output):
                if self.verbose: print('is_list_of_dict_int_values')
                tmp = json.loads(model_output)
                tmp = [str({"entity":str(v)}) for el in tmp for v in el.values()]
                model_output = str(tmp)
            
            if is_list_of_dict_bool_values(model_output):
                if self.verbose: print('is_list_of_dict_bool_values')
                tmp = json.loads(model_output)
                tmp = [str({"entity":str(v)}) for el in tmp for v in el.values()]
                model_output = str(tmp)
            
            if is_list_of_dicts_none_values(model_output):
                if self.verbose: print('is_list_of_dicts_none_values')
                tmp = json.loads(model_output)
                tmp = [str({"entity":v}) for el in tmp for v in el.values() if v is not None]
                model_output = str(tmp)
                    
            if is_list_of_empty_dict(model_output):
                if self.verbose: print('is_list_of_empty_dict')
                return {'model_output':'[{"entity":""}]'}
            
            if is_list_with_one_empty_dict(model_output):
                if self.verbose: print('is_list_with_one_empty_dict')
                tmp = json.loads(model_output)
                tmp = [el for el in tmp if el != {}]
                model_output = tmp
                return {'model_output':str(model_output)}
            
            if is_list_of_dicts_of_lists(model_output):
                if self.verbose: print('is_list_of_dicts_of_lists')
                tmp = json.loads(model_output)
                tmp = [{"entity":v} for el in tmp for v in el.values() if not isinstance(v, list)]
                # tmp.extend([{"entity":el.values()[0]} for el in tmp if isinstance(el.values(), list)])
                # print('returning this: ', {'model_output ':str(tmp)}  )
                return {'model_output':str(tmp)}  
                
            if self.verbose: print('CLEANED: ', model_output)
            cleaning_done, cleaned_model_output = only_dicts_with_key_entity(model_output, wrong_keys_to_entity=wrong_keys_to_entity)
            if cleaning_done:
                model_output = cleaned_model_output
            
            if is_list_of_dicts(model_output):
                if self.verbose: print('PRE CLEANED: ', model_output)
                tmp = json.loads(model_output)
                return {'model_output':str(tmp)}
            
            else: 
                # print('NOT CLEANED: ', model_output, '\n\n')
                return {'model_output':'[{"entity":""}]'}
        
            
    def _exceptions_handler(self, model_output: str, error) -> str:
        # if hasattr(error, 'msg'):
        #     if error.msg.startswith('Expecting property name enclosed in double quotes'):
        #         model_output = model_output.replace("{\'", "{\"").replace("\'}", "\"}").replace("\'ent", "\"ent").replace("ty\'", "ty\"").replace(": \'", ": \"")
        
        try:
            json.loads(model_output)
        except Exception as error:
            if isinstance(error, json.decoder.JSONDecodeError):
                #if error.msg == "Expecting ',' delimiter":
                key_part, value_part = model_output.split(': ', 1)
                first_occurrence = value_part.find('"')
                last_occurrence = value_part.rfind('"')
                model_output = key_part + ': "' + value_part[first_occurrence+1:last_occurrence].replace("'", r'\'') + '"' + '}'
        return model_output
    # .replace("\'", " ")
    
    def _substitute_apexes(self, model_output: str) -> str:
        model_output = model_output.replace("{\'", "{\"").replace("\'}", "\"}").replace("\'ent", "\"ent").replace("ty\'", "ty\"").replace(": \'", ": \"")
        return model_output
    
    
    def _extract_text_between_curl_brackets(self, model_output: str) -> str:
        """
        Extract the text between the curl brackets of the model output, as enities are usually outputted in this format: {"entity": "value"}

        Args:
        model_output (str): the example from the dataset

        """
        text_between_curl_brackets = re.findall(r'\{(.+?)\}', model_output)
        cleaned_output = ['{'+ el +'}' for el in text_between_curl_brackets]
        cleaned_output = '[' + ', '.join(cleaned_output) + ']'
        return cleaned_output
    

    def _clean_text_between_curl_brackets(self, text_between_curl_brackets: str) -> str:
        """
        Clean the text between the curl brackets of the model output, as entities are usually outputted in this format: {"key": "value"}

        Args:
        model_output (str): the example from the dataset

        """
        text_between_curl_brackets = re.sub(r'",(.+?)}', r'"}', text_between_curl_brackets)
        text_between_curl_brackets = re.sub(r'{},', r'', text_between_curl_brackets)
        text_between_curl_brackets = re.sub(r',{}', r'', text_between_curl_brackets)
        # print('CLEANED: ', text_between_curl_brackets)
        # text_between_curl_brackets = re.sub(r'\{"entity":\[\]\},', r'', text_between_curl_brackets)
        # text_between_curl_brackets = re.sub(r',{\'entity\':[]}', r'', text_between_curl_brackets)
        return text_between_curl_brackets
    
    def apply_cleaning(self, data, wrong_keys_to_entity: bool) -> "Dataset":
        """
        Apply the cleaning to the ground truth and to the model output, and return the cleaned data with a new column called 'model_output'

        Rows whose 'entities' field is None are dropped first. The 'ground_truth' column is
        then replaced by its cleaned version, and finally every row is passed to
        _clean_model_output.

        Args:
        data: the dataset to clean; it is only used through its `filter` and `map` methods
            (presumably a Hugging Face `datasets.Dataset` - confirm against the caller)
        wrong_keys_to_entity (bool): if True, the function also extracts the dictionaries with keys different from 'entity', converting the keys into 'entity'. If not, all keys that are not 'entity' are dropped

        return:
        the cleaned dataset, as returned by the last `map` call
        """
        def has_entities(example) -> bool:
            return example["entities"] is not None

        def clean_model_output(example) -> dict:
            return self._clean_model_output(example, wrong_keys_to_entity)

        data = data.filter(has_entities)
        # 'ground_truth' is removed and re-created from the value returned by the cleaner.
        data = data.map(self._clean_ground_truth, remove_columns=['ground_truth'])
        data = data.map(clean_model_output)
        return data

In [46]:
from datasets import Dataset
# from utils.evaluator import Evaluator
# from utils.output_cleaner import OutputCleaner
# [, , 'data/mistral/8bit_FT/maxNewTokensFactor8_nShotsInference2_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_16_32_0.01_4_0.0008.csv', 'data/mistral/8bit_FT/maxNewTokensFactor8_nShotsInference0_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_32_32_0.05_4_0.0002.csv', 'data/mistral/8bit_FT/maxNewTokensFactor8_nShotsInference2_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_32_32_0.05_8_0.0002.csv', 'data/mistral/8bit_FT/maxNewTokensFactor8_nShotsInference0_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_32_32_0.01_4_0.0002.csv', 'data/mistral/8bit_FT/maxNewTokensFactor4_nShotsInference0_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_16_32_0.05_4_0.0008.csv', 'data/mistral/8bit_FT/maxNewTokensFactor4_nShotsInference0_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_16_32_0.01_4_0.0002.csv', 'data/mistral/8bit_FT/maxNewTokensFactor4_nShotsInference0_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_32_32_0.01_4_0.0002.csv']
# Predictions of one fine-tuned run, stored as CSV (one row per evaluated sentence).
file =  'data/mistral/8bit_FT/maxNewTokensFactor8_nShotsInference0_mistral-7b-instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_64_32_0.01_8_0.0002.csv'
eval_data = Dataset.from_csv(file) 
#display(eval_data.to_pandas().head(3))
# verbose=True makes the cleaner print its intermediate steps for every row.
output_cleaner = OutputCleaner(verbose=True)
# NOTE(review): similar_is_equal is assigned but not used anywhere in this cell.
similar_is_equal = True
similar_is_equal_threshold = 100
# Clean ground truth and model output; dicts whose key is not 'entity' are dropped.
cleaned_data = output_cleaner.apply_cleaning(eval_data, wrong_keys_to_entity=False) #.select(range(12,13))
# NOTE(review): Evaluator is not imported in this cell (its import above is commented out);
# it must already be defined in the notebook session.
evaluator = Evaluator(data=cleaned_data, offset=False, output_cleaner=output_cleaner)
# NOTE(review): 'leveshtein' may be a misspelling of 'levenshtein' - check which
# spelling Evaluator expects before changing it.
evaluator.generate_evaluation_table(similar_is_equal_threshold=similar_is_equal_threshold,
                                    words_level=True, similarity_types=['case', 'stop_words', 'subset', 'superset', 'leveshtein'])

Map: 100%|██████████| 681/681 [00:00<00:00, 15809.25 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4402.93 examples/s]


ORIGINAL MODEL OUTPUT: ][{"entity": "dyslipidemia"}, {"entity": "diabetes mellitus"}, {"entity": "hypokalemia"}, {"entity": "new-onset diabetes mellitus"}, {"entity": "A 46-year-old man"}] 
  Based on the text, the entities contained in the text are dyslipidemia, diabetes mellitus, hypokalemia and A 46-year-old man.</s></s></s></s>
GROUND TRUTH:  [{"entity": "hypertension"}, {"entity": "dyslipidemia"}, {"entity": "diagnosed"}, {"entity": "mellitus"}, {"entity": "referred"}, {"entity": "hypokalemia"}, {"entity": "new-onset diabetes mellitus"}, {"entity": "A 46-year-old man"}, {"entity": "4-months"}, {"entity": "1-month"}] 
PULITO:  ][{"entity": "dyslipidemia"}, {"entity": "diabetes mellitus"}, {"entity": "hypokalemia"}, {"entity": "new-onset diabetes mellitus"}, {"entity": "A 46-year-old man"}] 
  Based on the text, the entities contained in the text are dyslipidemia, diabetes mellitus, hypokalemia and A 46-year-old man.</s></s></s></s>
QUI HO il TEERZO QUESTO:  [{"entity": "dyslipidemi

JSONDecodeError: Expecting value: line 1 column 14 (char 13)

In [41]:
# Debug cell: display the record stored at index 201 of `eval_data`.
eval_data[201]

{'sentence': 'On the other side there was no history of jaundice, vomiting, urinary or bowel complaints, bleeding from any site or neurological complaints.',
 'entities': "[{'id': '9553', 'offsets': array([31, 38]), 'role': '', 'semantic_type_id': '', 'text': 'history', 'type': 'EVENT'}\n {'id': '9568', 'offsets': array([42, 50]), 'role': '', 'semantic_type_id': '', 'text': 'jaundice', 'type': 'EVENT'}\n {'id': '9583', 'offsets': array([52, 60]), 'role': '', 'semantic_type_id': '', 'text': 'vomiting', 'type': 'EVENT'}\n {'id': '9598', 'offsets': array([79, 89]), 'role': '', 'semantic_type_id': '', 'text': 'complaints', 'type': 'EVENT'}\n {'id': '9613', 'offsets': array([91, 99]), 'role': '', 'semantic_type_id': '', 'text': 'bleeding', 'type': 'EVENT'}\n {'id': '9628', 'offsets': array([130, 140]), 'role': '', 'semantic_type_id': '', 'text': 'complaints', 'type': 'EVENT'}\n {'id': '10788', 'offsets': array([42, 50]), 'role': '', 'semantic_type_id': 'C0022346', 'text': 'jaundice', 'type'

In [53]:
# Debug cell: look up which character sits at index 231 of this raw output string.
"""[{'entity': 'vomiti'}, {'entity': 'coliche'}, {'entity': 'inappetenza'}, {'entity': 'vomiti'}, {'entity': 'vomiti'}, {'entity': 'coliche addominali'}, {'entity': 'cira una settimana'}, {'entity': 'vistoso calo ponderale (4 kg" in un\'unica settimana)'}, {'entity': 'Il ragazzo'}]"""[231]

'n'

In [50]:
# Debug cell: check that a double-quoted JSON value containing an apostrophe parses.
# In a non-raw Python string literal, \' is just a plain apostrophe.
string = """[{"entity": "vistoso calo ponderale (4 kg in un\'unica settimana)"}, {"entity": "Il ragazzo"}]"""

# Disabled experiment: turn a literal backslash-apostrophe into a plain apostrophe.
#string = string.replace("\\'", "'")
# Parse the string as JSON; this raises json.JSONDecodeError if the string is not valid JSON.
data = json.loads(string)
# Show the string that was parsed (it is printed unchanged, no replacement is applied above).
print(string)

[{"entity": "vistoso calo ponderale (4 kg in un'unica settimana)"}, {"entity": "Il ragazzo"}]


In [84]:
# Debug cell: try a regex that turns double quotes into single quotes, except next to curly braces.
string = 'This is a "test" string with "quotes" and "{curly braces}.'

# Replace every '"' that is neither immediately preceded nor immediately followed
# by a curly brace with "'" (a quote touching a brace on either side is left alone).
string = re.sub(r'(?<![{}])"(?![{}])', "'", string)
print(string)

This is a 'test' string with 'quotes' and "{curly braces}.


In [4]:
import json
import re
import pandas as pd
from datasets import Dataset        
from fuzzywuzzy import fuzz


class Evaluator():

    def __init__(self,  data: Dataset, offset:bool, output_cleaner) -> None:
        self.offset = offset
        self.data = data
        self.cleaner = output_cleaner
        pass

    def _change_apexes(self, model_output: str) -> str:
        """
        Extract the text between the curl brackets of the model output, and change the apexes from double single \'.

        Args:
        model_output (str): the example from the dataset

        """
        text_between_curl_brackets = re.findall(r'\{(.+?)\}', model_output)
        cleaned_output = '['
        for el in text_between_curl_brackets:
            key_part, value_part = el.split(': ', 1)
            first_occurrence = value_part.find('"')
            last_occurrence = value_part.rfind('"')
            tmp = '{' + key_part + ': "' + value_part[first_occurrence+1:last_occurrence].replace('"', "'") + '"' + '}'
            cleaned_output += tmp + ', '
        cleaned_output = cleaned_output[:-2] + ']'
        return cleaned_output
    
    def _assess_model_output(self, model_response: str) -> (bool, str):
        """
        Check if the model output is in the right format. If not, return False.
        
        Args:
        model_output (str): the postprocessed model output after beeing passed to _postprocess_model_output()

        return:
        bool: True if the format is correct, False otherwise
        str: the model output in the adjusted format
        """
        good_format = True
        if self.cleaner.verbose: print('prima sostituz: ', model_response)
        model_response = model_response.replace("{\'", "{\"").replace("\'}", "\"}").replace("\'ent", "\"ent").replace("ty\'", "ty\"").replace(": \'", ": \"")
        model_response = re.sub(r'(?<=[a-zA-Z])"(?=[a-zA-Z])', "'", model_response)
        if self.cleaner.verbose: print('dopo sostituz: ', model_response)
        try :
            out = json.loads(model_response)
            if isinstance(out, dict):
                model_response = '[' + model_response + ']'
        except Exception as error:
            if hasattr(error, 'msg'):
                if error.msg.startswith('Expecting property name enclosed in double quotes'):
                    model_response = model_response.replace("{\'", "{\"").replace("\'}", "\"}").replace("\'ent", "\"ent").replace("ty\'", "ty\"").replace(": \'", ": \"")
                    try:
                        out = json.loads(model_response)
                        if isinstance(out, dict):
                            model_response = '[' + model_response + ']'
                            good_format = True
                    except Exception as error2:
                        if isinstance(error2, json.decoder.JSONDecodeError):
                            if error2.msg == "Expecting ',' delimiter":
                                model_response = self._change_apexes(model_response)
                                good_format = True
            else:
                #print('MODEL RESPNSE: ', model_response)
                good_format = False
        if not good_format:
            model_response = re.findall(r'\[\{(.+?)\}\]', model_response)
            if len(model_response) != 0:
                model_response = '[{' + model_response[0] + '}]'
                good_format = True
                try :
                    json.loads(model_response)
                except Exception as error:
                    good_format = False
            else:
                good_format = False
        return good_format, model_response

    def _parse_json(self, model_response: str, drop_duplicates: bool = True) -> dict:
        """
        Parse the model output to extract the entities and their offsets if present.
        
        Args:
        model_response (str): the model response 
        drop_duplicates (bool): if True, drop the duplicates in the model response
        """
        # print('MODEL RESPONSE 1: ', model_response)
        model_response = model_response.replace("\n", " ")

        good_format, model_response = self._assess_model_output(model_response)

        # print('MODEL RESPONSE 2: ', model_response)        
        # print('MODEL RESPONSE 3: ', model_response)
        if model_response == []:
            model_response = '[{"entity":""}]'
        if self.offset and good_format:
            output = json.loads(model_response)
            if drop_duplicates:
                output = self.cleaner._drop_duplicates(output)
            entities = [entity["entity"] for entity in output]
            offsets = [entity["offset"] for entity in output]
            return {"entities": entities, "offsets": offsets}
        if (not self.offset) and good_format:

            # print('MODEL RESPONSE 4: ', model_response)
            # print('ORA STO PARSANDO: ', model_response)
            output = json.loads(model_response)
            # print('OUTPUT: ', type(output))
            if drop_duplicates:
                output = self.cleaner._drop_duplicates(output)
            entities = [entity["entity"] for entity in output]
            # print('ENTITIES: ', entities)
            return {"entities": entities}
        if not good_format:
            return {"entities": []}
    
    def _count_common_words(self, string1: str, string2: str) -> int:
        """
        Count the number of common words between two entities without considering repetition.

        Args:
        string1 (str): an entity in the model response
        string2 (str): an entity in the ground truth

        return:
        int: the number of common words
        """
        model_words = set(string1.lower().split())
        ground_truth_words = set(string2.lower().split())
        common_words = model_words.intersection(ground_truth_words)
        return len(common_words)
        
    def _entity_similar_to_ground_truth_entity_LowerUppercase(self, entity_in_model_response: str, entity_in_ground_truth: str) -> (bool, str):
        """
        Check if two entities are similar, i.e. if the difference is just a fact of being upper or lower case.

        Args:
        entity_in_model_response (str): an entity in the model response
        entity_in_ground_truth (str): an entity in the ground truth
        threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.

        return:
        bool: True if the entities are similar, False otherwise
        str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise

        """
        FP_words = 0
        FN_words = 0
        TP_words = 0
        if entity_in_model_response.lower() == entity_in_ground_truth.lower():
            # print('SIMILI CASE: ', entity_in_model_response, ' e ', entity_in_ground_truth)
            TP_words = len(entity_in_ground_truth.split())
            return True, entity_in_ground_truth, FP_words, FN_words, TP_words
        return False, entity_in_model_response, FP_words, FN_words, TP_words
    
    def _entity_similar_to_ground_truth_entity_StopWords(self, entity_in_model_response: str, entity_in_ground_truth: str) -> (bool, str):
        """
        Check if two entities are similar, i.e. if the difference is just a stop words (e.g., "the" or "a"). Everything is performend in lower case.
        This is useful when the model output is not exactly the same as the ground truth.

        Args:
        entity_in_model_response (str): an entity in the model response
        entity_in_ground_truth (str): an entity in the ground truth
        threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.

        return:
        bool: True if the entities are similar, False otherwise
        str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise
        """
        def __preprocess_string__(string):
            # Remove common articles and noise words
            noise_words = ["a", "an", "the", "of"]
            words = string.split()
            filtered_words = [word for word in words if word.lower() not in noise_words]
            return ' '.join(filtered_words)
        
        FP_words = 0
        FN_words = 0
        TP_words = 0
        normalized_string = __preprocess_string__(entity_in_model_response)
        normalized_entity_ground_truth = __preprocess_string__(entity_in_ground_truth)
        if normalized_string == normalized_entity_ground_truth:
            n_words_ground_truth = len(entity_in_ground_truth.split())
            n_words_model_response = len(entity_in_model_response.split())
            FP_words = max(0, n_words_model_response - n_words_ground_truth)
            FN_words = max(0, n_words_ground_truth - n_words_model_response)
            TP_words = self._count_common_words(entity_in_model_response, entity_in_ground_truth)
            #print('SIMILI NORMALIZED: ', entity_in_model_response, ' e ', entity_in_ground_truth, ' -> FP_words:', FP_words,' FN_words:', FN_words,'TP_words:', TP_words)
            return True, entity_in_ground_truth, FP_words, FN_words, TP_words
        return False, entity_in_model_response, FP_words, FN_words, TP_words

    def _entity_similar_to_ground_truth_entity_Subset(self, entity_in_model_response: str, entity_in_ground_truth: str) -> (bool, str, int, int):
        """
        Check if two entities are similar in terms of being a subset of the one in list. E.g., entity='am happy' ground truth='I am happy'.
        This is useful when the model output is not exactly the same as the ground truth.

        Args:
        entity_in_model_response (str): an entity in the model response
        entity_in_ground_truth (str): an entity in the ground truth

        return:
        bool: True if the entities are similar, False otherwise
        str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise
        FP_words: number of identified false positive words, i.e. number of words identified as entity that are not in the ground truth
        FN_words: number of identified false positive words, always 0 in this case
        TP_words: number of identified true positive words
        """
        FP_words = 0
        FN_words = 0
        TP_words = 0
        if entity_in_model_response.lower() != entity_in_ground_truth.lower():
            if entity_in_model_response.lower() in entity_in_ground_truth.lower():
                FN_words = entity_in_ground_truth.strip().count(" ") - entity_in_model_response.strip().count(" ")
                TP_words = self._count_common_words(entity_in_model_response, entity_in_ground_truth)
                # print('SIMILI Subset: ', entity_in_model_response, ' e ', entity_in_ground_truth, ' -> FP_words:', FP_words,' FN_words:', FN_words,'TP_words:', TP_words)
                return True, entity_in_ground_truth, FP_words, FN_words, TP_words
        return False, entity_in_model_response, FP_words, FN_words, TP_words

    def _entity_similar_to_ground_truth_entity_Superset(self, entity_in_model_response: str, entity_in_ground_truth: str) -> (bool, str, int, int):
        """
        Check if two entities are similar in terms of being a super of the one in list. E.g., entity='I am very happy' ground truth='I am happy'.
        This is useful when the model output is not exactly the same as the ground truth.

        Args:
        entity_in_model_response (str): an entity in the model response
        entity_in_ground_truth (str): an entity in the ground truth
        threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.

        return:
        bool: True if the entities are similar, False otherwise
        str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise
        FP_words: number of identified false positive words, i.e. number of words identified as entity that are not in the ground truth
        FN_words: number of identified false positive words, always 0 in this case
        """
        FP_words = 0
        FN_words = 0
        TP_words = 0
        if entity_in_model_response.lower() != entity_in_ground_truth.lower():
            if entity_in_ground_truth.lower() in entity_in_model_response.lower():
                FP_words = entity_in_model_response.strip().count(" ") - entity_in_ground_truth.strip().count(" ")
                TP_words = self._count_common_words(entity_in_model_response, entity_in_ground_truth)
                # print('SIMILI Superset: ', entity_in_model_response, ' e ', entity_in_ground_truth, ' -> FP_words:', FP_words,' FN_words:', FN_words,'TP_words:', TP_words)
                return True, entity_in_ground_truth, FP_words, FN_words, TP_words
        return False, entity_in_model_response, FP_words, FN_words, TP_words


    def _entity_similar_to_ground_truth_entity_Leveshtein(self, entity_in_model_response: str, entity_in_ground_truth: str, threshold: int) -> (bool, str):
        """
        Check if two entities are similar according to the score returned by fuzz.ratio(),
        computed on the lower-cased entities.

        Args:
        entity_in_model_response (str): an entity in the model response
        entity_in_ground_truth (str): an entity in the ground truth
        threshold (int): the minimum score (inclusive) to consider the entities similar

        return:
        bool: True if the score reaches the threshold, False otherwise
        str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise
        """
        score = fuzz.ratio(entity_in_model_response.lower(), entity_in_ground_truth.lower())
        if score < threshold:
            return False, entity_in_model_response
        return True, entity_in_ground_truth
    

    def _entity_similar_to_ground_truth_entity(self, entity_in_model_response: str, entity_in_ground_truth: str, leveshtein_threshold: int, similarity_types:list=['case', 'stop_words', 'subset', 'superset', 'leveshtein']) -> (bool, str):
        """
        Check if two entities are similar. This is useful when the model output is not exactly the same as the ground truth.

        Args:
        entity_in_model_response (str): an entity in the model response
        entity_in_ground_truth (str): an entity in the ground truth
        leveshtein_threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.
        similarity_types (list): the list of similarity types to consider. The default value is ['case', 'stop_words', 'subset', 'superset', 'leveshtein']

        return:
        bool: True if the entities are similar, False otherwise
        str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise
        """
        FP_words = 0
        FN_words = 0
        TP_words = 0

        if entity_in_model_response == entity_in_ground_truth:
            TP_words = len(entity_in_model_response.split())
            return True, entity_in_ground_truth, FP_words, FN_words, TP_words
        
        if 'case' in similarity_types:
            similar, entity_to_output, FP_words, FN_words, TP_words = self._entity_similar_to_ground_truth_entity_LowerUppercase(entity_in_model_response, entity_in_ground_truth)
            #print('SIMILI CASE: ', similar, entity_to_output, FP_words, FN_words, TP_words)
            if similar:
                return similar, entity_to_output, FP_words, FN_words, TP_words
        if 'stop_words' in similarity_types:
            similar, entity_to_output, FP_words, FN_words, TP_words = self._entity_similar_to_ground_truth_entity_StopWords(entity_in_model_response, entity_in_ground_truth)
            #print('SIMILI STOP WORDS: ', similar, entity_to_output, FP_words, FN_words, TP_words)
            if similar:
                return similar, entity_to_output, FP_words, FN_words, TP_words
        if 'subset' in similarity_types:
            similar, entity_to_output, FP_words, FN_words, TP_words = self._entity_similar_to_ground_truth_entity_Subset(entity_in_model_response, entity_in_ground_truth)
            #print('SIMILI SUBSET: ', similar, entity_to_output, FP_words, FN_words, TP_words)
            if similar:
                return similar, entity_to_output, FP_words, FN_words, TP_words
        if 'superset' in similarity_types:
            similar, entity_to_output, FP_words, FN_words, TP_words = self._entity_similar_to_ground_truth_entity_Superset(entity_in_model_response, entity_in_ground_truth)  
            #print('SIMILI SUPERSET: ', similar, entity_to_output, FP_words, FN_words, TP_words)
            if similar:
                return similar, entity_to_output, FP_words, FN_words, TP_words
        if 'leveshtein' in similarity_types:
            similar, entity_to_output = self._entity_similar_to_ground_truth_entity_Leveshtein(entity_in_model_response, entity_in_ground_truth, leveshtein_threshold)
            #print('SIMILI LEVESTAIN: ', similar, entity_to_output)
            if similar:
                FP_words, FN_words, TP_words = 0, 0, 0 # non calcolo FP, FN, TP per leveshtein
                return similar, entity_to_output, FP_words, FN_words, TP_words

        return False, entity_in_model_response, FP_words, FN_words, TP_words
    

    # def _entity_similar_to_ground_truth_entity_deprecated(self, entity_in_model_response: str, entity_in_ground_truth: str, threshold: int) -> (bool, str):
    #     """
    #     Check if two entities are similar. This is useful when the model output is not exactly the same as the ground truth.

    #     Args:
    #     entity_in_model_response (str): an entity in the model response
    #     entity_in_ground_truth (str): an entity in the ground truth
    #     threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.

    #     return:
    #     bool: True if the entities are similar, False otherwise
    #     str: the entity in the ground truth if the entities are similar, the entity in the model response otherwise
    #     """
    #     def __preprocess_string__(string):
    #         # Remove common articles and noise words
    #         noise_words = ["a", "an", "the", "of"]
    #         words = string.split()
    #         filtered_words = [word for word in words if word.lower() not in noise_words]
    #         return ' '.join(filtered_words)

    #     if entity_in_model_response == entity_in_ground_truth:
    #         return True, entity_in_ground_truth
        
    #     normalized_string = __preprocess_string__(entity_in_model_response)
    #     normalized_entity_ground_truth = __preprocess_string__(entity_in_ground_truth)
    #     if normalized_string == normalized_entity_ground_truth:
    #         print('SIMILI NORMALIZED 1: ', entity_in_model_response, ' e ', entity_in_ground_truth)
    #         return True, entity_in_ground_truth
        
    #     similarity = fuzz.ratio(entity_in_model_response.lower(), entity_in_ground_truth.lower())
    #     if similarity >= threshold:
    #         print('SIMILI LEVESTAIN 2: ', entity_in_model_response, ' e ', entity_in_ground_truth)
    #         return True, entity_in_ground_truth
    #     return False, entity_in_model_response
    
        
    def entity_in_ground_truth_list(self, entity_in_model_response: str, ground_truth: list, model_response_list: list, leveshtein_threshold: int, similarity_types:'list[str]') -> (str, int, int):
        """
        Check if an entity is in the ground truth

        Args:
        entity_in_model_response (str): an entity in the model response
        ground_truth (list): the ground truth
        model_response_list (list): the list off all entities already in the answer
        threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.
        similarity_types: the list of similarity types to consider. Must contain elements in ['case', 'stop_words', 'subset', 'superset', 'leveshtein']

        return:
        bool: True if the entity is in the ground truth, False otherwise
        str: the entity in the ground truth if the entity is in the ground truth, the entity in the model response otherwise
        """
        strings = []
        FPs = []
        FNs = []
        TPs = []
        for entity_in_ground_truth in ground_truth:
            is_in, string, FP, FN, TP = self._entity_similar_to_ground_truth_entity(entity_in_model_response, entity_in_ground_truth, leveshtein_threshold, similarity_types)
            if is_in:
                strings.append(string)
                FPs.append(FP)
                FNs.append(FN)
                TPs.append(TP)
        #if entity_in_model_response in ground_truth and entity_in_model_response
        if len(strings) > 0:
            if entity_in_model_response in strings: # se ho estratto la stessa, ritorno se stessa
                TP = len(entity_in_model_response.split())
                return entity_in_model_response, 0, 0, TP
            else: #
                # print('sto analizzando: "', entity_in_model_response, '" e ho trovato: ', strings)
                return strings[-1], FPs[-1], FNs[-1], TPs[-1]
        else:
            FP = len(entity_in_model_response.split())
            FN = 0
            TP = 0
        return entity_in_model_response, FP, FN, TP
    


    def _extract_TP_FP_FN(self, model_response: str, ground_truth: str, similar_is_equal:bool=True, similar_is_equal_threshold: int=100, similarity_types:'list[str]'=['case', 'stop_words', 'subset', 'superset', 'leveshtein'], words_level:bool=True) -> [int, int, int]:
        """
        Compute the F1 score, the precision and the recall between the model output and the ground truth

        Args:
        model_response (str): the model output as it is returned by the model
        ground_truth (str): the ground truth in json format.
        similar_is_equal (bool): if True, the function will consider similar entities as equal. The default value is False.
        similar_is_equal_threshold (int): the threshold to consider the entities similar. The default value is 80. 0 is completely different, 100 is the same.
        words_level (bool): if True, the function will consider as base elements the words. If False, the function will consider as base elements the entity. 
        E.g., if True, the original sentence is "Yesterday morning I was so very happy and sad", the ground truth is ["yesterday morning", "so very happy"] the model output is ["morning", "happy and"], the function will consider FP=2 ("and"); TP=1 ("morning", "happy"); FN=2 ("Yesterday", "so"). 
        If False, the function will consider FP=1 ("happy and"); TP=0; FN=2 ("Yesterday morning", "so very happy").
        similarity_types: the list of similarity types to consider. Must contain elements in ['case', 'stop_words', 'subset', 'superset', 'leveshtein']

        """
        if self.cleaner.verbose: print('ORIGINAL model_response: ', model_response)
        model_response = self._parse_json(model_response)
        ground_truth = self._parse_json(ground_truth.replace('\n', ''))
        model_response = model_response["entities"]
        ground_truth = ground_truth["entities"]
        # print('PARSED ORIGINAL model_response: ', model_response)
        if not similar_is_equal:
            similarity_types = []

        if words_level:
            FP_sum = 0
            FN_sum = 0
            TP_sum = 0
            identified_entities_list = []
            for i, response_entity in enumerate(model_response):
                entity_identified, FP, FN, TP= self.entity_in_ground_truth_list(response_entity, ground_truth, model_response, similar_is_equal_threshold, similarity_types)
                FP_sum += FP
                FN_sum += FN
                TP_sum += TP
                identified_entities_list.append(entity_identified)
            FN_entities = set(ground_truth).difference(set(identified_entities_list))
            FN_entities = [entity.split() for entity in FN_entities]
            FN_entities = [item for row in FN_entities for item in row]
            # print('FALSE NEGATIVES: ', FN_entities)
            FN_sum += len(FN_entities)
            #print('PARSED GROUND TRUTH: ', ground_truth, 'TP_sum:', TP_sum, 'FP_sum:', FP_sum, 'FN_sum:', FN_sum, '\n\n')
            return [TP_sum, FP_sum, FN_sum]
           
        elif not words_level:
            for i, response_entity in enumerate(model_response):
                model_response[i], _, _, _= self.entity_in_ground_truth_list(response_entity, ground_truth, model_response, similar_is_equal_threshold, similarity_types)
            #print('PARSED GROUND TRUTH: ', ground_truth)
            #print('NEW model_response to calculate TP, FP, FN: ', model_response, '\n\n')

            TP = len(set(model_response).intersection(set(ground_truth)))
            FP = len(set(model_response).difference(set(ground_truth)))
            FN = len(set(ground_truth).difference(set(model_response)))
            # F1 = 2 * TP / (2 * TP + FN + FP)
            return [TP, FP, FN]
    
    def generate_evaluation_table(self, similar_is_equal_threshold: int, words_level:bool, similarity_types:'list[str]') -> dict:
        """
        Generate the evaluation table for the model output and the ground truth.

        Args:
        similar_is_equal_threshold (int): the threshold to consider the entities similar by the Leveshtein distance. The default value is 80. 0 is completely
        different, 100 is the same. 
        words_level (bool): if True, the function will consider as base elements the words. If False, the function will consider as base elements the entity. 
        E.g., if True, the original sentence is "Yesterday morning I was so very happy and sad", the ground truth is ["yesterday morning", "so very happy"] the model output is ["morning", "happy and"], the function will consider FP=2 ("and"); TP=1 ("morning", "happy"); FN=2 ("Yesterday", "so"). 
        If False, the function will consider FP=1 ("happy and"); TP=0; FN=2 ("Yesterday morning", "so very happy").
        similarity_types: the list of similarity types to consider. Must contain elements in ['case', 'stop_words', 'subset', 'superset', 'leveshtein']

        return:
        dict: the evaluation table
        """
        metrics_list = []
        for i, res in enumerate(self.data['model_output']):
            if self.cleaner.verbose: print('res:', res)
            metrics_list.append(self._extract_TP_FP_FN(res, self.data['ground_truth'][i], True, similar_is_equal_threshold, similarity_types, words_level))

        metrics_dataframe = pd.DataFrame(metrics_list, columns=['TP', 'FP', 'FN'])
        summary = metrics_dataframe.sum()
        precision = summary['TP'] / (summary['TP'] + summary['FP'])
        recall = summary['TP'] / (summary['TP'] + summary['FN'])
        f1 = 2 * (precision * recall) / (precision + recall)
        self.evaluation_table = {'evaluation': metrics_dataframe, 'precision':precision, 'recall':recall, 'f1':f1}
        return {'evaluation': metrics_dataframe, 'precision':precision, 'recall':recall, 'f1':f1}

In [23]:
# Debug cell: an entity that starts with the control character U+001D.
# The first expression is a bare string literal with no effect.
""""\u001dl'analisi molecolare"""
s = """[{"entity": "\u001dl'analisi molecolare"}, {"entity": "confermato"}, {"entity": "del tipo p210"}] """
# Show the string with U+001D replaced by a space; str.replace returns a new string, `s` itself is unchanged.
s.replace('\u001d', ' ')

'[{"entity": " l\'analisi molecolare"}, {"entity": "confermato"}, {"entity": "del tipo p210"}] '

In [72]:
import glob
import pandas as pd
from datasets import Dataset
#from utils.evaluator import Evaluator
#from utils.output_cleaner import OutputCleaner

# Notebook cell: evaluate every CSV of predictions matched by the glob pattern and collect
# one row of scores (f1, precision, recall) per file and per threshold in `evaluation_results`.
# `OutputCleaner` and `Evaluator` must already be defined by other cells.
similar_is_equal_threshold_list = [100]
#adapters_list = generate_ft_adapters_list("enlayer1_3epochs_4bits__ft_params")
evaluators = {}
csv_files = glob.glob('data/zefiro/4bit_FT/*.csv') 
evaluation_results = pd.DataFrame(columns=['file', 'similar_is_equal_threshold', 'f1_score', 'precision', 'recall'])
output_cleaner = OutputCleaner(verbose=False)

# printed before the loop, so this always shows the empty table
print(evaluation_results)
for file in csv_files:
    # this specific run is excluded from the evaluation
    if "maxNewTokensFactor8_nShotsInference4_zefiro-7b-base-ita__adapters_it.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002" in file: 
        continue
    print("FILE: " , file)
    eval_data = Dataset.from_csv(file) 
    cleaned_data = output_cleaner.apply_cleaning(eval_data, wrong_keys_to_entity=False)
    for similar_is_equal_threshold in similar_is_equal_threshold_list:
        # print(f"{file}_SimilarIsEqual{similar_is_equal}_Threshold{similar_is_equal_threshold}")
        evaluator = Evaluator(data=cleaned_data, offset=False, output_cleaner=output_cleaner)
        # 'leveshtein' is not in similarity_types here, so the threshold is forwarded but not used by any check
        evaluator.generate_evaluation_table(similar_is_equal_threshold=similar_is_equal_threshold,
                                            words_level=True, 
                                            similarity_types=['case', 'stop_words', 'subset', 'superset'])
        #evaluators[f"{file}_SimilarIsEqual{similar_is_equal}_Threshold{similar_is_equal_threshold}"] = evaluator
        # append the scores as a new last row of the results table
        evaluation_results.loc[len(evaluation_results)] = {'file': file, 'similar_is_equal_threshold': similar_is_equal_threshold, 'f1_score': evaluator.evaluation_table['f1'], 'precision': evaluator.evaluation_table['precision'], 'recall': evaluator.evaluation_table['recall']}
        # print('DONE')

Empty DataFrame
Columns: [file, similar_is_equal_threshold, f1_score, precision, recall]
Index: []
FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9843.71 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4782.43 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9965.01 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4442.15 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9825.12 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4282.30 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9969.15 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3886.14 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 4317.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4936.06 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9794.63 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4728.48 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9390.73 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3929.68 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 9854.11 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4279.47 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 10333.68 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 5027.68 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 10332.29 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4969.13 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Map: 100%|██████████| 681/681 [00:00<00:00, 11032.82 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4844.47 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3622.02it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 354.22it/s]
Generating train split: 681 examples [00:00, 12884.83 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63949.87 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 6675.11 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2932.23 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1885.93it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 239.40it/s]
Generating train split: 681 examples [00:00, 16518.74 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 58950.34 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11167.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4931.57 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3246.37it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 331.88it/s]
Generating train split: 681 examples [00:00, 12523.88 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65785.05 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11301.20 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4943.17 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3923.58it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 314.75it/s]
Generating train split: 681 examples [00:00, 13339.88 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66511.14 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10970.83 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4028.68 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2959.99it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 310.76it/s]
Generating train split: 681 examples [00:00, 11466.38 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 25543.23 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9410.50 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3202.11 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4899.89it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 380.64it/s]
Generating train split: 681 examples [00:00, 13433.29 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 61617.07 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11022.99 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4389.42 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3248.88it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 304.51it/s]
Generating train split: 681 examples [00:00, 13931.44 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63148.24 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11189.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4908.14 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3894.43it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 304.22it/s]
Generating train split: 681 examples [00:00, 12450.40 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68934.98 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10987.79 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4659.76 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4084.04it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 216.08it/s]
Generating train split: 681 examples [00:00, 14578.05 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66766.11 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10983.82 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3127.20 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4080.06it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 331.36it/s]
Generating train split: 681 examples [00:00, 14494.97 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67626.04 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11185.56 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4301.50 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4084.04it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 311.06it/s]
Generating train split: 681 examples [00:00, 12401.21 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63362.56 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11047.97 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4320.02 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3358.13it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 317.34it/s]
Generating train split: 681 examples [00:00, 10999.09 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 51295.18 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 6449.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3517.75 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4223.87it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 357.39it/s]
Generating train split: 681 examples [00:00, 15161.66 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66848.93 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11020.31 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4837.79 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4017.53it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 233.38it/s]
Generating train split: 681 examples [00:00, 12312.15 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65236.64 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11109.12 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3732.39 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3715.06it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 346.52it/s]
Generating train split: 681 examples [00:00, 14792.46 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63567.03 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11106.92 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4461.47 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2941.31it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 347.47it/s]
Generating train split: 681 examples [00:00, 17066.83 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67781.70 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11214.67 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4931.82 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3844.46it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 318.38it/s]
Generating train split: 681 examples [00:00, 14879.77 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66452.34 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11148.71 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4912.78 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4219.62it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 374.56it/s]
Generating train split: 681 examples [00:00, 12415.01 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65142.91 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11018.14 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4067.99 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3998.38it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 254.71it/s]
Generating train split: 681 examples [00:00, 14602.20 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65199.41 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11025.84 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3779.14 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5745.62it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 388.47it/s]
Generating train split: 681 examples [00:00, 14459.24 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 64574.44 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11054.99 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4846.59 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3524.63it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 354.49it/s]
Generating train split: 681 examples [00:00, 14646.75 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67902.56 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11066.85 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3165.87 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5957.82it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 290.50it/s]
Generating train split: 681 examples [00:00, 16309.35 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 62776.29 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11094.62 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4220.23 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1952.66it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 333.33it/s]
Generating train split: 681 examples [00:00, 12635.74 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63949.87 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11184.50 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3067.86 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3355.44it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 356.39it/s]
Generating train split: 681 examples [00:00, 15066.65 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66750.51 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7887.69 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2946.45 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2841.67it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 340.14it/s]
Generating train split: 681 examples [00:00, 12503.92 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63842.67 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11190.51 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3951.36 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3637.73it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 374.66it/s]
Generating train split: 681 examples [00:00, 14204.12 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 62615.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11055.76 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3831.46 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3165.51it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 371.08it/s]
Generating train split: 681 examples [00:00, 10843.59 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 41842.89 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7796.13 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4135.07 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1915.21it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 254.12it/s]
Generating train split: 681 examples [00:00, 14672.63 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68793.86 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11116.38 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4341.76 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4760.84it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 373.39it/s]
Generating train split: 681 examples [00:00, 15173.10 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66961.76 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9615.11 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4723.62 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3876.44it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 358.27it/s]
Generating train split: 681 examples [00:00, 9525.67 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 53037.25 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9807.04 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3748.10 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3830.41it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 390.93it/s]
Generating train split: 681 examples [00:00, 13026.03 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 64434.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10585.20 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3739.66 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3095.43it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 366.96it/s]
Generating train split: 681 examples [00:00, 12658.02 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65988.70 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10984.96 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4240.61 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3313.04it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 246.07it/s]
Generating train split: 681 examples [00:00, 12882.15 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 37852.12 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8324.97 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4171.55 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1890.18it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 244.27it/s]
Generating train split: 681 examples [00:00, 13089.84 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66259.65 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10730.34 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4830.78 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4275.54it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 367.34it/s]
Generating train split: 681 examples [00:00, 13722.35 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67266.11 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11224.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3805.09 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4588.95it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 369.54it/s]
Generating train split: 681 examples [00:00, 13031.20 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65024.27 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11013.72 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4270.39 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3097.71it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 348.45it/s]
Generating train split: 681 examples [00:00, 17811.49 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68592.31 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11042.85 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4872.11 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2933.08it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 342.00it/s]
Generating train split: 681 examples [00:00, 13072.05 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66175.22 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11072.08 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3337.19 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3816.47it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 207.90it/s]
Generating train split: 681 examples [00:00, 14802.05 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66335.05 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11245.80 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4350.02 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3460.65it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 349.26it/s]
Generating train split: 681 examples [00:00, 10742.69 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 49348.16 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9467.14 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3342.38 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3905.31it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 331.33it/s]
Generating train split: 681 examples [00:00, 17674.49 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67485.43 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11507.49 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 5029.13 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2906.66it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 283.53it/s]
Generating train split: 681 examples [00:00, 12875.82 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65950.61 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11231.74 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4935.82 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3934.62it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 243.49it/s]
Generating train split: 681 examples [00:00, 13558.40 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65936.91 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11263.10 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4063.01 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1869.12it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 303.14it/s]
Generating train split: 681 examples [00:00, 12805.34 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66232.00 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10535.77 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4058.32 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4144.57it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 289.94it/s]
Generating train split: 681 examples [00:00, 14287.46 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67366.06 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11121.32 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4977.96 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5957.82it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 443.23it/s]
Generating train split: 681 examples [00:00, 15626.07 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63572.69 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10424.00 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4436.42 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3449.26it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 324.59it/s]
Generating train split: 681 examples [00:00, 19129.24 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 59129.74 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10757.46 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4709.93 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5053.38it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 361.67it/s]
Generating train split: 681 examples [00:00, 12912.67 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 53744.80 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11178.77 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4339.05 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5262.61it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 446.82it/s]
Generating train split: 681 examples [00:00, 15839.76 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63078.51 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8873.24 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4751.24 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6710.89it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 425.90it/s]
Generating train split: 681 examples [00:00, 12969.89 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 59779.43 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10390.25 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3685.38 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5777.28it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 439.01it/s]
Generating train split: 681 examples [00:00, 19342.20 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 61869.32 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10600.56 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4782.87 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5592.41it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 346.84it/s]
Generating train split: 681 examples [00:00, 15510.42 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 62523.44 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10874.22 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4316.16 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6853.44it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 423.03it/s]
Generating train split: 681 examples [00:00, 13928.92 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65067.22 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10585.00 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2216.39 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 7002.18it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 335.36it/s]
Generating train split: 681 examples [00:00, 16008.88 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 63537.34 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10763.54 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4802.61 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6278.90it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 372.30it/s]
Generating train split: 681 examples [00:00, 16128.20 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 62609.79 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10798.05 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4454.94 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2761.23it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 435.59it/s]
Generating train split: 681 examples [00:00, 13707.40 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 46997.52 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10048.06 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4842.33 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4485.89it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 273.76it/s]
Generating train split: 681 examples [00:00, 13850.44 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67499.79 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11204.16 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4002.09 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3826.92it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 278.32it/s]
Generating train split: 681 examples [00:00, 12062.93 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65362.04 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10971.46 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4066.81 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3050.40it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 308.43it/s]
Generating train split: 681 examples [00:00, 10795.97 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 43570.04 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9582.91 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4482.83 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4136.39it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 289.10it/s]
Generating train split: 681 examples [00:00, 12169.63 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67904.17 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11181.88 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4322.79 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3366.22it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 305.24it/s]
Generating train split: 681 examples [00:00, 13735.09 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68284.03 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11446.53 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2159.88 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3258.98it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 285.54it/s]
Generating train split: 681 examples [00:00, 13351.91 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68477.20 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11144.23 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3758.20 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2933.08it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 341.42it/s]
Generating train split: 681 examples [00:00, 17360.91 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 70723.77 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11308.36 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4928.82 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3876.44it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 351.11it/s]
Generating train split: 681 examples [00:00, 14874.50 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 69051.64 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11211.02 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4298.26 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4766.25it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 367.50it/s]
Generating train split: 681 examples [00:00, 13318.79 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68663.21 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11152.06 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4863.03 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3226.39it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 303.85it/s]
Generating train split: 681 examples [00:00, 12215.48 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66691.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11235.54 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4467.51 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2974.68it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 300.04it/s]
Generating train split: 681 examples [00:00, 13811.66 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65891.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11192.35 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2839.17 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3300.00it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 304.22it/s]
Generating train split: 681 examples [00:00, 12061.61 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66407.54 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11192.92 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4971.65 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3021.83it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 347.10it/s]
Generating train split: 681 examples [00:00, 14620.06 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68871.82 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10064.02 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4883.23 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4202.71it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 309.50it/s]
Generating train split: 681 examples [00:00, 12395.18 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67232.86 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11009.65 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4252.41 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3287.07it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 307.70it/s]
Generating train split: 681 examples [00:00, 12084.77 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66213.57 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11197.75 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2629.40 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3905.31it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 341.08it/s]
Generating train split: 681 examples [00:00, 12847.39 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66653.93 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11022.82 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4324.76 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3377.06it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 321.95it/s]
Generating train split: 681 examples [00:00, 13968.36 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 54340.91 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8312.30 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2860.12 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3460.65it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 284.49it/s]
Generating train split: 681 examples [00:00, 16384.75 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 70848.32 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11155.72 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4950.10 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3075.00it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 353.86it/s]
Generating train split: 681 examples [00:00, 12492.27 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 42973.52 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8586.26 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4119.43 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2839.75it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 309.89it/s]
Generating train split: 681 examples [00:00, 12119.18 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66000.90 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11124.74 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3759.75 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3334.10it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 298.25it/s]
Generating train split: 681 examples [00:00, 13923.02 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67991.45 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8538.11 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3138.56 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1937.32it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 313.94it/s]
Generating train split: 681 examples [00:00, 16816.43 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 70813.19 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11218.15 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4958.36 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3858.61it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 339.43it/s]
Generating train split: 681 examples [00:00, 11986.54 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 48507.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8909.60 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4099.07 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3819.95it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 339.02it/s]
Generating train split: 681 examples [00:00, 14764.32 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67538.09 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11222.25 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4373.85 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2849.39it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 338.52it/s]
Generating train split: 681 examples [00:00, 14563.11 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 59899.78 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9486.67 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4392.58 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3310.42it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 269.19it/s]
Generating train split: 681 examples [00:00, 12095.73 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66019.21 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11136.41 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3871.28 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 1858.35it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 310.76it/s]
Generating train split: 681 examples [00:00, 13629.83 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67518.93 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4326.53 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2273.98 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3435.14it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 319.88it/s]
Generating train split: 681 examples [00:00, 12161.13 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66192.09 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11203.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3968.09 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5384.22it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 361.14it/s]
Generating train split: 681 examples [00:00, 15324.68 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 35084.77 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9497.67 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4529.35 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3830.41it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 265.68it/s]
Generating train split: 681 examples [00:00, 12830.65 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68113.06 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11209.30 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4897.97 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4293.04it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 362.17it/s]
Generating train split: 681 examples [00:00, 14911.39 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 35767.05 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8928.68 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3956.96 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2803.68it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 263.49it/s]
Generating train split: 681 examples [00:00, 12443.13 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67293.06 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10895.46 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4379.11 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5084.00it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 369.28it/s]
Generating train split: 681 examples [00:00, 16124.93 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 40321.88 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9305.16 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3829.28 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3010.99it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 322.27it/s]
Generating train split: 681 examples [00:00, 14435.63 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 69257.58 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11253.95 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4392.31 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4169.29it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 323.53it/s]
Generating train split: 681 examples [00:00, 12898.44 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 36993.23 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8697.40 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2942.19 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4928.68it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 354.58it/s]
Generating train split: 681 examples [00:00, 19130.27 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 70479.46 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11373.87 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4913.92 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2933.08it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 332.01it/s]
Generating train split: 681 examples [00:00, 9742.65 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 52344.25 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8879.58 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4190.59 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3506.94it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 314.16it/s]
Generating train split: 681 examples [00:00, 17477.23 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 69751.43 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11198.62 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4936.61 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3472.11it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 349.67it/s]
Generating train split: 681 examples [00:00, 11603.14 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 39927.33 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9337.34 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4005.87 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6442.86it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 464.64it/s]
Generating train split: 681 examples [00:00, 13999.03 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 64993.20 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11182.27 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4238.82 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3423.92it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 306.67it/s]
Generating train split: 681 examples [00:00, 13208.97 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 27048.75 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7939.28 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2621.40 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5745.62it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 374.66it/s]
Generating train split: 681 examples [00:00, 14180.99 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 69625.61 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11127.99 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4395.71 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4760.84it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 357.27it/s]
Generating train split: 681 examples [00:00, 8237.36 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 46172.46 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7731.07 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4573.38 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3876.44it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 332.70it/s]
Generating train split: 681 examples [00:00, 14361.31 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 69939.30 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11144.40 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4975.44 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2445.66it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 307.97it/s]
Generating train split: 681 examples [00:00, 11055.67 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 32014.00 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9794.16 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3655.36 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.01_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5555.37it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 341.06it/s]
Generating train split: 681 examples [00:00, 13385.51 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 66511.14 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11029.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3746.04 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3095.43it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 290.54it/s]
Generating train split: 681 examples [00:00, 12994.26 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 37651.04 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8609.78 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3418.42 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.01_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4165.15it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 310.83it/s]
Generating train split: 681 examples [00:00, 16519.50 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68593.96 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11146.79 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4933.10 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6061.13it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 169.00it/s]
Generating train split: 681 examples [00:00, 10215.23 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 52364.40 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7525.09 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3940.26 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4485.89it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 274.64it/s]
Generating train split: 681 examples [00:00, 14158.29 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68087.08 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11198.05 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4303.50 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2866.92it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 262.82it/s]
Generating train split: 681 examples [00:00, 12438.36 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68475.56 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11105.14 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4879.31 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3775.25it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 318.26it/s]
Generating train split: 681 examples [00:00, 11865.59 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 33326.58 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7548.62 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3254.92 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 2995.93it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 322.89it/s]
Generating train split: 681 examples [00:00, 11739.33 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67027.76 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11052.38 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3746.33 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3146.51it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 340.14it/s]
Generating train split: 681 examples [00:00, 14698.00 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 69215.62 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11176.41 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4833.76 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_8_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3404.47it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 301.92it/s]
Generating train split: 681 examples [00:00, 14047.37 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 68730.95 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8199.48 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 2890.71 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference0_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_64_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 3802.63it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 312.10it/s]
Generating train split: 681 examples [00:00, 16735.05 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 67259.78 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11273.59 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4964.39 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor8_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 4185.93it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 345.58it/s]
Generating train split: 681 examples [00:00, 12121.49 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65847.23 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 5420.90 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3698.07 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor2_nShotsInference2_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_16_32_0.05_4_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6626.07it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 399.50it/s]
Generating train split: 681 examples [00:00, 12865.79 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 46762.07 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 6181.42 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 3541.32 examples/s]


FILE:  data/llama/7B_4bit_FT/maxNewTokensFactor4_nShotsInference4_llama-2-7b-chat-hf_adapters_en.layer1_4_torch.bfloat16_32_32_0.05_2_0.0002.csv


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 5053.38it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 385.22it/s]
Generating train split: 681 examples [00:00, 12754.68 examples/s]
Filter: 100%|██████████| 681/681 [00:00<00:00, 65974.99 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11140.92 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 4204.82 examples/s]
