In [1]:
import torch
import re
import pandas as pd
from transformers import AutoTokenizer, BitsAndBytesConfig
from transformers import AutoModelForCausalLM
from peft import PeftModel
from torch import cuda
from sql_metadata import Parser
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "/home/LLM_para/deepspeek_code"
# model_name = "mistralai/Mistral-7B-Instruct-v0.2"
ibnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_4bit_compute_dtype = torch.float16,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    attn_implementation="flash_attention_2", # use with amper architecture
    torch_dtype=torch.bfloat16,
    #quantization_config=bnb_config, # use when low on memory
    device_map = "auto"
)

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.05s/it]


In [3]:
adapter_path = "./final_checkpoint_SFT_deepspeek_Myidea"
model = PeftModel.from_pretrained(model,adapter_path ,torch_dtype = torch.bfloat16)
model = model.merge_and_unload()
model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
tokenizer.encode(' ;')

[32013, 6203]

In [5]:
# 拆解训练集
# 提供每次训练token数
from tqdm import tqdm
import copy 
import re
max_token = 1800
#  保证特殊字符能被识别
accept_toekn = max_token - 150
#获取REF表数据
tab_str = ""
schema_slicing = []
df = pd.read_csv("./mydataset_new/table_schema_Reference_cropped.csv",encoding="utf-8")
for index, row in tqdm(df.iterrows(), total=len(df)):
    table_group = row['Reference_group']
    tab_str_elder = copy.deepcopy(tab_str)
    tab_str += table_group + "\n"
    check_token = tokenizer(f"{tab_str}", add_special_tokens=False)
    if len(check_token["input_ids"]) > accept_toekn:
        schema_slicing.append(tab_str_elder)
        tab_str = table_group + "\n"

    
print(len(schema_slicing))

# print(ReF_group[5])
# print("#############################")
# print(ReF_group[6])


#获取noREF表数据
df = pd.read_csv("./mydataset_new/table_schema_noReference_cropped.csv",encoding="utf-8")
for index, row in tqdm(df.iterrows(), total=len(df)):
    table_group = row['noReference_group']
    tab_str_elder = copy.deepcopy(tab_str)
    tab_str += table_group + "\n"
    check_token = tokenizer(f"{tab_str}", add_special_tokens=False)
    if len(check_token["input_ids"]) > accept_toekn:
#         print("+++++++++++")
        schema_slicing.append(tab_str_elder)
        tab_str = table_group + "\n"
        
        
if len(tab_str) > 0:
    schema_slicing.append(tab_str)

print(len(schema_slicing))
# print(schema_slicing[6])
# print("#############################")
# print(schema_slicing[7])
# print(schema_slicing[8])

create_table_pattern = re.compile(r'CREATE TABLE `[^`]+` \([^;]+\);')
table_name_group = []
test_total = set()
count = 0
for piece in schema_slicing:
    create_table_statements = create_table_pattern.findall(piece)
    temp_list = []
    for table in create_table_statements:
        table_name = re.search(r"CREATE TABLE `([^`]+)`", table).group(1)
        temp_list.append(table_name.lower())
        test_total.add(table_name.lower())
        count+=1
    print(temp_list)
    table_name_group.append(temp_list)
# print(count)
print(len(test_total))
#     table_name_group.append()

100%|████████████████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 478.75it/s]


6


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 530.62it/s]

8
['manufacturers', 'products', 'student', 'plays_games', 'sportsinfo', 'actor', 'entrepreneur', 'people', 'ref_hotel_star_ratings', 'ref_attraction_types', 'hotels', 'tourist_attractions', 'street_markets', 'shops', 'museums', 'royal_family', 'theme_parks', 'visits', 'photos', 'staff', 'tourist_attraction_features', 'wrestler', 'elimination']
['business', 'category', 'checkin', 'neighbourhood', 'review', 'tip', 'ref_detention_type', 'ref_incident_type', 'addresses', 'students', 'teachers', 'assessment_notes', 'behavior_incident', 'detention', 'student_addresses', 'students_in_detention', 'film', 'film_market_estimation', 'catalogs']
['catalog_structure', 'catalog_contents', 'catalog_contents_additional_attributes', 'routes', 'airports', 'stadium', 'game', 'injury_accident', 'physician', 'department', 'affiliated_with', 'trained_in', 'patient', 'nurse', 'appointment', 'prescribes', 'block', 'room', 'on_call', 'stay']
['undergoes', 'buildings', 'office_locations', 'region', 'party', 'me




In [6]:
from transformers import StoppingCriteria
# from typing import List
# from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList,STOPPING_CRITERIA_INPUTS_DOCSTRING, add_start_docstrings
class EosListStoppingCriteria(StoppingCriteria):
    def __init__(self, eos_sequence = [6203]):
    # 设置在第一个';'处停止      
        self.eos_sequence = eos_sequence

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        last_ids = input_ids[:,-len(self.eos_sequence):].tolist()
        return self.eos_sequence in last_ids
    
def append_string_to_file(text, file_path):
  with open(file_path, 'a') as file:
      file.write(text + '\n')

def remove_spaces(text):
  return re.sub(r'\s+', ' ', text)

def call_mistral(inputs):
    output_tokens = model.generate(inputs, max_new_tokens=30, do_sample=False, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id, stopping_criteria = [EosListStoppingCriteria()])
#     output_tokens = model.generate(inputs, max_new_tokens=250, do_sample=False, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id, stopping_criteria = stopping_criteria)
    return tokenizer.decode(output_tokens[0][len(inputs[0]):], skip_special_tokens=True)

In [7]:
def get_input_value(question_pad,schema_pad):
    input_value = f"I want you to act as a relation extraction robot for a sample SQL table. You need to return the tables related to the user's input question and tables.##Question:{question_pad}Below are instructions describing the relationship between tables. Please write a response that appropriately completes the request. \n##instruction:{schema_pad}"
    return input_value 

In [9]:
df = pd.read_csv("./mydataset_new/validation_dataset_formatted_cropped.csv",encoding="utf-8")
outer_index = 0
import re
import time
pattern = re.compile(r'-- Tables: ([\w, ]+);')
results = []
#  schema_slicing
start_time = time.time()
for index, row in tqdm(df.iterrows(), total=len(df)):
    question = row['question']
    ref_tables = row['correct_tables']
    selected_table_set = set()
    result_count = 0
    for schema in schema_slicing:
        result_count += 1
#         instruction = get_instruction(schema)
#         input_value = get_input_value(question,selected_table)
        input_value = get_input_value(question,schema)
        messages = [{"role": "user", "content": input_value.strip()}]
        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt",add_generation_prompt=True,tokenize = True).to(model.device)
        response = call_mistral(inputs)
#         print(response)
        if "#None#" not in response:
            match = re.search(pattern, response)
            if match:
            # 提取匹配的表名字符串
                table_names_str = match.group(1)
                table_names = table_names_str.split(', ')
                for item in table_names:
                     selected_table_set.add(item)
            else:
                predict_table = "None"
    if len(selected_table_set) > 0:
        total_selected_table =  ', '.join(list(selected_table_set))
    else:
        total_selected_table = "None"
    print(f"++++++++++++++++index:{index}++++++++++++++++++++++++")
    print("正确表名：",ref_tables)
    print("预测表名：",total_selected_table)
    print("############################################")
    results.append([ref_tables,total_selected_table])

end_time = time.time()

  1%|▍                                                                                  | 1/178 [00:05<15:04,  5.11s/it]

++++++++++++++++index:0++++++++++++++++++++++++
正确表名： products, manufacturers
预测表名： manufacturers, products
############################################


  1%|▉                                                                                  | 2/178 [00:09<14:18,  4.88s/it]

++++++++++++++++index:1++++++++++++++++++++++++
正确表名： products
预测表名： catalog_contents, products
############################################


  2%|█▍                                                                                 | 3/178 [00:14<13:53,  4.76s/it]

++++++++++++++++index:2++++++++++++++++++++++++
正确表名： manufacturers
预测表名： manufacturer, manufacturers
############################################


  2%|█▊                                                                                 | 4/178 [00:19<14:00,  4.83s/it]

++++++++++++++++index:3++++++++++++++++++++++++
正确表名： manufacturers
预测表名： companies, manufacturers
############################################


  3%|██▎                                                                                | 5/178 [00:23<13:42,  4.75s/it]

++++++++++++++++index:4++++++++++++++++++++++++
正确表名： manufacturers
预测表名： manufacturers
############################################


  3%|██▊                                                                                | 6/178 [00:29<13:52,  4.84s/it]

++++++++++++++++index:5++++++++++++++++++++++++
正确表名： products, manufacturers
预测表名： manufacturer, manufacturers, furniture_manufacte, products
############################################


  4%|███▎                                                                               | 7/178 [00:33<13:44,  4.82s/it]

++++++++++++++++index:6++++++++++++++++++++++++
正确表名： manufacturers
预测表名： manufacturers
############################################


  4%|███▋                                                                               | 8/178 [00:38<13:36,  4.80s/it]

++++++++++++++++index:7++++++++++++++++++++++++
正确表名： products
预测表名： products
############################################


  5%|████▏                                                                              | 9/178 [00:43<13:27,  4.78s/it]

++++++++++++++++index:8++++++++++++++++++++++++
正确表名： student, sportsinfo
预测表名： student, sportsinfo
############################################


  6%|████▌                                                                             | 10/178 [00:48<13:32,  4.84s/it]

++++++++++++++++index:9++++++++++++++++++++++++
正确表名： plays_games, video_games
预测表名： plays_games, video_games
############################################


  6%|█████                                                                             | 11/178 [00:53<13:25,  4.82s/it]

++++++++++++++++index:10++++++++++++++++++++++++
正确表名： video_games
预测表名： video_games
############################################


  7%|█████▌                                                                            | 12/178 [00:57<13:16,  4.80s/it]

++++++++++++++++index:11++++++++++++++++++++++++
正确表名： video_games
预测表名： video_games
############################################


  7%|█████▉                                                                            | 13/178 [01:02<13:09,  4.79s/it]

++++++++++++++++index:12++++++++++++++++++++++++
正确表名： video_games
预测表名： video_games
############################################


  8%|██████▍                                                                           | 14/178 [01:07<12:58,  4.75s/it]

++++++++++++++++index:13++++++++++++++++++++++++
正确表名： sportsinfo, student
预测表名： sportsinfo
############################################


  8%|██████▉                                                                           | 15/178 [01:12<12:57,  4.77s/it]

++++++++++++++++index:14++++++++++++++++++++++++
正确表名： student, plays_games
预测表名： plays_games, student, video_games
############################################


  9%|███████▎                                                                          | 16/178 [01:17<13:03,  4.84s/it]

++++++++++++++++index:15++++++++++++++++++++++++
正确表名： student, sportsinfo
预测表名： student, sportsinfo
############################################


 10%|███████▊                                                                          | 17/178 [01:21<12:49,  4.78s/it]

++++++++++++++++index:16++++++++++++++++++++++++
正确表名： musical
预测表名： musical
############################################


 10%|████████▎                                                                         | 18/178 [01:26<12:45,  4.79s/it]

++++++++++++++++index:17++++++++++++++++++++++++
正确表名： actor, musical
预测表名： actor, musical
############################################


 11%|████████▊                                                                         | 19/178 [01:30<12:28,  4.71s/it]

++++++++++++++++index:18++++++++++++++++++++++++
正确表名： musical, actor
预测表名： actor, musical
############################################


 11%|█████████▏                                                                        | 20/178 [01:35<12:22,  4.70s/it]

++++++++++++++++index:19++++++++++++++++++++++++
正确表名： actor, musical
预测表名： actor, musical
############################################


 12%|█████████▋                                                                        | 21/178 [01:40<12:30,  4.78s/it]

++++++++++++++++index:20++++++++++++++++++++++++
正确表名： entrepreneur, people
预测表名： entrepreneur
############################################


 12%|██████████▏                                                                       | 22/178 [01:45<12:32,  4.82s/it]

++++++++++++++++index:21++++++++++++++++++++++++
正确表名： entrepreneur, people
预测表名： entrepreneur, people
############################################


 13%|██████████▌                                                                       | 23/178 [01:50<12:37,  4.89s/it]

++++++++++++++++index:22++++++++++++++++++++++++
正确表名： people
预测表名： None
############################################


 13%|███████████                                                                       | 24/178 [01:55<12:32,  4.88s/it]

++++++++++++++++index:23++++++++++++++++++++++++
正确表名： entrepreneur
预测表名： companies
############################################


 14%|███████████▌                                                                      | 25/178 [02:00<12:34,  4.93s/it]

++++++++++++++++index:24++++++++++++++++++++++++
正确表名： tourist_attractions, visitors, visits
预测表名： tourist_attractions, visitors, visits
############################################


 15%|███████████▉                                                                      | 26/178 [02:05<12:11,  4.81s/it]

++++++++++++++++index:25++++++++++++++++++++++++
正确表名： visits
预测表名： visits
############################################


 15%|████████████▍                                                                     | 27/178 [02:09<11:55,  4.74s/it]

++++++++++++++++index:26++++++++++++++++++++++++
正确表名： locations, tourist_attractions
预测表名： tourist_attractions
############################################


 16%|████████████▉                                                                     | 28/178 [02:14<11:54,  4.76s/it]

++++++++++++++++index:27++++++++++++++++++++++++
正确表名： tourist_attractions
预测表名： tourist_attractions
############################################


 16%|█████████████▎                                                                    | 29/178 [02:19<11:59,  4.83s/it]

++++++++++++++++index:28++++++++++++++++++++++++
正确表名： museums, tourist_attractions
预测表名： museums
############################################


 17%|█████████████▊                                                                    | 30/178 [02:24<11:50,  4.80s/it]

++++++++++++++++index:29++++++++++++++++++++++++
正确表名： locations, tourist_attractions
预测表名： tourist_attractions
############################################


 17%|██████████████▎                                                                   | 31/178 [02:28<11:40,  4.77s/it]

++++++++++++++++index:30++++++++++++++++++++++++
正确表名： photos
预测表名： photos
############################################


 18%|██████████████▋                                                                   | 32/178 [02:33<11:42,  4.81s/it]

++++++++++++++++index:31++++++++++++++++++++++++
正确表名： staff, tourist_attractions
预测表名： tourist_attractions, staff
############################################


 19%|███████████████▏                                                                  | 33/178 [02:38<11:36,  4.80s/it]

++++++++++++++++index:32++++++++++++++++++++++++
正确表名： elimination
预测表名： elimination
############################################


 19%|███████████████▋                                                                  | 34/178 [02:43<11:28,  4.78s/it]

++++++++++++++++index:33++++++++++++++++++++++++
正确表名： wrestler
预测表名： wrestler
############################################


 20%|████████████████                                                                  | 35/178 [02:47<11:14,  4.71s/it]

++++++++++++++++index:34++++++++++++++++++++++++
正确表名： wrestler
预测表名： wrestler
############################################


 20%|████████████████▌                                                                 | 36/178 [02:52<11:16,  4.76s/it]

++++++++++++++++index:35++++++++++++++++++++++++
正确表名： elimination
预测表名： basketball_match, elimination
############################################


 21%|█████████████████                                                                 | 37/178 [02:57<11:19,  4.82s/it]

++++++++++++++++index:36++++++++++++++++++++++++
正确表名： user, tip
预测表名： tip, user
############################################


 21%|█████████████████▌                                                                | 38/178 [03:02<11:09,  4.79s/it]

++++++++++++++++index:37++++++++++++++++++++++++
正确表名： category, business, review, user
预测表名： business, user_profiles, user, follows, review, tweets
############################################


 22%|█████████████████▉                                                                | 39/178 [03:07<11:10,  4.83s/it]

++++++++++++++++index:38++++++++++++++++++++++++
正确表名： category, business, review, user
预测表名： review, location, restaurant, business
############################################


 22%|██████████████████▍                                                               | 40/178 [03:12<11:20,  4.93s/it]

++++++++++++++++index:39++++++++++++++++++++++++
正确表名： category, business, neighbourhood
预测表名： restaurant, business, category, geographic, location, neighbourhood
############################################


 23%|██████████████████▉                                                               | 41/178 [03:17<11:00,  4.82s/it]

++++++++++++++++index:40++++++++++++++++++++++++
正确表名： category, business
预测表名： None
############################################


 24%|███████████████████▎                                                              | 42/178 [03:21<10:42,  4.73s/it]

++++++++++++++++index:41++++++++++++++++++++++++
正确表名： user, review
预测表名： review, user
############################################


 24%|███████████████████▊                                                              | 43/178 [03:26<10:47,  4.79s/it]

++++++++++++++++index:42++++++++++++++++++++++++
正确表名： category, business
预测表名： location, category, restaurant, business
############################################


 25%|████████████████████▎                                                             | 44/178 [03:31<10:39,  4.77s/it]

++++++++++++++++index:43++++++++++++++++++++++++
正确表名： user, review
预测表名： review, user
############################################


 25%|████████████████████▋                                                             | 45/178 [03:36<10:37,  4.80s/it]

++++++++++++++++index:44++++++++++++++++++++++++
正确表名： review, business, user
预测表名： review, business, user
############################################


 26%|█████████████████████▏                                                            | 46/178 [03:40<10:35,  4.81s/it]

++++++++++++++++index:45++++++++++++++++++++++++
正确表名： tip, business, user
预测表名： business, tip, user
############################################


 26%|█████████████████████▋                                                            | 47/178 [03:45<10:33,  4.83s/it]

++++++++++++++++index:46++++++++++++++++++++++++
正确表名： review, business
预测表名： review, business
############################################


 27%|██████████████████████                                                            | 48/178 [03:50<10:21,  4.78s/it]

++++++++++++++++index:47++++++++++++++++++++++++
正确表名： behavior_incident
预测表名： behavior_incident
############################################


 28%|██████████████████████▌                                                           | 49/178 [03:55<10:13,  4.75s/it]

++++++++++++++++index:48++++++++++++++++++++++++
正确表名： student_addresses
预测表名： student_addresses
############################################


 28%|███████████████████████                                                           | 50/178 [03:59<10:12,  4.78s/it]

++++++++++++++++index:49++++++++++++++++++++++++
正确表名： teachers
预测表名： teachers, person
############################################


 29%|███████████████████████▍                                                          | 51/178 [04:04<10:00,  4.73s/it]

++++++++++++++++index:50++++++++++++++++++++++++
正确表名： students_in_detention
预测表名： students_in_detention
############################################


 29%|███████████████████████▉                                                          | 52/178 [04:09<09:48,  4.67s/it]

++++++++++++++++index:51++++++++++++++++++++++++
正确表名： film
预测表名： film
############################################


 30%|████████████████████████▍                                                         | 53/178 [04:13<09:40,  4.64s/it]

++++++++++++++++index:52++++++++++++++++++++++++
正确表名： film
预测表名： film
############################################


 30%|████████████████████████▉                                                         | 54/178 [04:18<09:34,  4.63s/it]

++++++++++++++++index:53++++++++++++++++++++++++
正确表名： film_market_estimation
预测表名： film_market_estimation
############################################


 31%|█████████████████████████▎                                                        | 55/178 [04:22<09:30,  4.64s/it]

++++++++++++++++index:54++++++++++++++++++++++++
正确表名： film
预测表名： film
############################################


 31%|█████████████████████████▊                                                        | 56/178 [04:27<09:38,  4.74s/it]

++++++++++++++++index:55++++++++++++++++++++++++
正确表名： catalog_contents, catalog_contents_Additional_Attributes
预测表名： catalog_contents, catalog_contents_additional_attributes
############################################


 32%|██████████████████████████▎                                                       | 57/178 [04:32<09:33,  4.74s/it]

++++++++++++++++index:56++++++++++++++++++++++++
正确表名： catalog_structure
预测表名： catalog_structure
############################################


 33%|██████████████████████████▋                                                       | 58/178 [04:37<09:25,  4.72s/it]

++++++++++++++++index:57++++++++++++++++++++++++
正确表名： catalogs
预测表名： catalogs
############################################


 33%|███████████████████████████▏                                                      | 59/178 [04:41<09:13,  4.65s/it]

++++++++++++++++index:58++++++++++++++++++++++++
正确表名： catalog_contents
预测表名： catalog_contents, products
############################################


 34%|███████████████████████████▋                                                      | 60/178 [04:46<09:06,  4.63s/it]

++++++++++++++++index:59++++++++++++++++++++++++
正确表名： airports
预测表名： airports
############################################


 34%|████████████████████████████                                                      | 61/178 [04:51<09:02,  4.63s/it]

++++++++++++++++index:60++++++++++++++++++++++++
正确表名： airports
预测表名： airports
############################################


 35%|████████████████████████████▌                                                     | 62/178 [04:55<08:53,  4.60s/it]

++++++++++++++++index:61++++++++++++++++++++++++
正确表名： airports
预测表名： airports
############################################


 35%|█████████████████████████████                                                     | 63/178 [05:00<08:45,  4.57s/it]

++++++++++++++++index:62++++++++++++++++++++++++
正确表名： airlines
预测表名： routes, airlines
############################################


 36%|█████████████████████████████▍                                                    | 64/178 [05:04<08:45,  4.61s/it]

++++++++++++++++index:63++++++++++++++++++++++++
正确表名： airports
预测表名： airports
############################################


 37%|█████████████████████████████▉                                                    | 65/178 [05:09<08:40,  4.61s/it]

++++++++++++++++index:64++++++++++++++++++++++++
正确表名： airports
预测表名： airports
############################################


 37%|██████████████████████████████▍                                                   | 66/178 [05:13<08:34,  4.59s/it]

++++++++++++++++index:65++++++++++++++++++++++++
正确表名： airlines
预测表名： airlines
############################################


 38%|██████████████████████████████▊                                                   | 67/178 [05:18<08:29,  4.59s/it]

++++++++++++++++index:66++++++++++++++++++++++++
正确表名： airlines, routes
预测表名： routes, airlines
############################################


 38%|███████████████████████████████▎                                                  | 68/178 [05:23<08:23,  4.58s/it]

++++++++++++++++index:67++++++++++++++++++++++++
正确表名： game
预测表名： game
############################################


 39%|███████████████████████████████▊                                                  | 69/178 [05:27<08:20,  4.59s/it]

++++++++++++++++index:68++++++++++++++++++++++++
正确表名： physician, patient
预测表名： patient, physician
############################################


 39%|████████████████████████████████▏                                                 | 70/178 [05:32<08:20,  4.64s/it]

++++++++++++++++index:69++++++++++++++++++++++++
正确表名： department
预测表名： None
############################################


 40%|████████████████████████████████▋                                                 | 71/178 [05:37<08:24,  4.71s/it]

++++++++++++++++index:70++++++++++++++++++++++++
正确表名： procedures
预测表名： procedures
############################################


 40%|█████████████████████████████████▏                                                | 72/178 [05:41<08:11,  4.64s/it]

++++++++++++++++index:71++++++++++++++++++++++++
正确表名： procedures, physician, trained_in
预测表名： procedures, physician, trained_in
############################################


 41%|█████████████████████████████████▋                                                | 73/178 [05:46<08:13,  4.70s/it]

++++++++++++++++index:72++++++++++++++++++++++++
正确表名： physician, prescribes
预测表名： physician, prescribes
############################################


 42%|██████████████████████████████████                                                | 74/178 [05:51<08:00,  4.62s/it]

++++++++++++++++index:73++++++++++++++++++++++++
正确表名： appointment
预测表名： appointment
############################################


 42%|██████████████████████████████████▌                                               | 75/178 [05:55<07:57,  4.64s/it]

++++++++++++++++index:74++++++++++++++++++++++++
正确表名： nurse, on_call
预测表名： on_call, nurse
############################################


 43%|███████████████████████████████████                                               | 76/178 [06:00<07:46,  4.58s/it]

++++++++++++++++index:75++++++++++++++++++++++++
正确表名： procedures, physician, trained_in
预测表名： procedures, physician, trained_in
############################################


 43%|███████████████████████████████████▍                                              | 77/178 [06:04<07:45,  4.61s/it]

++++++++++++++++index:76++++++++++++++++++++++++
正确表名： department
预测表名： None
############################################


 44%|███████████████████████████████████▉                                              | 78/178 [06:09<07:40,  4.61s/it]

++++++++++++++++index:77++++++++++++++++++++++++
正确表名： procedures, physician, trained_in
预测表名： procedures, physician, trained_in
############################################


 44%|████████████████████████████████████▍                                             | 79/178 [06:14<07:40,  4.65s/it]

++++++++++++++++index:78++++++++++++++++++++++++
正确表名： buildings
预测表名： buildings
############################################


 45%|████████████████████████████████████▊                                             | 80/178 [06:19<07:54,  4.84s/it]

++++++++++++++++index:79++++++++++++++++++++++++
正确表名： companies
预测表名： company, companies
############################################


 46%|█████████████████████████████████████▎                                            | 81/178 [06:24<07:56,  4.91s/it]

++++++++++++++++index:80++++++++++++++++++++++++
正确表名： companies
预测表名： company, companies
############################################


 46%|█████████████████████████████████████▊                                            | 82/178 [06:29<07:41,  4.81s/it]

++++++++++++++++index:81++++++++++++++++++++++++
正确表名： buildings
预测表名： buildings
############################################


 47%|██████████████████████████████████████▏                                           | 83/178 [06:33<07:32,  4.76s/it]

++++++++++++++++index:82++++++++++++++++++++++++
正确表名： party
预测表名： party
############################################


 47%|██████████████████████████████████████▋                                           | 84/178 [06:38<07:22,  4.70s/it]

++++++++++++++++index:83++++++++++++++++++++++++
正确表名： party
预测表名： party
############################################


 48%|███████████████████████████████████████▏                                          | 85/178 [06:42<07:11,  4.64s/it]

++++++++++++++++index:84++++++++++++++++++++++++
正确表名： party
预测表名： party
############################################


 48%|███████████████████████████████████████▌                                          | 86/178 [06:47<07:06,  4.64s/it]

++++++++++++++++index:85++++++++++++++++++++++++
正确表名： party_events, party
预测表名： party, party_events
############################################


 49%|████████████████████████████████████████                                          | 87/178 [06:52<07:16,  4.80s/it]

++++++++++++++++index:86++++++++++++++++++++++++
正确表名： browser, accelerator_compatible_browser, web_client_accelerator
预测表名： browser, web_client_accelerator, accelerator_compatible_browser
############################################


 49%|████████████████████████████████████████▌                                         | 88/178 [06:57<07:16,  4.85s/it]

++++++++++++++++index:87++++++++++++++++++++++++
正确表名： transactions, transactions_lots
预测表名： lots, transactions, transactions_lots
############################################


 50%|█████████████████████████████████████████                                         | 89/178 [07:02<07:11,  4.85s/it]

++++++++++++++++index:88++++++++++++++++++++++++
正确表名： investors
预测表名： entrepreneur, investors
############################################


 51%|█████████████████████████████████████████▍                                        | 90/178 [07:07<07:06,  4.84s/it]

++++++++++++++++index:89++++++++++++++++++++++++
正确表名： lots, transactions_lots
预测表名： lots, transactions_lots
############################################


 51%|█████████████████████████████████████████▉                                        | 91/178 [07:12<07:06,  4.90s/it]

++++++++++++++++index:90++++++++++++++++++++++++
正确表名： transactions
预测表名： ref_transaction_types, transactions
############################################


 52%|██████████████████████████████████████████▍                                       | 92/178 [07:16<06:52,  4.80s/it]

++++++++++++++++index:91++++++++++++++++++++++++
正确表名： university
预测表名： university
############################################


 52%|██████████████████████████████████████████▊                                       | 93/178 [07:21<06:42,  4.74s/it]

++++++++++++++++index:92++++++++++++++++++++++++
正确表名： university
预测表名： university
############################################


 53%|███████████████████████████████████████████▎                                      | 94/178 [07:26<06:37,  4.73s/it]

++++++++++++++++index:93++++++++++++++++++++++++
正确表名： university
预测表名： university
############################################


 53%|███████████████████████████████████████████▊                                      | 95/178 [07:30<06:31,  4.72s/it]

++++++++++++++++index:94++++++++++++++++++++++++
正确表名： university, basketball_match
预测表名： basketball_match, university
############################################


 54%|████████████████████████████████████████████▏                                     | 96/178 [07:35<06:33,  4.80s/it]

++++++++++++++++index:95++++++++++++++++++++++++
正确表名： restaurant, geographic, location
预测表名： geographic, location, restaurant
############################################


 54%|████████████████████████████████████████████▋                                     | 97/178 [07:40<06:23,  4.74s/it]

++++++++++++++++index:96++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： location, restaurant
############################################


 55%|█████████████████████████████████████████████▏                                    | 98/178 [07:45<06:17,  4.72s/it]

++++++++++++++++index:97++++++++++++++++++++++++
正确表名： location, restaurant, geographic
预测表名： geographic, location, restaurant
############################################


 56%|█████████████████████████████████████████████▌                                    | 99/178 [07:49<06:08,  4.67s/it]

++++++++++++++++index:98++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： location, restaurant
############################################


 56%|█████████████████████████████████████████████▌                                   | 100/178 [07:54<06:04,  4.67s/it]

++++++++++++++++index:99++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： geographic, location, restaurant
############################################


 57%|█████████████████████████████████████████████▉                                   | 101/178 [07:59<05:58,  4.66s/it]

++++++++++++++++index:100++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： location, restaurant
############################################


 57%|██████████████████████████████████████████████▍                                  | 102/178 [08:03<05:52,  4.64s/it]

++++++++++++++++index:101++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： location, restaurant
############################################


 58%|██████████████████████████████████████████████▊                                  | 103/178 [08:08<05:45,  4.60s/it]

++++++++++++++++index:102++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： location, restaurant
############################################


 58%|███████████████████████████████████████████████▎                                 | 104/178 [08:12<05:37,  4.57s/it]

++++++++++++++++index:103++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： location, restaurant
############################################


 59%|███████████████████████████████████████████████▊                                 | 105/178 [08:17<05:31,  4.54s/it]

++++++++++++++++index:104++++++++++++++++++++++++
正确表名： restaurant, geographic, location
预测表名： location, restaurant
############################################


 60%|████████████████████████████████████████████████▏                                | 106/178 [08:21<05:30,  4.59s/it]

++++++++++++++++index:105++++++++++++++++++++++++
正确表名： restaurant, geographic, location
预测表名： location, restaurant
############################################


 60%|████████████████████████████████████████████████▋                                | 107/178 [08:26<05:28,  4.63s/it]

++++++++++++++++index:106++++++++++++++++++++++++
正确表名： restaurant, location
预测表名： None
############################################


 61%|█████████████████████████████████████████████████▏                               | 108/178 [08:31<05:24,  4.63s/it]

++++++++++++++++index:107++++++++++++++++++++++++
正确表名： user_profiles
预测表名： personfriend, user_profiles, user
############################################


 61%|█████████████████████████████████████████████████▌                               | 109/178 [08:36<05:34,  4.85s/it]

++++++++++++++++index:108++++++++++++++++++++++++
正确表名： user_profiles
预测表名： personfriend, user_profiles
############################################


 62%|██████████████████████████████████████████████████                               | 110/178 [08:41<05:23,  4.75s/it]

++++++++++++++++index:109++++++++++++++++++++++++
正确表名： weather
预测表名： weather
############################################


 62%|██████████████████████████████████████████████████▌                              | 111/178 [08:45<05:21,  4.80s/it]

++++++++++++++++index:110++++++++++++++++++++++++
正确表名： weather, trip
预测表名： trip, weather
############################################


 63%|██████████████████████████████████████████████████▉                              | 112/178 [08:50<05:17,  4.81s/it]

++++++++++++++++index:111++++++++++++++++++++++++
正确表名： trip, weather
预测表名： trip, weather
############################################


 63%|███████████████████████████████████████████████████▍                             | 113/178 [08:55<05:07,  4.73s/it]

++++++++++++++++index:112++++++++++++++++++++++++
正确表名： trip
预测表名： trip
############################################


 64%|███████████████████████████████████████████████████▉                             | 114/178 [09:00<05:05,  4.78s/it]

++++++++++++++++index:113++++++++++++++++++++++++
正确表名： station, status
预测表名： status, station
############################################


 65%|████████████████████████████████████████████████████▎                            | 115/178 [09:05<05:01,  4.79s/it]

++++++++++++++++index:114++++++++++++++++++++++++
正确表名： trip
预测表名： trip
############################################


 65%|████████████████████████████████████████████████████▊                            | 116/178 [09:09<04:52,  4.72s/it]

++++++++++++++++index:115++++++++++++++++++++++++
正确表名： weather
预测表名： weather
############################################


 66%|█████████████████████████████████████████████████████▏                           | 117/178 [09:14<04:44,  4.66s/it]

++++++++++++++++index:116++++++++++++++++++++++++
正确表名： station
预测表名： station
############################################


 66%|█████████████████████████████████████████████████████▋                           | 118/178 [09:18<04:36,  4.61s/it]

++++++++++++++++index:117++++++++++++++++++++++++
正确表名： trip
预测表名： station, trip
############################################


 67%|██████████████████████████████████████████████████████▏                          | 119/178 [09:23<04:29,  4.58s/it]

++++++++++++++++index:118++++++++++++++++++++++++
正确表名： trip
预测表名： station, trip
############################################


 67%|██████████████████████████████████████████████████████▌                          | 120/178 [09:27<04:27,  4.61s/it]

++++++++++++++++index:119++++++++++++++++++++++++
正确表名： campuses
预测表名： campuses
############################################


 68%|███████████████████████████████████████████████████████                          | 121/178 [09:32<04:24,  4.64s/it]

++++++++++++++++index:120++++++++++++++++++++++++
正确表名： campuses
预测表名： campuses
############################################


 69%|███████████████████████████████████████████████████████▌                         | 122/178 [09:37<04:19,  4.64s/it]

++++++++++++++++index:121++++++++++++++++++++++++
正确表名： campuses
预测表名： campuses
############################################


 69%|███████████████████████████████████████████████████████▉                         | 123/178 [09:42<04:22,  4.76s/it]

++++++++++++++++index:122++++++++++++++++++++++++
正确表名： campuses
预测表名： None
############################################


 70%|████████████████████████████████████████████████████████▍                        | 124/178 [09:46<04:13,  4.69s/it]

++++++++++++++++index:123++++++++++++++++++++++++
正确表名： campuses, degrees
预测表名： degrees, campuses
############################################


 70%|████████████████████████████████████████████████████████▉                        | 125/178 [09:51<04:13,  4.78s/it]

++++++++++++++++index:124++++++++++++++++++++++++
正确表名： faculty, campuses
预测表名： campuses, faculty
############################################


 71%|█████████████████████████████████████████████████████████▎                       | 126/178 [09:56<04:05,  4.73s/it]

++++++++++++++++index:125++++++++++++++++++++++++
正确表名： campuses
预测表名： campuses
############################################


 71%|█████████████████████████████████████████████████████████▊                       | 127/178 [10:00<04:00,  4.71s/it]

++++++++++++++++index:126++++++++++++++++++++++++
正确表名： journalist
预测表名： journalist
############################################


 72%|██████████████████████████████████████████████████████████▏                      | 128/178 [10:05<03:56,  4.74s/it]

++++++++++++++++index:127++++++++++++++++++++++++
正确表名： authors, authorship, papers
预测表名： papers, authorship, authors
############################################


 72%|██████████████████████████████████████████████████████████▋                      | 129/178 [10:10<03:53,  4.77s/it]

++++++++++++++++index:128++++++++++++++++++++++++
正确表名： papers, authorship, inst
预测表名： papers, inst, authorship, authors
############################################


 73%|███████████████████████████████████████████████████████████▏                     | 130/178 [10:15<03:45,  4.70s/it]

++++++++++++++++index:129++++++++++++++++++++++++
正确表名： inst, authorship, papers
预测表名： papers, inst, authorship
############################################


 74%|███████████████████████████████████████████████████████████▌                     | 131/178 [10:20<03:42,  4.74s/it]

++++++++++++++++index:130++++++++++++++++++++++++
正确表名： authors, authorship, papers
预测表名： papers, authorship, authors
############################################


 74%|████████████████████████████████████████████████████████████                     | 132/178 [10:24<03:37,  4.74s/it]

++++++++++++++++index:131++++++++++++++++++++++++
正确表名： inst, authorship, papers
预测表名： papers, authorship, authors
############################################


 75%|████████████████████████████████████████████████████████████▌                    | 133/178 [10:29<03:36,  4.81s/it]

++++++++++++++++index:132++++++++++++++++++++++++
正确表名： authors, authorship, papers
预测表名： papers, authorship, authors
############################################


 75%|████████████████████████████████████████████████████████████▉                    | 134/178 [10:34<03:28,  4.73s/it]

++++++++++++++++index:133++++++++++++++++++++++++
正确表名： furniture
预测表名： furniture
############################################


 76%|█████████████████████████████████████████████████████████████▍                   | 135/178 [10:38<03:21,  4.69s/it]

++++++++++++++++index:134++++++++++++++++++++++++
正确表名： person, personFriend
预测表名： personFriend, person
############################################


 76%|█████████████████████████████████████████████████████████████▉                   | 136/178 [10:43<03:18,  4.72s/it]

++++++++++++++++index:135++++++++++++++++++++++++
正确表名： person, personfriend
预测表名： personfriend, person
############################################


 77%|██████████████████████████████████████████████████████████████▎                  | 137/178 [10:48<03:11,  4.67s/it]

++++++++++++++++index:136++++++++++++++++++++++++
正确表名： person, personFriend
预测表名： personfriend
############################################


 78%|██████████████████████████████████████████████████████████████▊                  | 138/178 [10:52<03:03,  4.60s/it]

++++++++++++++++index:137++++++++++++++++++++++++
正确表名： person
预测表名： person
############################################


 78%|███████████████████████████████████████████████████████████████▎                 | 139/178 [10:57<02:58,  4.58s/it]

++++++++++++++++index:138++++++++++++++++++++++++
正确表名： person
预测表名： person
############################################


 79%|███████████████████████████████████████████████████████████████▋                 | 140/178 [11:01<02:53,  4.55s/it]

++++++++++++++++index:139++++++++++++++++++++++++
正确表名： person
预测表名： person
############################################


 79%|████████████████████████████████████████████████████████████████▏                | 141/178 [11:06<02:51,  4.62s/it]

++++++++++++++++index:140++++++++++++++++++++++++
正确表名： personfriend, person
预测表名： personfriend
############################################


 80%|████████████████████████████████████████████████████████████████▌                | 142/178 [11:11<02:46,  4.63s/it]

++++++++++++++++index:141++++++++++++++++++++++++
正确表名： person
预测表名： person
############################################


 80%|█████████████████████████████████████████████████████████████████                | 143/178 [11:15<02:42,  4.63s/it]

++++++++++++++++index:142++++++++++++++++++++++++
正确表名： enzyme
预测表名： enzyme
############################################


 81%|█████████████████████████████████████████████████████████████████▌               | 144/178 [11:20<02:37,  4.62s/it]

++++++++++++++++index:143++++++++++++++++++++++++
正确表名： enzyme
预测表名： enzyme
############################################


 81%|█████████████████████████████████████████████████████████████████▉               | 145/178 [11:24<02:32,  4.63s/it]

++++++++++++++++index:144++++++++++++++++++++++++
正确表名： enzyme
预测表名： enzyme
############################################


 82%|██████████████████████████████████████████████████████████████████▍              | 146/178 [11:29<02:28,  4.65s/it]

++++++++++++++++index:145++++++++++++++++++++++++
正确表名： medicine, medicine_enzyme_interaction
预测表名： medicine, medicine_enzyme_interaction
############################################


 83%|██████████████████████████████████████████████████████████████████▉              | 147/178 [11:34<02:24,  4.65s/it]

++++++++++++++++index:146++++++++++++++++++++++++
正确表名： apartment_bookings, apartments
预测表名： apartments, apartment_bookings
############################################


 83%|███████████████████████████████████████████████████████████████████▎             | 148/178 [11:38<02:19,  4.64s/it]

++++++++++++++++index:147++++++++++++++++++++++++
正确表名： apartment_bookings, guests
预测表名： guests, apartment_bookings
############################################


 84%|███████████████████████████████████████████████████████████████████▊             | 149/178 [11:43<02:17,  4.74s/it]

++++++++++++++++index:148++++++++++++++++++++++++
正确表名： apartment_buildings, apartments
预测表名： apartment_buildings, apartments
############################################


 84%|████████████████████████████████████████████████████████████████████▎            | 150/178 [11:48<02:14,  4.81s/it]

++++++++++++++++index:149++++++++++++++++++++++++
正确表名： apartment_bookings, guests
预测表名： guests, apartments, apartment_bookings
############################################


 85%|████████████████████████████████████████████████████████████████████▋            | 151/178 [11:53<02:10,  4.85s/it]

++++++++++++++++index:150++++++++++++++++++++++++
正确表名： apartment_facilities, apartments
预测表名： apartment_facilities, apartments
############################################


 85%|█████████████████████████████████████████████████████████████████████▏           | 152/178 [11:58<02:06,  4.86s/it]

++++++++++++++++index:151++++++++++++++++++++++++
正确表名： apartments
预测表名： apartments
############################################


 86%|█████████████████████████████████████████████████████████████████████▌           | 153/178 [12:03<01:58,  4.75s/it]

++++++++++++++++index:152++++++++++++++++++++++++
正确表名： apartments
预测表名： apartments
############################################


 87%|██████████████████████████████████████████████████████████████████████           | 154/178 [12:07<01:53,  4.73s/it]

++++++++++++++++index:153++++++++++++++++++++++++
正确表名： apartment_buildings
预测表名： apartment_buildings, building
############################################


 87%|██████████████████████████████████████████████████████████████████████▌          | 155/178 [12:12<01:49,  4.77s/it]

++++++++++++++++index:154++++++++++++++++++++++++
正确表名： program
预测表名： program
############################################


 88%|██████████████████████████████████████████████████████████████████████▉          | 156/178 [12:17<01:43,  4.70s/it]

++++++++++++++++index:155++++++++++++++++++++++++
正确表名： program, broadcast
预测表名： broadcast, program
############################################


 88%|███████████████████████████████████████████████████████████████████████▍         | 157/178 [12:22<01:39,  4.73s/it]

++++++++++++++++index:156++++++++++++++++++++++++
正确表名： channel
预测表名： channel
############################################


 89%|███████████████████████████████████████████████████████████████████████▉         | 158/178 [12:26<01:34,  4.71s/it]

++++++++++++++++index:157++++++++++++++++++++++++
正确表名： channel
预测表名： channel
############################################


 89%|████████████████████████████████████████████████████████████████████████▎        | 159/178 [12:32<01:34,  4.96s/it]

++++++++++++++++index:158++++++++++++++++++++++++
正确表名： customer_master_index
预测表名： customer_master_index
############################################


 90%|████████████████████████████████████████████████████████████████████████▊        | 160/178 [12:37<01:29,  4.96s/it]

++++++++++++++++index:159++++++++++++++++++++++++
正确表名： district
预测表名： district
############################################


 90%|█████████████████████████████████████████████████████████████████████████▎       | 161/178 [12:42<01:23,  4.89s/it]

++++++++++++++++index:160++++++++++++++++++++++++
正确表名： district
预测表名： district
############################################


 91%|█████████████████████████████████████████████████████████████████████████▋       | 162/178 [12:46<01:18,  4.88s/it]

++++++++++++++++index:161++++++++++++++++++++++++
正确表名： district
预测表名： district
############################################


 92%|██████████████████████████████████████████████████████████████████████████▏      | 163/178 [12:51<01:12,  4.84s/it]

++++++++++++++++index:162++++++++++++++++++++++++
正确表名： district
预测表名： district
############################################


 92%|██████████████████████████████████████████████████████████████████████████▋      | 164/178 [12:56<01:07,  4.79s/it]

++++++++++++++++index:163++++++++++++++++++++++++
正确表名： gas_station
预测表名： gas_station
############################################


 93%|███████████████████████████████████████████████████████████████████████████      | 165/178 [13:01<01:04,  4.95s/it]

++++++++++++++++index:164++++++++++++++++++++++++
正确表名： company
预测表名： companies, station_company
############################################


 93%|███████████████████████████████████████████████████████████████████████████▌     | 166/178 [13:06<00:57,  4.82s/it]

++++++++++++++++index:165++++++++++++++++++++++++
正确表名： company
预测表名： company, companies
############################################


 94%|███████████████████████████████████████████████████████████████████████████▉     | 167/178 [13:11<00:53,  4.84s/it]

++++++++++++++++index:166++++++++++++++++++++++++
正确表名： station_company, company, gas_station
预测表名： gas_station, station_company
############################################


 94%|████████████████████████████████████████████████████████████████████████████▍    | 168/178 [13:15<00:47,  4.76s/it]

++++++++++++++++index:167++++++++++++++++++++++++
正确表名： festival_detail
预测表名： festival_detail
############################################


 95%|████████████████████████████████████████████████████████████████████████████▉    | 169/178 [13:20<00:42,  4.73s/it]

++++++++++++++++index:168++++++++++++++++++++++++
正确表名： festival_detail
预测表名： festival_detail
############################################


 96%|█████████████████████████████████████████████████████████████████████████████▎   | 170/178 [13:24<00:37,  4.66s/it]

++++++++++++++++index:169++++++++++++++++++++++++
正确表名： building, institution
预测表名： inst, building, buildings, institution
############################################


 96%|█████████████████████████████████████████████████████████████████████████████▊   | 171/178 [13:29<00:32,  4.63s/it]

++++++++++++++++index:170++++++++++++++++++++++++
正确表名： building, institution
预测表名： inst, building, institution
############################################


 97%|██████████████████████████████████████████████████████████████████████████████▎  | 172/178 [13:34<00:27,  4.66s/it]

++++++++++++++++index:171++++++++++++++++++++++++
正确表名： roller_coaster
预测表名： roller_coaster
############################################


 97%|██████████████████████████████████████████████████████████████████████████████▋  | 173/178 [13:38<00:23,  4.69s/it]

++++++++++++++++index:172++++++++++++++++++++++++
正确表名： participants
预测表名： participants
############################################


 98%|███████████████████████████████████████████████████████████████████████████████▏ | 174/178 [13:43<00:19,  4.77s/it]

++++++++++++++++index:173++++++++++++++++++++++++
正确表名： mill
预测表名： mill
############################################


 98%|███████████████████████████████████████████████████████████████████████████████▋ | 175/178 [13:48<00:14,  4.72s/it]

++++++++++++++++index:174++++++++++++++++++++++++
正确表名： railway
预测表名： railway
############################################


 99%|████████████████████████████████████████████████████████████████████████████████ | 176/178 [13:52<00:09,  4.69s/it]

++++++++++++++++index:175++++++++++++++++++++++++
正确表名： railway
预测表名： railway
############################################


 99%|████████████████████████████████████████████████████████████████████████████████▌| 177/178 [13:57<00:04,  4.72s/it]

++++++++++++++++index:176++++++++++++++++++++++++
正确表名： book
预测表名： book
############################################


100%|█████████████████████████████████████████████████████████████████████████████████| 178/178 [14:02<00:00,  4.73s/it]

++++++++++++++++index:177++++++++++++++++++++++++
正确表名： book, publication
预测表名： book, publication
############################################





In [10]:
total_data = len(df['question'])
total_time = end_time - start_time
# print(total_data)
signal_infernece_time = total_time/total_data
print(signal_infernece_time)

4.733441950230116


In [11]:
# new_df = pd.DataFrame(results, columns = ['predicted_tables','correct_tables','reference_tables','query','question','db_id'])
# new_df.to_csv('new_Prompt.csv', index=False)
#     print(f"++++++++++++++++index:{index}++++++++++++++++++++++++")
#     print("正确表名：",ref_tables)
#     print("预测表名：",total_selected_table)
#     print("############################################")
#     results.append([ref_tables,total_selected_table])
new_df = pd.DataFrame(results, columns = ['correct_tables','predicted_tables'])

In [12]:
total_samples = len(new_df)
total_accuracy = 0
filtered_accuracy = 0
total_precision = 0
total_recall = 0

for index, row in new_df.iterrows():
    
    if not row['predicted_tables'] or pd.isna(row['predicted_tables']):
        continue
    predicted_tables = row['predicted_tables'].split(", ")
    reference_tables = row['correct_tables'].split(", ")
    
    # Convert to lowercase and strip whitespace for comparison
    predicted_tables = [x.lower().replace("--","").replace("**","").strip() for x in predicted_tables]
    reference_tables = [x.lower().strip() for x in reference_tables]
    
    # Calculate accuracy
    if set(predicted_tables) == set(reference_tables):
        total_accuracy += 1
    
    # Calculate precision and recall
    true_positives = len(set(predicted_tables) & set(reference_tables))
    false_positives = len(set(predicted_tables) - set(reference_tables))
    false_negatives = len(set(reference_tables) - set(predicted_tables))

    if true_positives == len(reference_tables):
        filtered_accuracy += 1
    
    if len(predicted_tables) > 0:
        precision = true_positives / (true_positives + false_positives)
        recall = true_positives / (true_positives + false_negatives)
    
    total_precision += precision
    total_recall += recall

# Calculate average precision and recall
avg_precision = total_precision / total_samples
avg_recall = total_recall / total_samples

# Calculate total accuracy
accuracy = total_accuracy / total_samples
filtered_accuracy = filtered_accuracy / total_samples

print("Total Accuracy:", accuracy)
print("Filtered Accuracy:", filtered_accuracy)
print("Average Precision:", avg_precision)
print("Average Recall:", avg_recall)

# new_df.to_csv("generated_schema_links.csv", index=False)

Total Accuracy: 0.7303370786516854
Filtered Accuracy: 0.8820224719101124
Average Precision: 0.8768726591760299
Average Recall: 0.9236891385767789


In [13]:
new_df.to_csv("generated_schema_links.csv", index=False)