In [1]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextStreamer

hf_peft_repo = "LoRA_adapted_T5"
peft_config = PeftConfig.from_pretrained(hf_peft_repo)
model = AutoModelForSeq2SeqLM.from_pretrained(peft_config.base_model_name_or_path, return_dict=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
streamer = TextStreamer(tokenizer)

# Load the finetuned Lora PEFT model
model = PeftModel.from_pretrained(model, hf_peft_repo)
model.eval()

[2023-07-17 21:26:26,680] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


2023-07-17 21:26:33.166782: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-17 21:26:33.350751: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-17 21:26:33.563700: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-07-17 21:26:37.013873: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 512)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 512)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): Linear(
                    in_features=512, out_features=512, bias=False
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=512, out_features=8, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=8, out_features=512, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
                    (lora_embedding_B): P

In [2]:
from datasets import Dataset
import pandas as pd

df = pd.read_csv('Training_set_IMDB/testing_set_no_target.csv')
df = df.sample(frac =1).reset_index(drop=True)
for index,row in df.iterrows():
    df.loc[index, 'Text'] = "Translate to SQL: " + row['Text']


df2 = pd.read_csv('Training_set_IMDB/testing_set_unseen_no_target.csv')
df2 = df2.sample(frac =1).reset_index(drop=True)
for index,row in df2.iterrows():
    df2.loc[index, 'Text'] = "Translate to SQL: " + row['Text']

test_set_seen = Dataset.from_pandas(df)
test_set_unseen = Dataset.from_pandas(df2)

In [3]:
test_set_seen.set_format(type = "torch")
test_set_unseen.set_format(type = "torch")

test_set_seen["Text"][1]

"Translate to SQL: How many roles by movie 'Skinny Puppy: Video Collection 1984-1992'"

In [4]:
def map_to_lenght(x):
    x["input_len"] = len(tokenizer(x["Text"]).input_ids)
    x["input_longer_256"] = int(x["input_len"]>256)
    x["input_longer_128"] = int(x["input_len"]>128)
    x["input_longet_64"] = int(x["input_len"]>64)
    x["output_len"] = len(tokenizer(x["SQL"]).input_ids)
    x["output_longet_256"] = int(x["output_len"]>256)
    x["output_longet_128"] = int(x["output_len"]>128)
    x["output_longet_64"] = int(x["output_len"]>64)
    return x

sample_size = 2000
data_stats = test_set_seen.select(range(sample_size)).map(map_to_lenght, num_proc=4)
data_stats_2 = test_set_unseen.select(range(sample_size)).map(map_to_lenght, num_proc=4)

Map (num_proc=4):   0%|          | 0/2000 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [5]:
def compute_and_print(x):
    if len(x["input_len"])==sample_size:
        print(
            f"Input mean: {sum(x['input_len'])/sample_size} \n % of input len > 256: {sum(x['input_longer_256'])/sample_size}, \n % of input len > 128: {sum(x['input_longer_128'])/sample_size}, \n % of input len > 64: {sum(x['input_longet_64'])/sample_size}, \n Ouput mean: {sum(x['output_len'])/sample_size},\n% of output len > 256: {sum(x['output_longet_256'])/sample_size}, \n% of output len > 128: {sum(x['output_longet_128'])/sample_size}, \n% of output len > 64: {sum(x['output_longet_64'])/sample_size}")

output = data_stats.map(compute_and_print, batched=True, batch_size=-1)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Input mean: 19.3125 
 % of input len > 256: 0.0, 
 % of input len > 128: 0.0, 
 % of input len > 64: 0.0, 
 Ouput mean: 106.75450134277344,
% of output len > 256: 0.0, 
% of output len > 128: 0.30000001192092896, 
% of output len > 64: 0.7365000247955322


In [6]:
test_set_seen['Text'][0]

'Translate to SQL: List all movies by director Robert de Nesle order by rank'

In [7]:
def convert_to_features(example_batch, padding = "max_length",input_max = 100, output_max = 170):
    inputs = tokenizer.batch_encode_plus(example_batch["Text"], max_length=input_max, is_split_into_words = False, padding='max_length', truncation=True, return_tensors = "pt")
    
    targets = tokenizer.batch_encode_plus(example_batch["SQL"], max_length=output_max, padding = "max_length",truncation = True)
    if padding == "max_length":
        targets["inputs_ids"] = [
            [(l if l != tokenizer.pad_token_id else -100) for l in target] for target in targets["input_ids"]
        ]
    
    inputs["labels"] = targets['input_ids']
    return inputs

def evaluate_peft_model(sample):
    outputs = model.generate(input_ids=sample["input_ids"].unsqueeze(0).cuda(), max_new_tokens = 200, top_p=0.9)
    prediction = tokenizer.decode(outputs[0].detach().cpu().numpy(), skip_special_tokens=True)
    label = np.where(sample['labels'] != -100, sample['labels'], tokenizer.pad_token_id)
    label = tokenizer.decode(label, skip_special_tokens=True)
    _ = execution_accuracy(prediction, label)
    
    return prediction, label

def execution_accuracy(prediction, label):
    try:
        
        cursor.execute(label)
        result_label = cursor.fetchall()
        all_executions_overall.append(1)
        try:
            cursor.execute(prediction)
            result_pred = cursor.fetchall()
            all_executions_accuracy.append(1)
            if len(result_label)>10:
                if len(result_label) == len(result_pred):
                    accurate_executions.append(1) 
            elif result_label == result_pred:
                accurate_executions.append(1)
            else:
                for_checking_label.append(label)
                for_checking_prediction.append(prediction)
                
        except:
            failed_executions.append(1)
            failed_predicted_SQL.append(prediction)
                
    except:
        failed_original_SQL.append(label)
    return None



In [8]:
import evaluate
import numpy as np
from tqdm import tqdm
import mysql.connector



connection = mysql.connector.connect(
    host="relational.fit.cvut.cz",
    user="guest",
    password="relational",
    database="imdb_ijs"
)
cursor = connection.cursor()



print("mapping both datasets")
tokenized_dataset = test_set_seen.map(convert_to_features, batched=True, num_proc=4)
tokenized_dataset_2 = test_set_unseen.map(convert_to_features, batched=True, num_proc=4)
print("mapped both dataset")
print("Documents we have: tokenizer_dataset for seen and tokenized_dataset_2 for unseen data")


print("\n\n Running executions for seen dataset")
all_executions_overall = []
failed_executions = []
all_executions_accuracy = []
accurate_executions = []
for_checking_label = []
for_checking_prediction = []
failed_original_SQL = []
failed_predicted_SQL = []



for sample in tokenized_dataset:
    p,l = evaluate_peft_model(sample)
    print("all_executions_overall: ", len(all_executions_overall))
    print("all_executions_accuracy: ",len(all_executions_accuracy))
    print("accurate_executions: ", len(accurate_executions))
    print("failed_executions: ", len(failed_executions))


print("All SQL runs:", len(all_executions_overall))
print("Model SQLs that failed: " len(failed_executions))
print(f"Execution rate: {len(all_executions_accuracy)/len(all_executions_overall)*100}%")
print(f"Execution rate: {100 - len(failed_executions)/len(all_executions_overall)*100}%")
print(f"Execution accuracy: {len(accurate_executions)/len(all_executions_accuracy)*100}%")

failed_original_sql_df = pd.DataFrame(failed_original_SQL)
failed_predicted_sql_df = pd.DataFrame(failed_predicted_SQL)
not_equals = pd.DataFrame({
    'Label':for_checking_label,
    'Prediction': for_checking_prediction
})


not_equals.to_csv("/home/toibazd/Data/Text2SQL/Training_set_IMDB/Not_equals_lora.csv", index = False)
failed_original_sql_df.to_csv("/home/toibazd/Data/Text2SQL/Training_set_IMDB/Failed_originals_lora.csv", index = False)
failed_predicted_sql_df.to_csv("/home/toibazd/Data/Text2SQL/Training_set_IMDB/Failed_predicted_lora.csv", index = False)

mapping both datasets


Map (num_proc=4):   0%|          | 0/2315 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/2655 [00:00<?, ? examples/s]

mapped both dataset
Documents we have: tokenizer_dataset for seen and tokenized_dataset_2 for unseen data


 Running executions for seen dataset
all_executions_overall:  1
all_executions_accuracy:  1
accurate_executions:  0
failed_executions:  0
all_executions_overall:  2
all_executions_accuracy:  2
accurate_executions:  1
failed_executions:  0
all_executions_overall:  3
all_executions_accuracy:  3
accurate_executions:  2
failed_executions:  0
all_executions_overall:  4
all_executions_accuracy:  4
accurate_executions:  3
failed_executions:  0
all_executions_overall:  5
all_executions_accuracy:  5
accurate_executions:  4
failed_executions:  0
all_executions_overall:  6
all_executions_accuracy:  6
accurate_executions:  5
failed_executions:  0
all_executions_overall:  7
all_executions_accuracy:  7
accurate_executions:  6
failed_executions:  0
all_executions_overall:  8
all_executions_accuracy:  8
accurate_executions:  6
failed_executions:  0
all_executions_overall:  9
all_executions_accur

all_executions_overall:  70
all_executions_accuracy:  70
accurate_executions:  56
failed_executions:  0
all_executions_overall:  71
all_executions_accuracy:  71
accurate_executions:  57
failed_executions:  0
all_executions_overall:  72
all_executions_accuracy:  72
accurate_executions:  58
failed_executions:  0
all_executions_overall:  73
all_executions_accuracy:  73
accurate_executions:  59
failed_executions:  0
except_1
all_executions_overall:  73
all_executions_accuracy:  73
accurate_executions:  59
failed_executions:  0
all_executions_overall:  74
all_executions_accuracy:  74
accurate_executions:  59
failed_executions:  0
all_executions_overall:  75
all_executions_accuracy:  75
accurate_executions:  60
failed_executions:  0
all_executions_overall:  76
all_executions_accuracy:  76
accurate_executions:  60
failed_executions:  0
all_executions_overall:  77
all_executions_accuracy:  77
accurate_executions:  61
failed_executions:  0
all_executions_overall:  78
all_executions_accuracy:  7

except_1
all_executions_overall:  140
all_executions_accuracy:  140
accurate_executions:  112
failed_executions:  0
all_executions_overall:  141
all_executions_accuracy:  141
accurate_executions:  113
failed_executions:  0
all_executions_overall:  142
all_executions_accuracy:  142
accurate_executions:  114
failed_executions:  0
all_executions_overall:  143
all_executions_accuracy:  143
accurate_executions:  115
failed_executions:  0
all_executions_overall:  144
all_executions_accuracy:  144
accurate_executions:  116
failed_executions:  0
all_executions_overall:  145
all_executions_accuracy:  145
accurate_executions:  117
failed_executions:  0
except_1
all_executions_overall:  145
all_executions_accuracy:  145
accurate_executions:  117
failed_executions:  0
all_executions_overall:  146
all_executions_accuracy:  146
accurate_executions:  118
failed_executions:  0
except_1
all_executions_overall:  146
all_executions_accuracy:  146
accurate_executions:  118
failed_executions:  0
all_execut

except_1
all_executions_overall:  202
all_executions_accuracy:  202
accurate_executions:  165
failed_executions:  0
all_executions_overall:  203
all_executions_accuracy:  203
accurate_executions:  166
failed_executions:  0
all_executions_overall:  204
all_executions_accuracy:  204
accurate_executions:  167
failed_executions:  0
except_1
all_executions_overall:  204
all_executions_accuracy:  204
accurate_executions:  167
failed_executions:  0
all_executions_overall:  205
all_executions_accuracy:  205
accurate_executions:  167
failed_executions:  0
all_executions_overall:  206
all_executions_accuracy:  206
accurate_executions:  168
failed_executions:  0
all_executions_overall:  207
all_executions_accuracy:  207
accurate_executions:  169
failed_executions:  0
all_executions_overall:  208
all_executions_accuracy:  208
accurate_executions:  170
failed_executions:  0
all_executions_overall:  209
all_executions_accuracy:  209
accurate_executions:  171
failed_executions:  0
all_executions_over

all_executions_overall:  265
all_executions_accuracy:  265
accurate_executions:  219
failed_executions:  0
all_executions_overall:  266
all_executions_accuracy:  266
accurate_executions:  220
failed_executions:  0
all_executions_overall:  267
all_executions_accuracy:  267
accurate_executions:  221
failed_executions:  0
all_executions_overall:  268
all_executions_accuracy:  268
accurate_executions:  222
failed_executions:  0
all_executions_overall:  269
all_executions_accuracy:  269
accurate_executions:  223
failed_executions:  0
all_executions_overall:  270
all_executions_accuracy:  270
accurate_executions:  224
failed_executions:  0
all_executions_overall:  271
all_executions_accuracy:  271
accurate_executions:  224
failed_executions:  0
except_1
all_executions_overall:  271
all_executions_accuracy:  271
accurate_executions:  224
failed_executions:  0
all_executions_overall:  272
all_executions_accuracy:  272
accurate_executions:  224
failed_executions:  0
all_executions_overall:  273

all_executions_overall:  326
all_executions_accuracy:  326
accurate_executions:  268
failed_executions:  0
all_executions_overall:  327
all_executions_accuracy:  327
accurate_executions:  269
failed_executions:  0
all_executions_overall:  328
all_executions_accuracy:  328
accurate_executions:  270
failed_executions:  0
all_executions_overall:  329
all_executions_accuracy:  329
accurate_executions:  270
failed_executions:  0
all_executions_overall:  330
all_executions_accuracy:  330
accurate_executions:  271
failed_executions:  0
except_1
all_executions_overall:  330
all_executions_accuracy:  330
accurate_executions:  271
failed_executions:  0
all_executions_overall:  331
all_executions_accuracy:  331
accurate_executions:  271
failed_executions:  0
all_executions_overall:  332
all_executions_accuracy:  332
accurate_executions:  272
failed_executions:  0
all_executions_overall:  333
all_executions_accuracy:  333
accurate_executions:  273
failed_executions:  0
all_executions_overall:  334

all_executions_overall:  392
all_executions_accuracy:  392
accurate_executions:  318
failed_executions:  0
all_executions_overall:  393
all_executions_accuracy:  393
accurate_executions:  318
failed_executions:  0
all_executions_overall:  394
all_executions_accuracy:  394
accurate_executions:  319
failed_executions:  0
except_1
all_executions_overall:  394
all_executions_accuracy:  394
accurate_executions:  319
failed_executions:  0
all_executions_overall:  395
all_executions_accuracy:  395
accurate_executions:  320
failed_executions:  0
except_1
all_executions_overall:  395
all_executions_accuracy:  395
accurate_executions:  320
failed_executions:  0
all_executions_overall:  396
all_executions_accuracy:  396
accurate_executions:  320
failed_executions:  0
all_executions_overall:  397
all_executions_accuracy:  397
accurate_executions:  321
failed_executions:  0
all_executions_overall:  398
all_executions_accuracy:  398
accurate_executions:  322
failed_executions:  0
all_executions_over

KeyboardInterrupt: 

In [None]:
print("\n \n Running evaluation on unseen data")
all_executions_overall = []
failed_executions = []
all_executions_accuracy = []
accurate_executions = []
for_checking_label = []
for_checking_prediction = []
failed_original_SQL = []
failed_predicted_SQL = []

for sample in tokenized_dataset_2:
    p,l = evaluate_peft_model(sample)

print("All SQL runs:", len(all_executions_overall))
print("Model SQLs that failed: " len(failed_executions))
print(f"Execution rate: {len(all_executions_accuracy)/len(all_executions_overall)*100}%")
print(f"Execution rate: {100 - len(failed_executions)/len(all_executions_overall)*100}%")
print(f"Execution accuracy: {len(accurate_executions)/len(all_executions_accuracy)*100}%")