<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/FAISS_Evaluator_Mistral_7B_text_to_sql.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://medium.com/@frankmorales_91352/fine-tuning-the-llm-mistral-7b-instruct-v0-3-249c1814ceaf

# Dependencies

In [1]:
!nvidia-smi

Thu Jul  4 20:58:47 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0              43W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Install Pytorch & other libraries
!pip install torch tensorboard --quiet

# Install Hugging Face libraries
!pip install  --upgrade transformers datasets accelerate evaluate bitsandbytes --quiet

#FlashAttention only supports Ampere GPUs or newer. #NEED A100 , L4  IN GOOGLE COLAB
!pip install -U flash-attn --no-build-isolation --quiet

!pip install colab-env --quiet

!pip install mistral_inference -q

!pip install peft -q

# Hugging Face Setup

In [5]:
import colab_env
import os
from huggingface_hub import login


access_token_write = os.getenv("HUGGINGFACE_ACCESS_TOKEN_WRITE")

login(
  token=access_token_write,
  add_to_git_credential=True
)


#from huggingface_hub import notebook_login
#notebook_login(write_permission=True)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Load the Fine Tuned Model

In [6]:
import torch
import os
import sys
import json
import IPython
from datetime import datetime
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
)

In [None]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

#peft_model_id = "frankmorales2020/Mistral-7B-text-to-sql-flash-attention-2-dataeval"

peft_model_id  = 'frankmorales2020/Mistral-7B-text-to-sql-flash-attention-2-FAISS'

# BitsAndBytesConfig int-4 config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load Model with PEFT adapter
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    device_map="auto",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# load into pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Tensorboad Setup

In [8]:
import colab_env

/content/gdrive/MyDrive/model/POC-Mistral-7B-text-to-sql-flash-attention-2-dataeval/logs

In [None]:
%load_ext tensorboard

##only in my personal dev-environment
#%tensorboard --logdir /content/gdrive/MyDrive/model/Mistral-7B-text-to-sql-flash-attention-2-dataeval/logs

%tensorboard --logdir /content/gdrive/MyDrive/model/Mistral-7B-text-to-sql-flash-attention-2-FAISS/logs



# Dataset Settings

In [None]:
from datasets import load_dataset

# Convert dataset to OAI messages
system_message = """You are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.
SCHEMA:
{schema}"""

def create_conversation(sample):
  return {
    "messages": [
      {"role": "system", "content": system_message.format(schema=sample["context"])},
      {"role": "user", "content": sample["question"]},
      {"role": "assistant", "content": sample["answer"]}
    ]
  }

# Load dataset from the hub
dataset = load_dataset("b-mc2/sql-create-context", split="train")
#dataset = dataset.shuffle().select(range(12500))
dataset = dataset.select(range(12500))

# Convert dataset to OAI messages
dataset = dataset.map(create_conversation, remove_columns=dataset.features,batched=False)

# split dataset into 10,000 training samples and 2,500 test samples
dataset = dataset.train_test_split(test_size=2500/12500)

%cd /content/
# save datasets to disk
dataset["train"].to_json("train_dataset.json", orient="records")
dataset["test"].to_json("test_dataset.json", orient="records")

In [11]:
# Load our test dataset
eval_dataset = load_dataset("json", data_files="/content/test_dataset.json", split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [12]:
eval_dataset

Dataset({
    features: ['messages'],
    num_rows: 2500
})

# Model Evaluation - Inference

## Postgresql Settings

In [None]:
#ADDED By FM 01/06/2024
!apt-get update -y
!apt-get install postgresql-14 -y

!service postgresql restart
!sudo apt install postgresql-server-dev-all

In [14]:
# (proceed_to_quarter_final VARCHAR, eliminated_from_competition VARCHAR)
QUERY_create='CREATE TABLE table_name_24 (score VARCHAR, date VARCHAR)'

In [15]:
# PostGRES SQL Settings
!sudo -u postgres psql -c "CREATE USER postgres WITH SUPERUSER"
!sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"

ERROR:  role "postgres" already exists
ALTER ROLE


In [16]:
QUERY_select='SELECT 2009 FROM table_name_50 WHERE 2011 = "a"'

In [17]:
import os
import psycopg2 as ps
import pandas as pd

DB_NAME = "postgres"
DB_USER = "postgres"
DB_PASS = "postgres"
DB_HOST = "localhost"
DB_PORT = "5432"

In [18]:
def table_creator(query):
    import os
    import psycopg2 as ps
    import pandas as pd

    DB_NAME = "postgres"
    DB_USER = "postgres"
    DB_PASS = "postgres"
    DB_HOST = "localhost"
    DB_PORT = "5432"

    conn = ps.connect(database=DB_NAME,
                  user=DB_USER,
                  password=DB_PASS,
                  host=DB_HOST,
                  port=DB_PORT)

    cur = conn.cursor() # creating a cursor




    # Wrap the execute command in a try-except block to handle potential errors
    try:
        cur.execute("""
                            %s
                            """%query)
        conn.commit()
        print("Table Created successfully")
    except Exception as e:
        conn.rollback() # Rollback the transaction in case of an error
        print("Error creating table:", e)

    conn.close()

In [19]:
table_creator(QUERY_create)

Table Created successfully


In [20]:
import os
import psycopg2 as ps
import pandas as pd

def table_select(query):
    conn = ps.connect(database=DB_NAME,
                      user=DB_USER,
                      password=DB_PASS,
                      host=DB_HOST,
                      port=DB_PORT)
    #print("Database connected successfully")

    query = query.replace('"', "'") # Replace double quotes with single quotes for potential date values

    try:
        #df = pd.read_sql_query("%s"%query, con=conn)
        print('rec: %'%df) # Print the resulting DataFrame
        cursor = conn.cursor()
        cursor.execute(query)
        results = cursor.fetchall()

        for row in results:
            print(row)

            print()

            # Commit the transaction to save the changes
            conn.commit()
            #print("QUERY successfully")
            print()

            # Close the cursor and connection
            cursor.close()
            conn.close()
    except Exception as e:
        #conn.rollback() # Rollback the transaction in case of an error
        #print("Error executing query:", e)
        print('TABLE IS EMPTY')


        conn.close()
    #return bad

In [None]:
print()
# PostGRES SQL Settings
%cd /content/
!sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"

print('START: PG embedding COMPILATION')
%cd /content/
!git clone https://github.com/neondatabase/pg_embedding.git
%cd /content/pg_embedding
!make
!make install # may need sudo
print('END: PG embedding COMPILATION')
print()

#!sudo -u postgres psql -c "DROP EXTENSION embedding"
!sudo -u postgres psql -c "CREATE EXTENSION embedding"
#!sudo -u postgres psql -c "DROP TABLE documents"
!sudo -u postgres psql -c "CREATE TABLE documents(id integer PRIMARY KEY, embedding real[])"

## Dataset Settings

In [22]:
eval_dataset[0]["messages"][1]["content"]

'What December is 8.77 in January '

In [23]:
eval_dataset[0]["messages"][2]["content"]

'SELECT december FROM table_15945862_1 WHERE january = "8.77"'

In [24]:
eval_dataset[0]["messages"][0]['content'][153:len(eval_dataset[0]["messages"][0]['content'])]

'CREATE TABLE table_15945862_1 (december VARCHAR, january VARCHAR)'

## Model Inference

In [25]:
from difflib import SequenceMatcher

def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

similar("Apple","Appel")

0.8

In [26]:
from tqdm import tqdm
from random import randint
from datasets import load_dataset
import psycopg2
from psycopg2 import sql

def evaluate(sample):
    prompt = pipe.tokenizer.apply_chat_template(sample["messages"][:2], tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
    predicted_answer = outputs[0]['generated_text'][len(prompt):].strip()

    print()
    print()
    schema=sample["messages"][0]['content']
    schema_query=schema[153:len(schema)]
    question = sample["messages"][1]["content"]
    original_answer = sample["messages"][2]["content"]

    if predicted_answer ==  original_answer:
        print()
        print()
        print('SUCCESS!')
        print()
        print(f'QUESTION: {question}')
        print()
        print(f'SCHEMA QUERY: {schema_query}')
        table_creator(schema_query)
        print()
        print(f'Generated Answer: {predicted_answer}')
        table_select(predicted_answer)
        print()
        print(f'Original Answer: {original_answer}')
        print()
        return 1
    else:
        print()
        print()
        print('NO - SUCCESS!')
        print()

        ps=similar(predicted_answer,original_answer)
        print(f'Generated Answer: {predicted_answer}')
        print(f' Original Answer: {original_answer}')
        print(f'        SIMILARY: {ps}')
        print()
        return 0

success_rate = []
number_of_eval_samples = 10

# iterate over eval dataset and predict
for n in tqdm(range(number_of_eval_samples)):
    s=eval_dataset[n]
    success_rate.append(evaluate(s))

# compute accuracy
accuracy = sum(success_rate)/len(success_rate)

 10%|█         | 1/10 [00:05<00:45,  5.06s/it]





SUCCESS!

QUESTION: What December is 8.77 in January 

SCHEMA QUERY: CREATE TABLE table_15945862_1 (december VARCHAR, january VARCHAR)
Table Created successfully

Generated Answer: SELECT december FROM table_15945862_1 WHERE january = "8.77"
TABLE IS EMPTY

Original Answer: SELECT december FROM table_15945862_1 WHERE january = "8.77"



 20%|██        | 2/10 [00:08<00:31,  3.98s/it]





SUCCESS!

QUESTION: Which major has most number of students?

SCHEMA QUERY: CREATE TABLE Student (major VARCHAR)
Table Created successfully

Generated Answer: SELECT major FROM Student GROUP BY major ORDER BY COUNT(*) DESC LIMIT 1
TABLE IS EMPTY

Original Answer: SELECT major FROM Student GROUP BY major ORDER BY COUNT(*) DESC LIMIT 1



 30%|███       | 3/10 [00:11<00:24,  3.50s/it]





SUCCESS!

QUESTION: What city was the Dick Weber Open in?

SCHEMA QUERY: CREATE TABLE table_name_22 (city VARCHAR, event VARCHAR)
Table Created successfully

Generated Answer: SELECT city FROM table_name_22 WHERE event = "dick weber open"
TABLE IS EMPTY

Original Answer: SELECT city FROM table_name_22 WHERE event = "dick weber open"



 40%|████      | 4/10 [00:15<00:23,  3.98s/it]





NO - SUCCESS!

Generated Answer: SELECT us_viewers__millions_ FROM table_22265225_1 WHERE no_in_season = 12
 Original Answer: SELECT us_viewers__millions_ FROM table_22265225_1 WHERE no_in_season = "12"
        SIMILARY: 0.9866666666666667



 50%|█████     | 5/10 [00:19<00:18,  3.75s/it]





SUCCESS!

QUESTION: Name the player for pick number for 30

SCHEMA QUERY: CREATE TABLE table_1965650_2 (player VARCHAR, pick__number VARCHAR)
Table Created successfully

Generated Answer: SELECT player FROM table_1965650_2 WHERE pick__number = 30
TABLE IS EMPTY

Original Answer: SELECT player FROM table_1965650_2 WHERE pick__number = 30



 60%|██████    | 6/10 [00:24<00:16,  4.09s/it]





SUCCESS!

QUESTION: What was the finishing time of the Stage that featured a distance of 24.00km and a start time of 21:27?

SCHEMA QUERY: CREATE TABLE table_name_78 (time VARCHAR, distance VARCHAR, start_time VARCHAR)
Table Created successfully

Generated Answer: SELECT time FROM table_name_78 WHERE distance = "24.00km" AND start_time = "21:27"
TABLE IS EMPTY

Original Answer: SELECT time FROM table_name_78 WHERE distance = "24.00km" AND start_time = "21:27"



 70%|███████   | 7/10 [00:31<00:15,  5.05s/it]





NO - SUCCESS!

Generated Answer: SELECT COUNT(*) FROM CLASS AS T1 JOIN employee AS T2 ON T1.PROF_NUM = T2.EMP_NUM WHERE T2.EMP_LNAME = 'Graztevski'
 Original Answer: SELECT COUNT(*) FROM employee AS T1 JOIN CLASS AS T2 ON T1.EMP_NUM = T2.PROF_NUM WHERE T1.EMP_LNAME = 'Graztevski'
        SIMILARY: 0.7982456140350878



 80%|████████  | 8/10 [00:36<00:10,  5.31s/it]





NO - SUCCESS!

Generated Answer: SELECT t2.name FROM genres AS t1 JOIN tracks AS t2 ON t1.id = t2.genre_id WHERE t1.name = "Rock"
 Original Answer: SELECT T2.name FROM genres AS T1 JOIN tracks AS T2 ON T1.id = T2.genre_id WHERE T1.name = "Rock"
        SIMILARY: 0.9375



 90%|█████████ | 9/10 [00:40<00:04,  4.69s/it]





SUCCESS!

QUESTION: Which player won the Masters in 1976?

SCHEMA QUERY: CREATE TABLE table_name_66 (player VARCHAR, year_s__won VARCHAR)
Table Created successfully

Generated Answer: SELECT player FROM table_name_66 WHERE year_s__won = "1976"
TABLE IS EMPTY

Original Answer: SELECT player FROM table_name_66 WHERE year_s__won = "1976"



100%|██████████| 10/10 [00:45<00:00,  4.52s/it]





SUCCESS!

QUESTION: Name the 3 where weightlifter is m. van der goten ( bel )

SCHEMA QUERY: CREATE TABLE table_16779068_5 (weightlifter VARCHAR)
Table Created successfully

Generated Answer: SELECT 3 FROM table_16779068_5 WHERE weightlifter = "M. Van der Goten ( BEL )"
TABLE IS EMPTY

Original Answer: SELECT 3 FROM table_16779068_5 WHERE weightlifter = "M. Van der Goten ( BEL )"






In [27]:
print()
#print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Accuracy (Eval dataset and predict) for a sample of {number_of_eval_samples}: {accuracy*100:.2f}%")


Accuracy (Eval dataset and predict) for a sample of 10: 70.00%


# Model Evaluation - Kernel

In [28]:
# Count Hidden Layers and Neurons (Before Evaluation)
if hasattr(model, 'base_model'):
    llama_model = model.base_model
else:
    llama_model = model

# Count hidden layers of type LlamaDecoderLayer
num_hidden_layers = llama_model.config.num_hidden_layers
#print(num_hidden_layers)

# Estimate neurons (this is very simplified, as explained earlier)
num_neurons = num_hidden_layers * llama_model.config.hidden_size

print(f"Number of hidden layers in the model: {num_hidden_layers}")
print(f"Approximate number of neurons (simplified): {num_neurons}")

Number of hidden layers in the model: 32
Approximate number of neurons (simplified): 131072


In [29]:
torch.cuda.empty_cache()

In [30]:
# Load our test dataset
eval_dataset = load_dataset("json", data_files="/content/test_dataset.json", split="train")
reduced_size = 10
eval_dataset = eval_dataset.shuffle(seed=42).select(range(reduced_size))

In [31]:
eval_dataset

Dataset({
    features: ['messages'],
    num_rows: 10
})

In [32]:
eval_dataset[0]["messages"][0]['content']

'You are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.\nSCHEMA:\nCREATE TABLE TV_Channel (LANGUAGE VARCHAR)'

In [33]:
max_length = 10
all_input_ids = []
all_attention_masks = []

for item in eval_dataset:
    messages = item['messages']

    # Concatenate the 'content' of all messages into a single string
    text = " ".join([msg['content'] for msg in messages])
    #print()
    #print(text)
    #print()

    tokenized = tokenizer(text, padding="max_length", truncation=True, max_length=max_length, return_tensors="pt")
    all_input_ids.append(tokenized["input_ids"][0])
    all_attention_masks.append(tokenized["attention_mask"][0])


input_ids = torch.stack(all_input_ids)
attention_masks = torch.stack(all_attention_masks)

In [34]:
# Now you have input_ids, attention_masks, and labels as tensors with compatible shapes
print(input_ids.shape)
print(attention_masks.shape)

torch.Size([10, 10])
torch.Size([10, 10])


In [35]:
torch.cuda.empty_cache()

In [36]:
del model
del tokenizer
torch.cuda.empty_cache()

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from datasets import load_dataset
from peft import PeftModel
import evaluate
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM

# Constants
BATCH_SIZE = 8
MAX_LENGTH = 10
peft_model_id = "frankmorales2020/Mistral-7B-text-to-sql-flash-attention-2-FAISS"
data_files = "/content/test_dataset.json"
reduced_size = 10

# Load tokenizer (using tokenizer from the PEFT model)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.3",
    torch_dtype=torch.float16,
    device_map='auto'
)

# Resize the token embeddings to match the PEFT vocabulary
base_model.resize_token_embeddings(len(tokenizer))

# Load PEFT model (using the base_model object)
model = PeftModel.from_pretrained(base_model, peft_model_id)
model.eval()

# Ensure model is on the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load your test dataset
eval_dataset = load_dataset("json", data_files=data_files, split="train")
eval_dataset = eval_dataset.shuffle(seed=42).select(range(reduced_size))

# Tokenization and Tensor Creation
all_input_ids = []
all_attention_masks = []
for item in eval_dataset:
    messages = item['messages']
    # Concatenate the 'content' of all messages into a single string
    text = " ".join([msg['content'] for msg in messages])

    tokenized = tokenizer(text, padding="max_length", truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
    all_input_ids.append(tokenized["input_ids"][0])
    all_attention_masks.append(tokenized["attention_mask"][0])

input_ids = torch.stack(all_input_ids)
attention_masks = torch.stack(all_attention_masks)

# Create TensorDataset from your tensors
eval_dataset = TensorDataset(input_ids, attention_masks)

# Create DataLoader
eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Evaluation function (Manually calculating perplexity)
def evaluate_model(model, eval_dataloader):
    model.eval()
    losses = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        try:
            # Convert batch to device (assuming it's a list/tuple of tensors)
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                outputs = model(input_ids=batch[0], attention_mask=batch[1], labels=batch[0])  # Adjust based on your batch structure
            loss = outputs.loss
            losses.append(loss.item())

        except RuntimeError as e:
            if "out of memory" in str(e):
                print("WARNING: Ran out of memory. Consider reducing batch size or model complexity.")
                return None  # Exit early if out of memory
            else:
                raise e

    try:
        perplexity = torch.exp(torch.tensor(losses).mean())
        return perplexity
    except OverflowError:
        print("WARNING: Overflow error while calculating perplexity. Loss values might be too large.")
        return None


# Perform Evaluation
results = evaluate_model(model, eval_dataloader)

In [38]:
print(f"Perplexity: {results:.2f}")

Perplexity: 296.27
