In [2]:
import shutil
import requests
import sys
from typing import Optional, List, Tuple
from langchain_core.language_models import BaseChatModel
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document as LangchainDocument
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceHub
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import config 

In [4]:
## Load the fine-tuned model used to answer questions
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig


# Hub id of the PEFT adapter; its config names the base checkpoint to load.
peft_model_id = "Ubaidbhat/zephr_database_finetuned"

# NOTE(review): this rebinds `config`, a name the first cell also binds via
# `import config`; from here on it refers to the adapter's PeftConfig.
config = PeftConfig.from_pretrained(peft_model_id)
print(config.base_model_name_or_path)

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Map the empty-string key to the current CUDA device when CUDA is available;
# otherwise no device map is passed.
if torch.cuda.is_available():
    d_map = {"": torch.cuda.current_device()}
else:
    d_map = None

# Quantized base model and its tokenizer, both resolved from the adapter config.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map=d_map,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the adapter to the base model, then merge it in (PEFT `merge_and_unload`).
model = PeftModel.from_pretrained(model, peft_model_id)
model = model.merge_and_unload()

HuggingFaceH4/zephyr-7b-beta


config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/83.9M [00:00<?, ?B/s]



In [32]:
## Build the base-model question-answering chain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import LLMChain

# Decoding settings passed to the text-generation pipeline. Sampling is on
# (do_sample=True) and no seed is set in this cell.
generation_settings = dict(
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    max_new_tokens=400,
    return_full_text=True,
    pad_token_id=tokenizer.eos_token_id,  # the EOS token id is used as the pad token id
)

text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Wrap the transformers pipeline so it can be composed with LangChain runnables.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Prompt with <|system|>/<|user|>/<|assistant|> sections; `{question}` is the
# only placeholder.
prompt_template = """
<|system|>
Answer the question based on your knowledge.
</s>
<|user|>
{question}
</s>
<|assistant|>
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["question"],
)

# prompt -> model -> plain string
llm_chain = prompt | llm | StrOutputParser()

def inference(question):
    """Answer a single question with the module-level `llm_chain`.

    Args:
        question: Value substituted for the `{question}` placeholder of the prompt.

    Returns:
        The chain's output with trailing whitespace removed.
    """
    raw_answer = llm_chain.invoke({"question": question})
    return raw_answer.rstrip()

In [33]:
# question = "How should one document a testing plan for a database?"
# answer = inference(question)
# print(answer)

In [20]:
## Load the evaluation datapoints
# The commented-out lines below are one-off upload steps (presumably how the
# benchmark CSV was pushed to the Hub); a normal run only downloads the dataset.
#
# SECURITY: never paste an access token into the notebook. Read it from the
# environment (or use `huggingface-cli login`) and revoke any token that was
# ever committed in this file.
# import os
# from huggingface_hub import login
# login(os.environ["HF_TOKEN"])
# import pandas as pd
# from datasets import Dataset
# generated_questions = pd.read_csv("datasets/db2_dataset.csv")
# dataset = Dataset.from_pandas(generated_questions)
# dataset.push_to_hub("databaseBenchmarkQA")
from datasets import load_dataset

# "train" split of the benchmark; the evaluation loop reads each row's
# "question" and "answer" fields.
datapoints = load_dataset("Ubaidbhat/databaseBenchmarkQA", split="train")

Downloading readme:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading data: 100%|██████████| 268k/268k [00:00<00:00, 596kB/s]


Generating train split:   0%|          | 0/263 [00:00<?, ? examples/s]

In [34]:
import os

# Imported here because no visible earlier cell imports pandas; without it this
# cell raises NameError on a fresh kernel.
import pandas as pd

# Results file; rewritten after every answered question so an interrupted run
# keeps everything produced so far.
RESULTS_CSV = "evalDatasets/databaseQAWithZephr_Finetuned.csv"

# NOTE(review): only datapoints with 1-based position in
# [FIRST_EVALUATED, n - SKIPPED_AT_END] are answered, i.e. the first 9 and the
# last 9 rows are skipped. Confirm this window is intended.
FIRST_EVALUATED = 10
SKIPPED_AT_END = 10

# Create the output directory up front so the first (expensive) inference is
# not lost to a missing-directory error when the CSV is written.
os.makedirs(os.path.dirname(RESULTS_CSV), exist_ok=True)

outputs = []
n = len(datapoints)
last_evaluated = n - SKIPPED_AT_END
for i, datapoint in enumerate(datapoints, start=1):
    print("Inference number {}/{} in progress.....".format(i, n))
    question = datapoint["question"]
    correctAnswer = datapoint["answer"]
    if not (FIRST_EVALUATED <= i <= last_evaluated):
        continue

    llmAnswer = inference(question)
    outputs.append(
        {
            "question": question,
            "correctAnswer": correctAnswer,
            "llmAnswer": llmAnswer,
        }
    )
    # Checkpoint: dump all answers collected so far.
    generated_questions = pd.DataFrame.from_dict(outputs)
    generated_questions.to_csv(RESULTS_CSV, index=False)

Inference number 1/263 in progress.....
Inference number 2/263 in progress.....
Inference number 3/263 in progress.....
Inference number 4/263 in progress.....
Inference number 5/263 in progress.....
Inference number 6/263 in progress.....
Inference number 7/263 in progress.....
Inference number 8/263 in progress.....
Inference number 9/263 in progress.....
Inference number 10/263 in progress.....
Inference number 11/263 in progress.....
Inference number 12/263 in progress.....
Inference number 13/263 in progress.....
Inference number 14/263 in progress.....
Inference number 15/263 in progress.....
Inference number 16/263 in progress.....
Inference number 17/263 in progress.....
Inference number 18/263 in progress.....
Inference number 19/263 in progress.....
Inference number 20/263 in progress.....




Inference number 21/263 in progress.....




Inference number 22/263 in progress.....




Inference number 23/263 in progress.....




Inference number 24/263 in progress.....




Inference number 25/263 in progress.....




Inference number 26/263 in progress.....




Inference number 27/263 in progress.....




Inference number 28/263 in progress.....




Inference number 29/263 in progress.....




Inference number 30/263 in progress.....




Inference number 31/263 in progress.....




Inference number 32/263 in progress.....




Inference number 33/263 in progress.....




Inference number 34/263 in progress.....




Inference number 35/263 in progress.....




Inference number 36/263 in progress.....




Inference number 37/263 in progress.....




Inference number 38/263 in progress.....




Inference number 39/263 in progress.....




Inference number 40/263 in progress.....




Inference number 41/263 in progress.....




Inference number 42/263 in progress.....




Inference number 43/263 in progress.....




Inference number 44/263 in progress.....




Inference number 45/263 in progress.....




Inference number 46/263 in progress.....




Inference number 47/263 in progress.....




Inference number 48/263 in progress.....




Inference number 49/263 in progress.....




Inference number 50/263 in progress.....




Inference number 51/263 in progress.....




Inference number 52/263 in progress.....




Inference number 53/263 in progress.....




Inference number 54/263 in progress.....




Inference number 55/263 in progress.....




Inference number 56/263 in progress.....




Inference number 57/263 in progress.....




Inference number 58/263 in progress.....




Inference number 59/263 in progress.....




Inference number 60/263 in progress.....




Inference number 61/263 in progress.....




Inference number 62/263 in progress.....




Inference number 63/263 in progress.....




Inference number 64/263 in progress.....




Inference number 65/263 in progress.....




Inference number 66/263 in progress.....




Inference number 67/263 in progress.....




Inference number 68/263 in progress.....




Inference number 69/263 in progress.....




Inference number 70/263 in progress.....




Inference number 71/263 in progress.....




Inference number 72/263 in progress.....




Inference number 73/263 in progress.....




Inference number 74/263 in progress.....




Inference number 75/263 in progress.....




Inference number 76/263 in progress.....




Inference number 77/263 in progress.....




Inference number 78/263 in progress.....




Inference number 79/263 in progress.....




Inference number 80/263 in progress.....




Inference number 81/263 in progress.....




Inference number 82/263 in progress.....




Inference number 83/263 in progress.....




Inference number 84/263 in progress.....




Inference number 85/263 in progress.....




Inference number 86/263 in progress.....




Inference number 87/263 in progress.....




Inference number 88/263 in progress.....




Inference number 89/263 in progress.....




Inference number 90/263 in progress.....




Inference number 91/263 in progress.....




Inference number 92/263 in progress.....




Inference number 93/263 in progress.....




Inference number 94/263 in progress.....




Inference number 95/263 in progress.....




Inference number 96/263 in progress.....




Inference number 97/263 in progress.....




Inference number 98/263 in progress.....




Inference number 99/263 in progress.....




Inference number 100/263 in progress.....




Inference number 101/263 in progress.....




Inference number 102/263 in progress.....




Inference number 103/263 in progress.....




Inference number 104/263 in progress.....




Inference number 105/263 in progress.....




Inference number 106/263 in progress.....




Inference number 107/263 in progress.....




Inference number 108/263 in progress.....




Inference number 109/263 in progress.....




Inference number 110/263 in progress.....




Inference number 111/263 in progress.....




Inference number 112/263 in progress.....




Inference number 113/263 in progress.....




Inference number 114/263 in progress.....




Inference number 115/263 in progress.....




Inference number 116/263 in progress.....




Inference number 117/263 in progress.....




Inference number 118/263 in progress.....




Inference number 119/263 in progress.....




Inference number 120/263 in progress.....




Inference number 121/263 in progress.....




Inference number 122/263 in progress.....




Inference number 123/263 in progress.....




Inference number 124/263 in progress.....




Inference number 125/263 in progress.....




Inference number 126/263 in progress.....




Inference number 127/263 in progress.....




Inference number 128/263 in progress.....




Inference number 129/263 in progress.....




Inference number 130/263 in progress.....




Inference number 131/263 in progress.....




Inference number 132/263 in progress.....




Inference number 133/263 in progress.....




Inference number 134/263 in progress.....




Inference number 135/263 in progress.....




Inference number 136/263 in progress.....




Inference number 137/263 in progress.....




Inference number 138/263 in progress.....




Inference number 139/263 in progress.....




Inference number 140/263 in progress.....




Inference number 141/263 in progress.....




Inference number 142/263 in progress.....




Inference number 143/263 in progress.....




Inference number 144/263 in progress.....




Inference number 145/263 in progress.....




Inference number 146/263 in progress.....




Inference number 147/263 in progress.....




Inference number 148/263 in progress.....




Inference number 149/263 in progress.....




Inference number 150/263 in progress.....




Inference number 151/263 in progress.....




Inference number 152/263 in progress.....




Inference number 153/263 in progress.....




Inference number 154/263 in progress.....




Inference number 155/263 in progress.....




Inference number 156/263 in progress.....




Inference number 157/263 in progress.....




Inference number 158/263 in progress.....




Inference number 159/263 in progress.....




Inference number 160/263 in progress.....




Inference number 161/263 in progress.....




Inference number 162/263 in progress.....




Inference number 163/263 in progress.....




Inference number 164/263 in progress.....




Inference number 165/263 in progress.....




Inference number 166/263 in progress.....




Inference number 167/263 in progress.....




Inference number 168/263 in progress.....




Inference number 169/263 in progress.....




Inference number 170/263 in progress.....




Inference number 171/263 in progress.....




Inference number 172/263 in progress.....




Inference number 173/263 in progress.....




Inference number 174/263 in progress.....




Inference number 175/263 in progress.....




Inference number 176/263 in progress.....




Inference number 177/263 in progress.....




Inference number 178/263 in progress.....




Inference number 179/263 in progress.....




Inference number 180/263 in progress.....




Inference number 181/263 in progress.....




Inference number 182/263 in progress.....




Inference number 183/263 in progress.....




Inference number 184/263 in progress.....




Inference number 185/263 in progress.....




Inference number 186/263 in progress.....




Inference number 187/263 in progress.....




Inference number 188/263 in progress.....




Inference number 189/263 in progress.....




Inference number 190/263 in progress.....




Inference number 191/263 in progress.....




Inference number 192/263 in progress.....




Inference number 193/263 in progress.....




Inference number 194/263 in progress.....




Inference number 195/263 in progress.....




Inference number 196/263 in progress.....




Inference number 197/263 in progress.....




Inference number 198/263 in progress.....




Inference number 199/263 in progress.....




Inference number 200/263 in progress.....




Inference number 201/263 in progress.....




Inference number 202/263 in progress.....




Inference number 203/263 in progress.....




Inference number 204/263 in progress.....




Inference number 205/263 in progress.....




Inference number 206/263 in progress.....




Inference number 207/263 in progress.....




Inference number 208/263 in progress.....




Inference number 209/263 in progress.....




Inference number 210/263 in progress.....




Inference number 211/263 in progress.....




Inference number 212/263 in progress.....




Inference number 213/263 in progress.....




Inference number 214/263 in progress.....




Inference number 215/263 in progress.....




Inference number 216/263 in progress.....




Inference number 217/263 in progress.....




Inference number 218/263 in progress.....




Inference number 219/263 in progress.....




Inference number 220/263 in progress.....




Inference number 221/263 in progress.....




Inference number 222/263 in progress.....




Inference number 223/263 in progress.....




Inference number 224/263 in progress.....




Inference number 225/263 in progress.....




Inference number 226/263 in progress.....




Inference number 227/263 in progress.....




Inference number 228/263 in progress.....




Inference number 229/263 in progress.....




Inference number 230/263 in progress.....




Inference number 231/263 in progress.....




Inference number 232/263 in progress.....




Inference number 233/263 in progress.....




Inference number 234/263 in progress.....




Inference number 235/263 in progress.....




Inference number 236/263 in progress.....




Inference number 237/263 in progress.....




Inference number 238/263 in progress.....




Inference number 239/263 in progress.....




Inference number 240/263 in progress.....




Inference number 241/263 in progress.....




Inference number 242/263 in progress.....




Inference number 243/263 in progress.....




Inference number 244/263 in progress.....




Inference number 245/263 in progress.....




Inference number 246/263 in progress.....




Inference number 247/263 in progress.....




Inference number 248/263 in progress.....




Inference number 249/263 in progress.....




Inference number 250/263 in progress.....




Inference number 251/263 in progress.....




Inference number 252/263 in progress.....




Inference number 253/263 in progress.....




Inference number 254/263 in progress.....
Inference number 255/263 in progress.....
Inference number 256/263 in progress.....
Inference number 257/263 in progress.....
Inference number 258/263 in progress.....
Inference number 259/263 in progress.....
Inference number 260/263 in progress.....
Inference number 261/263 in progress.....
Inference number 262/263 in progress.....
Inference number 263/263 in progress.....
