In [None]:
!pip install accelerate==0.21.0 transformers==4.31.0 tokenizers==0.13.3
!pip install bitsandbytes==0.40.0 einops==0.6.1
!pip install xformers==0.0.22.post7
!pip install langchain==0.1.4
!pip install faiss-gpu==1.7.1.post3
!pip install sentence_transformers

Collecting transformers==4.31.0
  Using cached transformers-4.31.0-py3-none-any.whl (7.4 MB)
Collecting tokenizers==0.13.3
  Using cached tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.2
    Uninstalling tokenizers-0.15.2:
      Successfully uninstalled tokenizers-0.15.2
  Attempting uninstall: transformers
    Found existing installation: transformers 4.38.2
    Uninstalling transformers-4.38.2:
      Successfully uninstalled transformers-4.38.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 2.5.1 requires transformers<5.0.0,>=4.32.0, but you have transformers 4.31.0 which is incompatible.[0m[31m
[0mSuccessfully installed tokenizers-0.13.3 transformers-4

In [None]:
import os
from getpass import getpass

from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Used for reporting and for placing helper tensors; the model itself is placed
# by `device_map='auto'` below.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token.
# Never hard-code the token in the notebook: read it from the HF_TOKEN environment
# variable and fall back to an interactive prompt that does not echo the value.
# Any token that was previously committed in this cell should be revoked.
hf_auth = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# NOTE(review): `trust_remote_code=True` allows repository-supplied code to run;
# confirm it is actually required for this checkpoint.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda122.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 122
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda122.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


Model loaded on cuda:0


In [None]:
# Text sequences that should end generation when the model emits them.
stop_list = ['\nHuman:', '\n```\n']

# `add_special_tokens=False` keeps the BOS token out of every stop sequence.
# With the default settings each id list starts with BOS, which is expected to
# occur only at the very start of a sequence, so the stop sequence would not
# match the tail of a generation.
# NOTE(review): the tokenizer may still emit a leading prefix-space piece for each
# string — compare these ids with what the model actually generates and trim if needed.
stop_token_ids = [
    tokenizer(x, add_special_tokens=False)['input_ids'] for x in stop_list
]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [None]:
import torch

# Convert each stop sequence to an int64 tensor on `device`.
# `torch.as_tensor` accepts plain id lists as well as tensors, so re-running this
# cell (which rebinds `stop_token_ids`) keeps working instead of depending on the
# name still holding Python lists.
stop_token_ids = [
    torch.as_tensor(x, dtype=torch.long, device=device) for x in stop_token_ids
]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [None]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stop generation once the sequence ends with one of the stop sequences.

    The stop sequences are read from the module-level ``stop_token_ids`` (a list
    of 1-D id tensors) at call time. Only the first sequence of the batch
    (``input_ids[0]``) is inspected.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when ``input_ids[0]`` ends with any stop sequence."""
        sequence = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # A sequence shorter than the stop sequence cannot end with it; comparing
            # the two directly would fail on mismatched tensor shapes.
            if stop_len == 0 or sequence.shape[0] < stop_len:
                continue
            # Compare on the device of the generated ids so the check does not depend
            # on where the stop tensors were created.
            tail = sequence[-stop_len:]
            if bool(torch.eq(tail, stop_ids.to(tail.device)).all()):
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [None]:
# Generation settings forwarded to the model through the pipeline.
generation_kwargs = dict(
    stopping_criteria=stopping_criteria,  # custom stop sequences; keeps chat output from rambling
    temperature=0.1,                      # low value -> little randomness in the output
    max_new_tokens=512,                   # upper bound on generated tokens per call
    repetition_penalty=1.1,               # discourages the output from repeating itself
)

# Text-generation pipeline around the quantized model.
# `return_full_text=True` because langchain expects the prompt plus the completion.
generate_text = transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    **generation_kwargs,
)

In [None]:
# res = generate_text("Explain me the difference between Data Lakehouse and Data Warehouse.")
# print(res[0]["generated_text"])

In [None]:
!pip install langchain langchain_community langchain_core



In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the notebook can read files stored there.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from operator import itemgetter

from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
# from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [None]:
import os

# Folder on the mounted Drive that holds the source text files.
DOCS_DIR = "/content/drive/MyDrive/compressed"

# Full path of every entry in DOCS_DIR. `os.listdir` returns names in arbitrary
# order, so sort them to make the load order reproducible. Built with a
# comprehension so the builtin `dir` is no longer shadowed.
new_dir = [os.path.join(DOCS_DIR, name) for name in sorted(os.listdir(DOCS_DIR))]

In [None]:
## Load documents
from langchain.document_loaders import TextLoader

documents_list = []

for path in new_dir:
    # Load the file for this iteration (previously every iteration re-read
    # new_dir[0], so the list held copies of the first file only).
    loader = TextLoader(path)
    documents = loader.load()
    documents_list.extend(documents)

# After the loop `documents` holds only the last file; `documents_list` is the full corpus.
# Print a summary instead of dumping every document's text into the output.
print(f"Loaded {len(documents_list)} document(s) from {len(new_dir)} file(s)")

[Document(page_content=' \n \nRegulations for the B.Tech. in Computer Science and Artificial Intelligence(CSAI) Program \n \n1. Preamble \n \nArtificial Intelligence (AI) has become an integral part of technology in our daily lives, driving to office, searching for a restaurant, getting news updates, and recommendations on social media are all using AI. With increase in usage, there is a significant requirement of researchers who can understand AI and build AI technologies. This program will provide students an opportunity to learn both foundational and experimental components of AI and Machine Learning. A student completing this program will be able to undertake industry careers involving innovation and problem solving using Artificial Intelligence (AI) and Machine Learning (ML) technologies and research careers in AI, ML, and, in general, Computer Science areas. Along with courses that provide specialization in AI, students will also have option to explore applied domains such as com

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks of up to 1000 characters with a 20-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
# Split every loaded document: `documents` only holds the result of the last
# `loader.load()` call, whereas `documents_list` accumulates all of them.
all_splits = text_splitter.split_documents(documents_list)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Sentence-embedding model used to vectorize the chunks, run on the GPU.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cuda")

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
)

# Embed every chunk and index the vectors in a FAISS store.
vectorstore = FAISS.from_documents(documents=all_splits, embedding=embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  return self.fget.__get__(instance, owner)()


In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers text-generation pipeline so LangChain can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# Optional smoke test — uncomment to check that the wrapped model responds:
# llm(prompt="What are the requirements for a Bio Minor?")
# Ingesting Data using Document Loader

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Retriever view over the FAISS index, used by the chain to fetch context chunks.
retriever = vectorstore.as_retriever()

# Conversational RAG chain; the retrieved source documents are returned with each answer.
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    return_source_documents=True,
)

In [None]:
import time

In [None]:
# No previous turns: this is the first question of the conversation.
chat_history = []

query = "I am pursuing BTech (CSAM/CSE/CSD/CSSS/CSB/CSAI). Can I apply for MTech (CSE) dual degree program?"
# perf_counter is a monotonic, high-resolution clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted.
s = time.perf_counter()
# `invoke` is the supported entry point; calling the chain object directly is deprecated.
result = chain.invoke({"question": query, "chat_history": chat_history})
curr_time = (time.perf_counter() - s) * 1000  # elapsed time in milliseconds



 According to the provided document, the main focus of the B.Tech. CSAI program is to develop students' understanding of foundational topics in Computer Science, Artificial Intelligence, and Machine Learning, as well as their ability to design and implement algorithms and data structures for efficiently solving new problems, model and analyze various problems using appropriate mathematical/computational and AI concepts, and apply and develop AI algorithms to transform large amounts of data into intelligent decisions and/or behavior. Additionally, the program aims to provide students with general skills such as teamwork, professional and ethical responsibility, communication, self-learning, and entrepreneurship.
18047.194957733154


In [None]:
# Show only the answer text from the chain result.
print(result["answer"])

 Yes, you can apply for the MTech (CSE) dual degree program if you are pursuing any of the above-mentioned BTech programs. However, please note that the eligibility criteria for MTech (CSE) dual degree program may vary depending on your current semester and academic performance. It is advisable to check the eligibility criteria on the official website of the institute offering the MTech (CSE) dual degree program or by contacting their admissions office directly.


In [None]:
# pip install evaluate

In [None]:
# from typing import Any, Optional

# from evaluate import load
# from langchain.evaluation import StringEvaluator


# class PerplexityEvaluator(StringEvaluator):
#     """Evaluate the perplexity of a predicted string."""

#     def __init__(self, model_id: str = "gpt2"):
#         self.model_id = model_id
#         self.metric_fn = load(
#             "perplexity", module_type="metric", model_id=self.model_id, pad_token=0
#         )

#     def _evaluate_strings(
#         self,
#         *,
#         prediction: str,
#         reference: Optional[str] = None,
#         input: Optional[str] = None,
#         **kwargs: Any,
#     ) -> dict:
#         results = self.metric_fn.compute(
#             predictions=[prediction], model_id=self.model_id
#         )
#         ppl = results["perplexities"][0]
#         return {"score": ppl}

In [None]:
# evaluator = PerplexityEvaluator(model_id)

In [None]:
# evaluator.evaluate_strings(prediction=result["answer"])

In [None]:
# Elapsed time of the timed chain call, in milliseconds (the measurement is multiplied by 1000).
print(curr_time)

15807.54041671753
