## Install all required packages

In [None]:
try:
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'pypdf'], check=True)
except subprocess.CalledProcessError as e:
    print(f"An error occurred: {e}")

In [10]:
!pip install tiktoken
!pip install qdrant-client langchain pypdf
!pip install sentence-transformers
!pip install -U "transformers==4.38.0" --upgrade
!pip install huggingface_hub





In [2]:
import os
import getpass
from operator import itemgetter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.vectorstores import Qdrant
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema import format_document
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

import subprocess
import sys

## Download PDF documentation and split pages to create docs



In [45]:
import requests

def download_file(url, filename):
    # Send a GET request to the URL
    response = requests.get(url)
    # Ensure the request was successful
    response.raise_for_status()

    # Open a local file in binary write mode
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded '{filename}' successfully.")

# URL of the file you want to download
url = 'https://assets.ubuntu.com/v1/544d9904-ubuntu-server-guide-2024-01-22.pdf'
# Filename you want to save as
filename = 'ubuntu-server-guide-2024-01-22.pdf'

# Call the function with the URL and filename
download_file(url, filename)

Downloaded 'ubuntu-server-guide-2024-01-22.pdf' successfully.


In [48]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("ubuntu-server-guide-2024-01-22.pdf")
docs = loader.load_and_split()



## Initialize a Embedding model: You can choose many

In [49]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# initialise embeddings used to convert text to vectors
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)


## Create a vector base for all the pages

In [50]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
all_splits = text_splitter.split_documents(docs)

# create a qdrant collection - a vector based index of all resumes
qdrant_collection = Qdrant.from_documents(
    all_splits,
    embeddings,
    location=":memory:", # Local mode with in-memory storage only
    collection_name="resumes",
)

# construct a retriever on top of the vector store
qdrant_retriever = qdrant_collection.as_retriever()

## Make sure you are logged on hugging face to use LLM models

In [52]:

# Set the token directly in the notebook
os.environ["HF_TOKEN"] = "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"  # Replace 'your_token_here' with your actual token

# Use the token from the environment for operations that require authentication
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Next, we will try out the Gemma 2b model, before we bring the RAG pipeline together. To do that, we have to first ensure we are using upgraded transformer library.

In [53]:
from transformers import AutoTokenizer, pipeline
import torch

hf_access_token = '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'
model = "google/gemma-2b-it"

# Code below is to first test out the model

tokenizer = AutoTokenizer.from_pretrained(model, token=hf_access_token)
pipeline = pipeline(
    "text-generation",
    model=model,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
    max_new_tokens=512
)

messages = [
    {"role": "user", "content": "Where is Milan?"},
]
prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipeline(
	prompt,
	max_new_tokens=256,
	add_special_tokens=True,
	do_sample=True,
	temperature=0.7,
	top_k=50,
	top_p=0.95
)
print(outputs[0]["generated_text"][len(prompt):])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Milan is a city in the northern part of Italy. It is the capital of the Lombardy region in the north-central part of the country.


## Let's try it out

In [56]:
gemma_llm = HuggingFacePipeline(
    pipeline=pipeline,
    model_kwargs={"temperature": 0.7},
)

qa = RetrievalQA.from_chain_type(
    llm=gemma_llm,
    chain_type="stuff",
    retriever=qdrant_retriever
)

query = "How to list all the files in a folder. give me a bash command"
qa.invoke(query)

{'query': 'How to list all the files in a folder. give me a bash command',
 'result': 'Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\nDisplay a formatted list of the directory’s contents, if no DirectoryIndex (such as index.html ) exists in the\nrequested directory.\nCaution\nFor security reasons, this should usually not be set, and certainly should not be set on your Documen-\ntRoot directory. Enable this option carefully on a per-directory basis onlyif you are certain you want\nusers to see the entire contents of the directory.\n• Multiview\n\necho "Backing up $backup_files to $dest/$archive_file"\ndate\necho\n# Backup the files using tar.\ntar czf $dest/$archive_file $backup_files\n# Print end status message.\necho\necho "Backup finished"\ndate\n# Long listing of files in $dest to check file sizes.\nls -lh $dest\n•$backup_files : A variable listing which director

## Let's demo it!

In [36]:
import gradio as gr


In [37]:
from shutil import copyfile

with gr.Blocks() as demo:
	chatbot = gr.Chatbot()
	msg = gr.Textbox()
	clear = gr.ClearButton([msg, chatbot])

	def respond(message, chat_history):
  		bot_message = qa.invoke(message)
  		chat_history.append((message, bot_message))
  		return "", chat_history

	msg.submit(respond, [msg, chatbot], [msg, chatbot])
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://b8f0414a17126a09fb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


