## Hello, Here's How to use RAG w HF Models

Install some dependencies

In [1]:
!pip install -q -U accelerate==0.27.1
!pip install -q -U datasets==2.17.0
!pip install -q -U transformers==4.38.1
!pip install langchain sentence-transformers chromadb langchainhub

!pip install langchain-community langchain-core

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m126.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m68.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m211.5/211.5 MB[0m [31m75.1 MB/s[0m eta [36m0:00:01[0m
[?25h[31mERROR: Operation cancelled by user[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.9/150.9 kB[0m [31m6.2 MB/s

Get the Model You Want

In [None]:
#!pip install --upgrade --upgrade-strategy eager "optimum[neural-compressor]"


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import chromadb

import time
import uuid
#from fuzzywuzzy import fuzz
#from optimum.quantization import QuantizeruenceClassification


Define Variables

In [4]:
import os
import torch

# set your own hf token then fetch it here
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)
tokenizer.pad_token = tokenizer.eos_token

model.to(dtype=torch.float16)
model.generation_config.cache_implementation = "static"
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)


# model.half() -- gpu
#model.to(dtype=torch.float16) # cpu

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Define Data Sources

In [5]:
import pandas as pd

file_names = [
    "study_permit_general",
    "work_permit_student_general",
    "work-study-data-llm",
    "vancouver_transit_qa_pairs",
    "permanent_residence_student_general",
    "data-with-sources"
]

all_texts = []

for file in file_names:
    path = f'./sample_data/{file}.csv'
    try:
        df = pd.read_csv(path)
        df.columns = df.columns.str.lower()

        if 'question' in df.columns and 'answer' in df.columns:
            df = df.drop_duplicates(subset=['question'])
            df['text'] = df['question'].fillna('') + ' ' + df['answer'].fillna('')
        else:
            print(f"no text columns in {file}")
            continue
        all_texts.extend(df['text'].tolist())
    except Exception as e:
        print(f"Error loading {file}: {e}")


Set Embedding Model, and Chroma Client to Interact w Vector Database and Create Collections

In [6]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import chromadb
import uuid
import pandas as pd
#from fuzzywuzzy import fuzz

# pt model for generating embeddings used pretty often
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# persistent client to interact w chroma vector store
client = chromadb.PersistentClient(path="./chroma_db")

# create collections for each data (for testing rn)
collection = client.get_or_create_collection(name="combined_docs")

# seems like better results if we remove duplicates and very similar data
data = pd.DataFrame({"text": all_texts})
data = data.drop_duplicates()
all_texts = data["text"].tolist()

print(f"successfully added {len(all_texts)} documents to Chroma DB.")

  embedding_model = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

successfully added 1031 documents to Chroma DB.


Function to add data to collection by embedding them

In [7]:
def add_data_to_collection_batch(collection, texts, batch_size=3):
    for idx in range(0, len(texts), batch_size):
        try:
            batch_texts = texts[idx: idx + batch_size]

            embeddings = embedding_model.embed_documents(batch_texts)

            batch_ids = [str(uuid.uuid4()) for _ in batch_texts]

            collection.add(
                ids=batch_ids,
                embeddings=embeddings,
                documents=batch_texts
            )
            print(f"successfully added {len(batch_texts)} documents (Batch {idx}-{idx + batch_size - 1})")
        except Exception as e:
            print(f"Error processing batch starting at index {idx}: {e}")

In [8]:
add_data_to_collection_batch(collection, all_texts)
print(f"successfully added {len(all_texts)} documents to the Chroma collection.")

successfully added 3 documents (Batch 0-2)
successfully added 3 documents (Batch 3-5)
successfully added 3 documents (Batch 6-8)
successfully added 3 documents (Batch 9-11)
successfully added 3 documents (Batch 12-14)
successfully added 3 documents (Batch 15-17)
successfully added 3 documents (Batch 18-20)
successfully added 3 documents (Batch 21-23)
successfully added 3 documents (Batch 24-26)
successfully added 3 documents (Batch 27-29)
successfully added 3 documents (Batch 30-32)
successfully added 3 documents (Batch 33-35)
successfully added 3 documents (Batch 36-38)
successfully added 3 documents (Batch 39-41)
successfully added 3 documents (Batch 42-44)
successfully added 3 documents (Batch 45-47)
successfully added 3 documents (Batch 48-50)
successfully added 3 documents (Batch 51-53)
successfully added 3 documents (Batch 54-56)
successfully added 3 documents (Batch 57-59)
successfully added 3 documents (Batch 60-62)
successfully added 3 documents (Batch 63-65)
successfully adde

Function to now match for releveant document

In [9]:
def get_relevant_documents(query, n_results=3):
    try:
        query_embeddings = embedding_model.embed_documents([query])[0]

        results = collection.query(query_embeddings=[query_embeddings], n_results=n_results)
        print(f"Query Results: {results}")

        return results['documents'][0] if results['documents'] else []
    except Exception as e:
        print(f"Error querying: {e}")
        return []

Generate Answer

In [10]:
def generate_answer(query):
    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True, max_length=512)
    base_output = model.generate(inputs["input_ids"], temperature=0.1, max_new_tokens=200, eos_token_id=model.config.eos_token_id)
    response_before_rag = tokenizer.decode(base_output[0], skip_special_tokens=True)

    relevant_documents = get_relevant_documents(query)
    if not relevant_documents:
        return {
            "Before RAG Response": response_before_rag,
            "After RAG Response": "Sorry, no relevant documents found."
        }

    relevant_texts = "\n\n".join([doc for doc in relevant_documents])
    rag_prompt = f"""
    You are a helpful assistant for international students. Here are relevant documents:

    {relevant_texts}

    Please respond to the following question based on the documents above. Be conversational but concise:

    Question: {query}

    Answer:
    """

    rag_inputs = tokenizer(rag_prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)
    rag_output = model.generate(rag_inputs["input_ids"], temperature=0.1, max_new_tokens=300, eos_token_id=model.config.eos_token_id)
    response_after_rag = tokenizer.decode(rag_output[0], skip_special_tokens=True)

    return {
        "Before RAG Response": response_before_rag,
        "After RAG Response": response_after_rag
    }


Example Usage

In [11]:
test_queries = [
    "How do I apply for a study permit in Canada?",
    "Can I work while studying on a student visa?",
    "What happens if my study permit expires before I finish my program?",
    "Do I need a new study permit if I change schools?",
    "How long does it take to process a Canadian study permit?",
    "Am I allowed to work off-campus as an international student?",
    "How many hours can I work while studying in Canada?",
    "What documents do I need to apply for a co-op work permit?",
    "Can I work in Canada after I graduate?",
    "What is a Post-Graduation Work Permit (PGWP) and how do I apply?",
    "How do I apply for MSP (Medical Services Plan) in British Columbia?",
    "Is MSP mandatory for international students?",
    "What healthcare services are covered under MSP?",
    "What should I do if I get sick and don’t have insurance yet?",
    "Can I use private health insurance instead of MSP?",
    "What are my options for student housing in Vancouver?",
    "How much does rent typically cost for international students?",
    "What should I check before signing a lease in Canada?",
    "Are there any student discounts for accommodation?",
    "How can I find a roommate in Canada?",
    "How do I open a bank account as an international student?",
    "What documents do I need to get a student bank account?",
    "Can I get a credit card as an international student?",
    "How do I send money to my home country from Canada?",
    "What scholarships are available for international students?",
    "How does the Compass Card work for transit in Vancouver?",
    "Am I eligible for a U-Pass as an international student?",
    "What is the best way to get around Vancouver on a budget?",
    "Where can I find the bus and SkyTrain schedules?",
    "Are there student discounts for public transportation?",
    "Can I apply for permanent residence after graduating?",
    "What is the Canadian Experience Class (CEC) immigration program?",
    "How can I improve my chances of getting permanent residence?",
    "What are the eligibility requirements for Express Entry?",
    "Does having a Canadian degree help with PR applications?"
]

for idx, user_query in enumerate(test_queries, start=1):
    responses = generate_answer(user_query)

    print("\n" + "="*50)
    print(f"Test Query {idx}: {user_query}")
    print("="*50)
    print("Response Before RAG:\n", responses["Before RAG Response"])
    print("\nResponse After RAG:\n", responses["After RAG Response"])
    print("="*50 + "\n\n")

#add_all_texts_to_collection()

# takes 1 min 15 seconds ish to get relevant documents
# dropped from 2 min to 1min 10 sec ish


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Query Results: {'ids': [['18a103b4-593d-4ad1-93db-9c9249abc800', 'cc8455ae-1572-4740-94c0-e522edcd67c8', '742dad22-ca94-41c6-b87b-d4e00f42077b']], 'embeddings': None, 'documents': [['How do I pay for a study permit?("https://ircc.canada.ca/english/helpcentre/answer.asp?qnum=481&top=15") You must pay a processing fee with your application. For details, visit the\xa0Pay your fees page("https://ircc.canada.ca/english/information/fees/index.asp")\xa0to learn about the methods of payment accepted by Canadian visa offices.', 'Do I need a permit to study in Canada?("https://ircc.canada.ca/english/helpcentre/answer.asp?qnum=478&top=15") Most\xa0foreign nationals\xa0need a study permit to study in Canada. Some people in specific situations may not. Learn more about\xa0who needs a study permit("https://www.canada.ca/en/immigration-refugees-citizenship/services/study-canada/study-permit/eligibility.html").\nYou should apply for a study permit\xa0before\xa0coming to Canada. Only some people can ap

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Test Query 1: How do I apply for a study permit in Canada?
Response Before RAG:
 How do I apply for a study permit in Canada? The application process typically involves several steps, which are outlined below.

## Step 1: Determine Your Eligibility
To apply for a study permit in Canada, you must meet certain eligibility criteria. These include being a permanent resident of Canada, having a valid passport, and having sufficient funds to support yourself during your stay in Canada.

## Step 2: Choose the Correct Study Permit Type
There are several types of study permits available, including a study permit for a specific program, a study permit for a specific country, and a study permit for a specific field of study. You must choose the correct type of permit based on your program and country of origin.

## Step 3: Gather Required Documents
To apply for a study permit, you will need to gather several documents, including:
- A valid passport
- Proof of language proficiency (e.g. IELTS or 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Query Results: {'ids': [['77ba47db-b0c0-4d31-a2b6-04047e32236b', '5da9fe5c-ca1c-487b-be2f-314dc7153225', 'f6e2134e-b0f8-4d85-a160-be1f70dc26d4']], 'embeddings': None, 'documents': [['Can students work on-campus before their studies begin? No, international students cannot work before their studies begin.', 'Can students work remotely for an employer outside Canada while studying?  "Yes  students can work remotely for an employer outside Canada as long as they meet the conditions of their study permit. This work does not count toward the 24-hour per week limit."', 'Can students work off-campus in addition to on-campus? Yes, international students can work both on-campus and off-campus as long as they meet the eligibility requirements.']], 'uris': None, 'data': None, 'metadatas': [[None, None, None]], 'distances': [[0.6896981000900269, 0.7329870462417603, 0.7381037473678589]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas:

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Test Query 2: Can I work while studying on a student visa?
Response Before RAG:
 Can I work while studying on a student visa? Working while studying on a student visa can be a bit tricky, but it's not impossible. Here are some things to consider:

**Eligibility:** To work while studying on a student visa, you must meet the following conditions:

1. You must be a student in a degree program that is approved by the relevant authorities (e.g., the UK's Immigration, Home Office).
2. You must have a valid student visa.
3. You must not have overstayed your student visa or been removed from the UK.

**Types of work:** You can work in various types of jobs, including:

1. Part-time work (less than 20 hours per week)
2. Full-time work (more than 20 hours per week)
3. Freelance work (e.g., writing, designing, consulting)
4. Internships (some internships may require you to work part-time)

**Rules and regulations:**

1. **Maximum working hours:** You can work up to 20

Response After RAG:
 
    

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Query Results: {'ids': [['637539a8-270f-436f-912b-9fb813777974', '013a4284-d3ee-489e-8628-3f2c9dd64144', 'a7d318a8-7c21-44c5-9672-2e017fac2838']], 'embeddings': None, 'documents': [['What happens if I finish my studies earlier than expected? Your study permit will expire on either the date marked on the permit or 90 days after the day you complete your studies, whichever comes first.', 'What happens if I finish my studies later than expected? You must apply to extend your study permit at least 30 days before the original expiry date.', 'What happens when my study permit expires? You lose your student status in Canada if your study permit expired before you applied for a new one, or if you changed the conditions of your study permit or didn’t respect its conditions.']], 'uris': None, 'data': None, 'metadatas': [[None, None, None]], 'distances': [[0.31575074791908264, 0.3863426446914673, 0.45058271288871765]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'do

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Test Query 3: What happens if my study permit expires before I finish my program?
Response Before RAG:
 What happens if my study permit expires before I finish my program? Can I still enter Canada?
If your study permit expires before you finish your program, you may not be able to enter Canada. However, there are some options you can consider:

1. **Apply for a new study permit**: You can apply for a new study permit, which will allow you to enter Canada and continue your studies. You'll need to provide new documentation, such as a new passport, proof of language proficiency, and proof of financial support.
2. **Apply for a temporary resident visa**: If you're unable to enter Canada, you can apply for a temporary resident visa, which allows you to stay in Canada for a specific period (usually 6 months). You'll need to provide proof of your studies, language proficiency, and financial support.
3. **Apply for a work permit**: If you're unable to enter Canada, you can apply for a work pe

KeyboardInterrupt: 