### Install necessary libraries and packages

In [2]:
pip install -U transformers accelerate bitsandbytes -qq

Note: you may need to restart the kernel to use updated packages.


#### Set the Hugging Face home directory where the model will be saved

In [4]:
import os
# Location exported as HF_HOME for the rest of this kernel session.
HF_CACHE_DIR = '/run/cache/'
os.environ['HF_HOME'] = HF_CACHE_DIR

#### Download `minsearch.py` from the minsearch repository

In [6]:
# Drop any previously downloaded copy, then fetch minsearch.py fresh from its GitHub repository.
!rm -f minsearch.py
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-07-04 13:42:27--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3.7K) [text/plain]
Saving to: ‘minsearch.py’


2024-07-04 13:42:27 (85.9 MB/s) - ‘minsearch.py’ saved [3832/3832]



#### Get the JSON documents from the course repo and build the TF-IDF index

In [8]:
import requests 
import minsearch

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail fast on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list and tag every document with
# its course name. Building new dicts leaves `documents_raw` untouched, so the
# cell can be re-run safely.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

# Free-text fields are searched by content; `course` is an exact-match filter field.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x7f2dac7c01f0>

#### Search the index (results ranked by cosine similarity)

In [9]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up the FAQ entries most relevant to ``query`` in the global ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` keyword field must match. Defaults to
            ``'data-engineering-zoomcamp'``; pass ``None`` to search without
            a course filter.
        num_results: Maximum number of entries to return (default 5).

    Returns:
        Whatever ``index.search`` returns for the given arguments.
    """
    # Matches in the question field count most; section titles count least.
    boost = {'question': 3.0, 'section': 0.5}
    filter_dict = {} if course is None else {'course': course}

    return index.search(
        query=query,
        filter_dict=filter_dict,
        boost_dict=boost,
        num_results=num_results
    )

In [17]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
import torch

#### Get the FLAN-T5 model from Hugging Face

In [12]:
# Tokenizer and weights are loaded from the same checkpoint id, so name it once.
_FLAN_T5_CHECKPOINT = "google/flan-t5-xl"
tokenizer = T5Tokenizer.from_pretrained(_FLAN_T5_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(
    _FLAN_T5_CHECKPOINT,
    device_map="auto",
)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

#### Sample generation

In [93]:
%%time
input_text = "translate English to german: How old are you?"
# Send the inputs to the device the model was placed on rather than assuming CUDA.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0],skip_special_tokens=True))

Wie alt sind Sie?
CPU times: user 213 ms, sys: 0 ns, total: 213 ms
Wall time: 212 ms


#### Build prompt

In [94]:
def build_prompt(query, search_results):
    """Render the teaching-assistant prompt for ``query``.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of dicts with ``section``, ``question`` and
            ``text`` keys; each one becomes an entry of the ``CONTEXT`` block.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # One text chunk per retrieved FAQ entry, each terminated by a blank line.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()

def llm(prompt,temp):
    """Generate a completion for ``prompt`` with the global ``model`` and ``tokenizer``.

    Args:
        prompt: Full prompt text to feed to the model.
        temp: Sampling temperature. A positive value enables sampling at that
            temperature; ``None`` or a non-positive value falls back to
            deterministic greedy decoding.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    config = {'max_length': 512, 'num_beams': 1}
    if temp is not None and temp > 0:
        config['temperature'] = temp
        config['do_sample'] = True
    else:
        # Sampling requires a strictly positive temperature, so decode greedily
        # instead of letting generate() raise.
        config['do_sample'] = False

    # Follow the model's own device rather than hard-coding "cuda"; the model is
    # loaded with device_map="auto", so it is not guaranteed to sit on a GPU.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(input_ids, **config)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

#### Retrieve the search results, build a prompt from them, and generate an answer with the LLM

In [96]:
def rag(query,temp=0.1):
    """Answer ``query`` by chaining retrieval, prompt construction and generation.

    Args:
        query: The user's question.
        temp: Temperature forwarded to ``llm`` (default 0.1).

    Returns:
        The value returned by ``llm`` for the constructed prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), temp)

#### Run the RAG pipeline on the JSON document data

#### Temperature 0.1

In [97]:
%%time
# Ask a single question; `temp` is omitted, so rag() uses its default value.
rag("I just discovered the course. Can I still enroll it?")

CPU times: user 1.72 s, sys: 172 ms, total: 1.89 s
Wall time: 1.89 s


"Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute."

#### Let's try using different temperature values and examine the differences in the results.

In [55]:
import numpy as np
from IPython.display import display, Markdown

In [98]:
%%time
# Ask the same question at nine evenly spaced temperatures from 0.2 to 1.0
# and render each answer under its own heading.
temperatures = np.round(np.linspace(0.2,1.0,9),1)
separator = "-"*100

for temp in temperatures:
    display(Markdown(f"### Temperature: {temp}"))
    result = rag('I just discovered the course. Can I still enroll it?', temp)
    display(Markdown(f"Result: {result}"))
    print(separator)

### Temperature: 0.2

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.3

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.4

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.5

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.6

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.7

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.8

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 0.9

Result: Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

----------------------------------------------------------------------------------------------------


### Temperature: 1.0

Result: Yes, even if you don't register, you're still eligible to submit the homeworks.

----------------------------------------------------------------------------------------------------
CPU times: user 15.1 s, sys: 1.32 s, total: 16.4 s
Wall time: 16.4 s
