## Setup: Cloning Repository and Installing Required Packages

In [1]:
import os
from google.colab import userdata
# Fetch the GitHub token through Colab's `userdata` API so it is not
# hard-coded in the notebook.
github_token = userdata.get('GITHUB_TOKEN')
# Clone the repository over HTTPS, authenticating with the token.
# NOTE(review): the token is interpolated into the clone URL, so git will
# presumably store it as the origin URL in hgss-llm/.git/config — confirm, and
# avoid sharing the cloned directory or any output that echoes this URL.
!git clone https://alihuss1017:{github_token}@github.com/alihuss1017/hgss-llm.git

Cloning into 'hgss-llm'...
remote: Enumerating objects: 202, done.[K
remote: Counting objects: 100% (158/158), done.[K
remote: Compressing objects: 100% (109/109), done.[K
remote: Total 202 (delta 52), reused 146 (delta 46), pack-reused 44 (from 1)[K
Receiving objects: 100% (202/202), 6.24 MiB | 22.82 MiB/s, done.
Resolving deltas: 100% (61/61), done.


In [2]:
cd hgss-llm

/content/hgss-llm


In [3]:
# Create a local RAG-pipeline branch that tracks origin/RAG-pipeline and switch to it.
# NOTE(review): `-b` presumably fails when the branch already exists, so this cell
# may not be safe to re-run within the same session — confirm.
!git checkout -b RAG-pipeline origin/RAG-pipeline

Branch 'RAG-pipeline' set up to track remote branch 'RAG-pipeline' from 'origin'.
Switched to a new branch 'RAG-pipeline'


In [None]:
!pip install -r requirements.txt

## Loading Precomputed Vector Store and Chunks

In [5]:
import faiss
import pickle

# Read the precomputed FAISS index (the vector store) from disk.
index = faiss.read_index("data/RAG/faiss_index.index")

# The pickle holds a two-item sequence that unpacks into the chunk texts and
# their metadata. pickle can execute arbitrary code while loading, so only use
# a file from a trusted source.
with open("data/RAG/metadata.pkl", "rb") as metadata_file:
    chunk_payload = pickle.load(metadata_file)
text_chunks, metadata_chunks = chunk_payload

## Loading Embedder

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to encode queries for retrieval.
# NOTE(review): presumably the same model that produced the vectors stored in
# the FAISS index — confirm, since query and index embeddings must share a space.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

## Defining Retriever

In [7]:
import numpy as np

def retrieve_relevant_chunks(query, top_k=3):
    """Return the chunks most similar to ``query`` from the vector index.

    Args:
        query: Question text to embed and search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of ``(text_chunk, metadata_chunk)`` tuples in the order the
        index returned them. It may hold fewer than ``top_k`` entries when
        the index cannot supply that many neighbours.
    """
    # FAISS expects a 2-D float32 matrix of query vectors.
    query_embedding = np.asarray(embedder.encode([query]), dtype="float32")
    _, indices = index.search(query_embedding, top_k)
    # FAISS pads missing neighbours with -1; without this filter a -1 would
    # silently select the *last* chunk through Python's negative indexing.
    return [(text_chunks[i], metadata_chunks[i]) for i in indices[0] if i >= 0]

## Loading Language Model

In [None]:
from transformers import pipeline
import torch

# Prefer the GPU, but fall back to CPU so the notebook still runs (slowly) on a
# runtime without CUDA instead of failing while the model is being loaded.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16,
    device_map=device,
)


## Defining Prompt Template and Answer Generation

In [9]:
def generate_answer(query, top_k=3, max_new_tokens=150):
    """Answer ``query`` with the language model, grounded in retrieved chunks.

    Args:
        query: The user's question.
        top_k: Number of chunks requested from ``retrieve_relevant_chunks``.
        max_new_tokens: Generation budget forwarded to the pipeline.

    Returns:
        The text that follows the prompt in the pipeline output, stripped of
        surrounding whitespace.
    """
    # Only the chunk text goes into the prompt; the metadata is dropped.
    passages = [text for text, _meta in retrieve_relevant_chunks(query, top_k=top_k)]
    context = "\n\n".join(passages)

    system_message = {
        "role": "system",
        "content": "You are an expert on Pokémon HeartGold and SoulSilver. Use only the provided context to answer accurately.",
    }
    user_message = {
        "role": "user",
        "content": (
            "Answer the following question using the context below:\n"
            "\n"
            "Context:\n"
            f"{context}\n"
            "\n"
            f"Question: {query}"
        ),
    }

    # Render the chat messages into a single prompt string (no tokenization).
    prompt = pipe.tokenizer.apply_chat_template(
        [system_message, user_message],
        tokenize=False,
        add_generation_prompt=True,
    )

    # Assumes "generated_text" starts with the prompt, so slice the prompt off.
    completion = pipe(prompt, max_new_tokens=max_new_tokens)[0]["generated_text"]
    return completion[len(prompt):].strip()


## Generating Answers using Evaluation Q&A Dataset

In [21]:
import jsonlines

# Load every evaluation record into memory.
with jsonlines.open("data/eval/hgss-QA-base.jsonl") as reader:
    eval_data = [record for record in reader]

# Attach the model's answer to each record, keyed by "generated_answer".
for record in eval_data:
    question = record["question"]
    record["generated_answer"] = generate_answer(question)

# Persist the augmented records as a new JSON Lines file.
with jsonlines.open("data/eval/hgss-QA-complete.jsonl", mode="w") as writer:
    writer.write_all(eval_data)


## Computing Cosine Similarity on Evaluation Q&A Dataset

In [22]:
import json
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Score each generated answer against its gold answer by the cosine similarity
# of their sentence embeddings, then report the mean over the dataset.
cosine_scores = []
hf_token = userdata.get('HF_TOKEN')
# NOTE(review): `use_auth_token` is reportedly deprecated in newer
# sentence-transformers releases in favour of `token` — confirm against the
# version pinned in requirements.txt.
model = SentenceTransformer('all-MiniLM-L6-v2', use_auth_token=hf_token)

# The file was written with jsonlines, which uses UTF-8; read it back with the
# same encoding instead of relying on the platform default.
with open('data/eval/hgss-QA-complete.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines, which json.loads would reject
        data = json.loads(line)
        gold = data['gold_answer']
        generated = data['generated_answer']

        # Encode the pair together; row 0 is the gold answer, row 1 the generated one.
        embeddings = model.encode([gold, generated])
        score = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]

        cosine_scores.append(score)

# Guard the mean against an empty dataset (would otherwise divide by zero).
if cosine_scores:
    print(f'Average cosine similarity: {sum(cosine_scores) / len(cosine_scores)}')
else:
    print('Average cosine similarity: n/a (no evaluation records found)')



Average cosine similarity: 0.47737744450569153


## Sample Query

In [None]:
# Run one example question through the full pipeline and show the reply.
query = "I'm about to challenge Falkner in Violet City, what is his team?"
sample_answer = generate_answer(query)
print("💬", sample_answer)

💬 To answer the question, Falkner's team in Violet City is [{'pokemon': 'Pidgey', 'level': 9}, {'pokemon': 'Pidgeotto', 'level': 13}].
