In [None]:
import pandas as pd
import numpy as np
import json
import requests
import transformers
from transformers import pipeline, AutoTokenizer, Gemma3ForCausalLM, GenerationConfig, AutoModelForCausalLM
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

import torch
import evaluate
import time

from tqdm import tqdm
tqdm.pandas()

In [None]:
# Hugging Face Hub checkpoint used for answer generation.
model_id = "google/gemma-2b-it"

# Tokenizer and weights are pulled from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
chat = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.13it/s]
Device set to use cuda:0


In [None]:
# Load the product catalogue from a local JSON file; the records are read from
# the "train" split and converted to a list of dicts.
dataset = load_dataset("json", data_files = "products.json")
product_data = dataset["train"].to_list()

# all-MiniLM-L6-v2 is a stock checkpoint that needs no custom modelling code,
# so execution of repository-supplied code stays at the library default (off)
# instead of being opted into with trust_remote_code=True.
encode_model = SentenceTransformer("all-MiniLM-L6-v2")

# `products` is the text shown to the LLM (name + description);
# `productnames` is the text used for retrieval. Both lists are index-aligned
# with `product_data`, so a row index in `products_embed` maps to `products`.
products = [f"{item['name']}: {item['description']}" for item in product_data]
productnames = [str(item['name']) for item in product_data]

# NOTE: only the product names are embedded, so retrieval matches queries
# against names; descriptions are not part of the similarity search.
products_embed = encode_model.encode(productnames, normalize_embeddings=True)

487

In [None]:
def get_RAG_prompt(products, query, products_embed, e_model, topI = 2):
    query_embed = e_model.encode(query, normalize_embeddings=True)
    similarities = np.dot(products_embed, query_embed.T)
    top_I_index = np.argsort(similarities, axis = 0)[-topI:][::-1].tolist()
    most_similar_docs = [products[i] for i in top_I_index]
    prompt = f"""
    You are a product expert working at a snowboard shop.
    Given the user request: "{query}",
    And the following product descriptions:

    {chr(10).join(f"- {name}" for name in most_similar_docs)}

    Answer the user request as **objectively** as possible, DO NOT rephrase the question asked, and DO NOT repeat the user request back to the user.
    """
    return(prompt)

def getModelResponse(chat, query, topI = 2):
    """Answer ``query`` with the generation pipeline, grounded in retrieved products.

    A RAG prompt is built from the module-level ``products``,
    ``products_embed`` and ``encode_model``, passed to ``chat``, and the
    prompt text is removed from the generated output before returning.

    Args:
        chat: Callable text-generation pipeline; its result is indexed as
            ``[0]['generated_text']``.
        query: The user request.
        topI: Number of retrieved products to put into the prompt.

    Returns:
        The generated text with the prompt removed and surrounding
        whitespace stripped.
    """
    rag_prompt = get_RAG_prompt(products, query, products_embed, encode_model, topI)

    # Sampling is enabled, so repeated calls may produce different answers.
    outputs = chat(rag_prompt, max_new_tokens=1000, do_sample=True, temperature=1.0)
    full_text = outputs[0]['generated_text']

    # Drop the prompt from the generated text so only the answer remains.
    answer = full_text.replace(rag_prompt, "")
    return answer.strip()

In [59]:
# Example: ask the assistant to compare two boards from the catalogue.
comparison_query = "Compare the Capita Black Snowboard of Death to the Ride Deep Fake"
response = getModelResponse(chat, comparison_query)
print(response)

Sure, here's the answer to the user's request:

While both the Ride Deep Fake Snowboard 2025 and the Capita Black Snowboard of Death are high-performance snowboards, there are some key differences between them.

- The Ride Deep Fake Snowboard 2025 has a stiffer flex pattern with a directional sidecut that provides more stability and control on hard pack. It also features lower density materials, such as Aspen and Paulownia, for enhanced lightweight and damping.
- The Capita Black Snowboard of Death has a rocker-type directional profile with a more progressive and playful flex pattern. It also incorporates the brand's signature taper system for superior float and stability.

Ultimately, the best choice between the two snowboards depends on your individual preferences and riding style. If you prioritize stability and control on hard pack, the Ride Deep Fake Snowboard 2025 might be a better fit. However, if you prefer a more playful and forgiving board that provides excellent float and st