In [1]:
print("Hello World")

Hello World


In [2]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, PayloadSchemaType
import pandas as pd

In [3]:
df_items = pd.read_json("../data/meta_Electronics_1000.jsonl", lines=True)

df_items.head(3)

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,available_date
0,All Electronics,"Adjustable Metal Stand for E cho Show 15, Tilt...",4.5,111,[The VMEI adjustable metal bracket is made of ...,[],36.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Small footprint, minimalist design...",VMEI,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'VMEI', 'Color': 'Black', 'Room Type...",B0B13T2GY5,,2022-05-11
1,Computers,"HP Laptop, 15.6"" HD Touchscreen, AMD Athlon Go...",4.4,205,[【High Speed RAM And Enormous Space】8GB high-b...,[PConline365 sells computers with professional...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],HP,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'HP', 'Color': 'Silver', 'Room Type'...",B08DC5KLJZ,,2022-03-24
2,Amazon Home,50 Pcs Groot Stickers for Water Bottle Waterpr...,4.7,156,[🏆Groot Stickers:Anime theme party favors for ...,[Groot Stickers Quantity: 50 Pcs/pack.Non-dupl...,5.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': '200-Piece Stickers Pack for Laptop...,Fcelery,"[Electronics, Computers & Accessories, Laptop ...","{'Brand': 'Fcelery', 'Color': 'Groot', 'Room T...",B0B4QQSRY3,,2022-06-22


In [4]:
def preprocess_data(row):
    return f"{row['title']} {' '.join(row['features'])}"

In [5]:
def extract_first_large_image(row):
    return row["images"][0].get("large", '')

In [6]:
df_items["preprocessed_data"] = df_items.apply(preprocess_data, axis=1)
df_items["first_large_image"] = df_items.apply(extract_first_large_image, axis=1)


In [9]:
df_items.head(2)

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,available_date,preprocessed_data,first_large_image
0,All Electronics,"Adjustable Metal Stand for E cho Show 15, Tilt...",4.5,111,[The VMEI adjustable metal bracket is made of ...,[],36.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Small footprint, minimalist design...",VMEI,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'VMEI', 'Color': 'Black', 'Room Type...",B0B13T2GY5,,2022-05-11,"Adjustable Metal Stand for E cho Show 15, Tilt...",https://m.media-amazon.com/images/I/415fkdvFA9...
1,Computers,"HP Laptop, 15.6"" HD Touchscreen, AMD Athlon Go...",4.4,205,[【High Speed RAM And Enormous Space】8GB high-b...,[PConline365 sells computers with professional...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],HP,"[Electronics, Computers & Accessories, Compute...","{'Brand': 'HP', 'Color': 'Silver', 'Room Type'...",B08DC5KLJZ,,2022-03-24,"HP Laptop, 15.6"" HD Touchscreen, AMD Athlon Go...",https://m.media-amazon.com/images/I/41oJoaJ+Vw...


In [8]:
qdrant_client = QdrantClient(
    url="http://localhost:6333",
)

In [11]:
qdrant_client.create_collection(
    collection_name="Amazon-items-collection-01-hybrid",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

True

In [16]:
qdrant_client.create_payload_index(
    collection_name="Amazon-items-collection-01-hybrid",
    field_name="text",
    field_schema=PayloadSchemaType.TEXT
)

UpdateResult(operation_id=5, status=<UpdateStatus.COMPLETED: 'completed'>)

In [10]:
df_sample = df_items.sample(n=50, random_state=25)

In [11]:
import openai

def get_embedding(text, model="text-embedding-3-small"):
    response = openai.embeddings.create(
        input=[text],
        model=model,
    )
    return response.data[0].embedding

In [12]:
data_to_embed = df_sample[["preprocessed_data", "first_large_image", "rating_number", "price", "average_rating"]].to_dict(orient="records")


In [13]:
data_to_embed

  'first_large_image': 'https://m.media-amazon.com/images/I/31pIP8yGglS._AC_.jpg',
  'rating_number': 368,
  'price': 199.99,
  'average_rating': 4.3},
 {'preprocessed_data': 'Wireless Earbuds Bluetooth 5.3 Headphones Deep Bass in-Ear Earphones Premium Sound with Mic Wireless Charging Case for iPhone, Samsung, Android Smartphone Laptop Sports [Advanced Bluetooth Technology] Our earbuds adopt Bluetooth 5.3 technology, which greatly improves the transmission speed and provides a low-latency listening experience. With support for A2DP, AVRCP, HFP, and more, you can enjoy seamless connectivity and superb audio quality. [Enhanced Audio Experience] Immerse yourself in powerful, deep bass with our wireless earbuds. Designed to provide extra punch, warmth, and impact to keep you energized during your workout and deliver crystal-clear sound for a superior listening experience. [Long Battery Life] Enjoy up to 5 hours of playback time per earbud, and a total of over 22 hours (100% volume) when co

In [14]:
pointstructs = []
for i, data in enumerate(data_to_embed):
    embedding = get_embedding(data['preprocessed_data'])
    pointstructs.append(PointStruct(
        id=i,
        vector=embedding,
        payload={
            "text": data['preprocessed_data'],
            "first_large_image": data['first_large_image'],
            "average_rating": data['average_rating'],
            "rating_number": data['rating_number'],
            "price": data['price'],
        }
    ))


In [17]:
qdrant_client.upsert(
    collection_name="Amazon-items-collection-01-hybrid",
    points=pointstructs,
    wait=True
)

UpdateResult(operation_id=6, status=<UpdateStatus.COMPLETED: 'completed'>)

In [15]:
from qdrant_client.models import Prefetch, Filter, FieldCondition, MatchText, FusionQuery

def retrieve_data(query, k=5):
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name="Amazon-items-collection-01-hybrid",
        prefetch=[
            Prefetch(
                query=query_embedding,
                limit=20,
            ),
            Prefetch(
                filter=Filter(
                    must=[
                        FieldCondition(
                            key="text",
                            match=MatchText(
                                text=query,
                            )
                        )
                    ]
                ),
                limit=20
            )
        ],
        query=FusionQuery(fusion="rrf"),
        limit=k,
    )
    return results

In [16]:
retrieve_data("earphones", k=5).points

[ScoredPoint(id=11, version=6, score=0.8333334, payload={'text': 'IKG Wireless Earbuds, Bluetooth Headphones with Microphone, 30H Playtime with Transparent Charging Case, Deep Bass, IPX7 Waterproof, Earphones for Work Sport Game Ergonomic design and unique appearance: the weight of a single earbud is only 3.5g, round and smooth sound cavity design, 35 ° of the golden angle of entry into the ear, the curve of the body design of the fit to provide a good airtightness. Headphones wireless unique transparent cover and simple charging compartment design, easily adapt to different scenarios. Bluetooth 5.3 and automatic one-step connection: The in-ear headphones provide simultaneous binaural transmission through the use of Bluetooth 5.3 chip, increasing the connection speed by 80%. Dual-channel transmission technology for seamless switching between single/binaural modes. By opening the lid of the charging case, the Bluetooth headset can automatically connect to the previously paired device. S

In [2]:
import instructor
from pydantic import BaseModel
from openai import OpenAI

In [6]:

class RAGGenerationResponse(BaseModel):
    answer: str

client = instructor.from_openai(OpenAI())

# response -> pydantic response, raw_response -> raw response from openai

prompt = 'You are a helpful assistant. Return an answer to the question: "What is the capital of France?"'

response, raw_response = client.chat.completions.create_with_completion(
    model="gpt-4.1",
    response_model=RAGGenerationResponse,
    messages=[
        {
            "role": "user",
            "content": prompt
        }
    ],
    temperature=0.5,
)


In [15]:
def kwadraty(n):
    for i in range(n):
        yield i * i

gen = kwadraty(5)

print(next(gen))
print(next(gen))
print(next(gen))

0
1
4


In [16]:
def licz_do_trzech():
    yield 1
    yield 2
    yield 3

for liczba in licz_do_trzech():
    print(liczba)

1
2
3
