In [1]:
import weaviate
from weaviate.classes.config import Property, DataType, Configure
# import weaviate_schema
# from weaviate_schema.schemas2 import _amazonschema

In [2]:
client = weaviate.connect_to_local()
print(client.is_ready())

True


In [3]:
if client.collections.exists("Amazon"):
    client.collections.delete("Amazon")


client.collections.create(
    name="Amazon",
    properties=[
        Property(name="title", data_type=DataType.TEXT, vectorize_property_name=True), 
        Property(name="brand", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="description", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="top_review", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="features", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="categories", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="final_price", data_type=DataType.NUMBER, vectorize_property_name=True),
        Property(name="reviews_count", data_type=DataType.INT, vectorize_property_name=True),
        Property(name="rating", data_type=DataType.NUMBER, vectorize_property_name=True),
        Property(name="availability", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="image_url", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="answered_questions", data_type=DataType.INT, vectorize_property_name=True),
        Property(name="url", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="asin", data_type=DataType.TEXT, vectorize_property_name=True),
        Property(name="currency", data_type=DataType.TEXT, vectorize_property_name=True),
    ],
    vectorizer_config=[
        # Set a named vector
        Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-cohere" vectorizer
            name="brand_title_descriptions", source_properties=["brand","title","description"]       # Set the source property(ies)
        ),
        Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-openai" vectorizer
            name="features_topreview", source_properties=["features", "top_review"] # Set the source property(ies)
        ),
        Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-openai" vectorizer
            name= "categories", source_properties=["categories"] # Set the source property(ies)
        ),
        Configure.NamedVectors.text2vec_transformers(  # Use the "text2vec-openai" vectorizer
            name= "brand_title", source_properties=["brand", "title"] # Set the source property(ies)
        ),
    ],
)

<weaviate.collections.collection.Collection at 0x7d90467a82f0>

## Processing Data

In [4]:
import pandas as pd

df = pd.read_csv(input("Enter the path to the csv file: "))
# printing number of rows in df
print(len(df))

# remove all other coloumns except the ones we need
df = df[['title', 'brand', 'description', 'top_review', 'features', 'categories', 'final_price', 'reviews_count', 'rating', 'availability', 'image_url', 'answered_questions', 'url', 'asin']]
# replace all NaN values with empty string
df['image_url'].fillna('')
df['answered_questions'].fillna(0)


30


0     237.0
1       4.0
2       0.0
3      27.0
4       4.0
5       0.0
6       0.0
7       8.0
8       0.0
9       0.0
10      0.0
11      0.0
12      0.0
13      0.0
14      0.0
15      0.0
16      0.0
17      0.0
18      0.0
19      0.0
20      0.0
21      0.0
22      0.0
23      0.0
24      0.0
25      0.0
26      0.0
27      0.0
28      0.0
29      0.0
Name: answered_questions, dtype: float64

## Converting to Dictionary

In [5]:
rows = df.to_dict(orient='records')
print(type(rows[0]))

<class 'dict'>


## Inserting in Database

In [6]:
collection = client.collections.get(name="Amazon")

In [7]:
collection.data.insert_many(rows)

BatchObjectReturn(all_responses=[UUID('ab197443-f23c-447c-88e9-744b70f0d77e'), UUID('51fc41d6-d64d-4d5f-8d32-a156819cbaaa'), ErrorObject(message="invalid integer property 'answered_questions' on class 'Amazon': requires an integer, the given value is 'NaN'", object_=_BatchObject(collection='Amazon', vector=None, uuid='47ede726-20d8-45e1-b0a6-6b285fe3360d', properties={'title': 'iSi Thermo Whip Multifunctional Cream/Food Whipper for All Thermal Insulated Applications, 1 Pint, Polished Stainless/Red', 'brand': 'iSi North America', 'description': 'The Thermo Whip from ISI offers the ultimate tool to foam various foods. Designed to be used with both hot and cold ingredients with the added benefit of a thermally insulated bottle. This patented system will keep cold items cold up to 8-hour and hot items warm for up to 3-hour without the use of additional equipment such as refrigerators or bain-maries. 1-pint size.', 'top_review': 'I bought this based on my chef friends recommendation and was

## Vector Similarity search

In [8]:
from weaviate.classes.query import MetadataQuery

response = collection.query.near_text(
    query="Earphones",
    limit=10,
    target_vector="brand_title_descriptions",  # Specify the target vector for named vector collections
    return_metadata=MetadataQuery(distance=True)
)

print(response)



In [9]:
for o in response.objects:
    # print(o.properties["title"])
    print(o.properties["title"])
    print(o.metadata.distance)
    print("")

Geekria Shield Headphones Case Compatible with Bowers & Wilkins PI7, PI5 in-Ear True Wireless Headphones Case, Replacement Hard Shell Travel Carrying Bag with Cable Storage (Grey)
0.465856671333313

AirPods Case,with Keychain Easy Outdoors,HappyCover Thicken Shockproof 360° Protective Silicone Cover Skin with Integrated Dust Plug Compatible for Airpods Charging Case 2 & 1 (Milk Tea)
0.5634986162185669

Happypapa Airpods Case Designed for Apple AirPods 2 & 1 Full Protective Case Cover with Keychain Cute Girls Men Durable Shockproof Anti Lost Case for Airpods Charging Case (Ice Blue Marble)
0.713636040687561

Garmin Vivoactive 3 GPS Smartwatch Black (Black Stainless)
0.7606279253959656

Skechers Sport Men's Vigor 2.0 Sneaker
0.7904127836227417

IX INOXTO Hydration Pack Backpack, Lightweight Water Backpack with 2L Leakproof Hydration Bladder Daypack for Men Women,Running Hydration Vest for Outdoor Trail
0.7961657047271729

FH Group Flat Cloth Fabric Armrest Cover Semi-Universal Fit, One P