We're using version `4.7.1` of the weaviate-client

## Import Libraries

In [1]:
import weaviate
import weaviate.classes.config as wvcc
import weaviate.classes as wvc
from weaviate.classes.config import Property, DataType, ReferenceProperty
from weaviate.util import generate_uuid5
from weaviate.classes.init import Auth
from weaviate.classes.query import Filter


import os
import base64
from dotenv import load_dotenv, find_dotenv
import json
from PIL import Image
import matplotlib.pyplot as plt


## Connect to WCD

In [2]:
OPENAI_KEY = os.getenv('OPENAI_KEY')
headers = {"X-OpenAI-Api-Key": OPENAI_KEY}

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=os.getenv('WEAVIATE_URL'),
    auth_credentials=Auth.api_key(os.getenv('WEAVIATE_AUTH')),
    headers=headers
)

client.is_ready()

WeaviateInvalidInputError: Invalid input provided: Argument 'cluster_url' must be one of: [<class 'str'>], but got <class 'NoneType'>.

## Define Schema

In [None]:
client.collections.delete("Products")

In [None]:
dataset = client.collections.create(
    name='Products',
    vectorizer_config=wvcc.Configure.Vectorizer.text2vec_openai(model='ada'),
    generative_config=wvcc.Configure.Generative.openai(model='gpt-4-1106-preview'),

    properties=[
        Property(name="title", data_type=DataType.TEXT),
        Property(name="description", data_type=DataType.TEXT),
        Property(name="file_path", data_type=DataType.TEXT, skip_vectorization=True),
        Property(name="price", data_type=DataType.NUMBER, skip_vectorization=True),
        Property(name="category", data_type=DataType.TEXT)
    ]
) 

## Import Data

In [None]:
data_path = 'data.json'

with open(data_path, 'r') as json_file:
    data = json.load(json_file)

print(data[0])

In [None]:
products = client.collections.get("Products")

with products.batch.dynamic() as batch:
    for item in data:

        properties = {
            "title": item['title'],
            "description": item['description'],
            "file_path": item['file_path'],
            "price": item['price'],
            "category": item['category']
        }

        batch.add_object(
            properties=properties
        )

In [None]:
# count the number of objects

products = client.collections.get("Products")

products.aggregate.over_all(total_count=True)

## Query Time

### Vector Search

In [None]:
products = client.collections.get("Products")

response = products.query.near_text(
    query="Boots for the snow",
    return_properties=['title', 'description','file_path'],
    limit=4
)

for item in response.objects:
    print(item.properties['title'])

    image = Image.open(item.properties['file_path'])
    plt.imshow(image)
    plt.axis('off')
    plt.show()

### Hybrid Search

In [None]:
products = client.collections.get("Products")

response = products.query.hybrid(
    query="Boots for the snow",
    alpha=0.5,
    return_properties=['title', 'description','file_path'],
    limit=4
)

for item in response.objects:
    print(item.properties['title'])

    image = Image.open(item.properties['file_path'])
    plt.imshow(image)
    plt.axis('off')
    plt.show()

### Autocut

In [None]:
products = client.collections.get("Products")

response = products.query.near_text(
    query="Lawn power equipment",
    return_properties=['title', 'description','file_path'],
    auto_limit= 1
)

for item in response.objects:
    print(item.properties['title'])

    image = Image.open(item.properties['file_path'])
    plt.imshow(image)
    plt.axis('off')
    plt.show()

### Filtered Search

In [None]:
products = client.collections.get("Products")

response = products.query.near_text(
    query="lifestyle sneakers",
    return_properties=['title', 'description','file_path'],
    filters=Filter.by_property("category").equal("sneakers"),
    limit=3
)

for item in response.objects:
    print(item.properties['title'])

    image = Image.open(item.properties['file_path'])
    plt.imshow(image)
    plt.axis('off')
    plt.show()

## Generative Search

In [None]:
prompt = "Given this: {description}, how would you sell it to people?"

products = client.collections.get("Products")
response = products.generate.hybrid(
    query="lawn power equipment",
    alpha=0.5,
    limit=2,
    single_prompt=prompt
)

for item in response.objects:
    print(json.dumps(item.properties['title'], indent=2))
    print(item.generated)

    image = Image.open(item.properties['file_path'])
    plt.imshow(image)
    plt.axis('off')
    plt.show()
    
    print('====')