In [2]:
%pip install weaviate-client


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import weaviate
import weaviate.classes.config as wc
import os
from dotenv import load_dotenv
from weaviate.classes.config import Property, DataType, ReferenceProperty, Configure, VectorDistances

In [4]:
# Load variables from a .env file into the process environment;
# WEAVIATE_URL and WEAVIATE_API_KEY are read from the environment when connecting.
load_dotenv()

True

In [5]:
# Connect to the Weaviate Cloud cluster described by the environment.
# os.environ[...] (instead of os.getenv) makes a missing variable fail right here
# with a KeyError naming it, rather than handing None to the client.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=os.environ["WEAVIATE_URL"],
    auth_credentials=weaviate.auth.AuthApiKey(os.environ["WEAVIATE_API_KEY"]),
)
# Prints True once the cluster reports it is ready.
print(client.is_ready())

True


# Create Candidate collection

In [None]:
# --- Create the "Candidate" collection (re-runnable) ---
# Creating a collection whose name already exists is rejected by Weaviate, so the
# create call is guarded. An existing collection is reused as-is: delete it first
# if the schema below has changed.

candidate_properties = [
    # --- Scalar / array fields ---
    wc.Property(name="candidate_id", data_type=wc.DataType.TEXT, index_filterable=True, index_searchable=True),
    wc.Property(name="name", data_type=wc.DataType.TEXT, index_searchable=True),
    wc.Property(name="email", data_type=wc.DataType.TEXT, index_searchable=True, index_filterable=True),
    wc.Property(name="age", data_type=wc.DataType.INT, index_range_filters=True),
    wc.Property(name="skills", data_type=wc.DataType.TEXT_ARRAY,
                index_searchable=True, index_filterable=True),
    wc.Property(name="social_links", data_type=wc.DataType.TEXT_ARRAY, index_searchable=True),
    wc.Property(name="years_of_experience", data_type=wc.DataType.NUMBER, index_range_filters=True),
    wc.Property(name="highest_education", data_type=wc.DataType.TEXT, index_searchable=True),
    wc.Property(name="current_role", data_type=wc.DataType.TEXT, index_searchable=True),
    wc.Property(name="function", data_type=wc.DataType.TEXT),
    wc.Property(name="resume_summary", data_type=wc.DataType.TEXT, index_searchable=True),
    wc.Property(name="job_id", data_type=wc.DataType.TEXT, index_filterable=True),

    # --- Nested list of Education objects ---
    wc.Property(
        name="education",
        data_type=wc.DataType.OBJECT_ARRAY,
        nested_properties=[
            wc.Property(name="institution", data_type=wc.DataType.TEXT, index_searchable=True),
            wc.Property(name="qualification", data_type=wc.DataType.TEXT, index_searchable=True),
            wc.Property(name="graduation_date", data_type=wc.DataType.DATE),
            wc.Property(name="details", data_type=wc.DataType.TEXT_ARRAY, index_searchable=True),
        ],
    ),

    # --- Nested list of Experience objects ---
    wc.Property(
        name="experience",
        data_type=wc.DataType.OBJECT_ARRAY,
        nested_properties=[
            wc.Property(name="company", data_type=wc.DataType.TEXT),
            wc.Property(name="location", data_type=wc.DataType.TEXT),
            wc.Property(name="role", data_type=wc.DataType.TEXT, index_searchable=True),
            wc.Property(name="start_date", data_type=wc.DataType.DATE),
            wc.Property(name="end_date", data_type=wc.DataType.DATE),
            wc.Property(name="responsibilities", data_type=wc.DataType.TEXT_ARRAY, index_searchable=True),
        ],
    ),

    # --- Nested list of Project objects ---
    wc.Property(
        name="projects",
        data_type=wc.DataType.OBJECT_ARRAY,
        nested_properties=[
            wc.Property(name="name", data_type=wc.DataType.TEXT, index_searchable=True),
            wc.Property(name="description", data_type=wc.DataType.TEXT, index_searchable=True),
        ],
    ),
]

# We'll provide our own (single) vector per object (the resume summary embedding),
# indexed with HNSW using cosine distance.
candidate_vector_config = wc.Configure.Vectors.self_provided(
    vector_index_config=wc.Configure.VectorIndex.hnsw(
        distance_metric=wc.VectorDistances.COSINE
    )
)

if client.collections.exists("Candidate"):
    print('Collection "Candidate" already exists; reusing it without changing its schema.')
    candidate_collection = client.collections.get("Candidate")
else:
    candidate_collection = client.collections.create(
        name="Candidate",
        properties=candidate_properties,
        vector_config=candidate_vector_config,
    )

# Last expression: display the collection handle as the cell output.
candidate_collection

<weaviate.collections.collection.sync.Collection at 0x701c59a84b90>

# Create Job collection

In [9]:
# --- Create the "Job" collection (re-runnable) ---
# Creating a collection whose name already exists is rejected by Weaviate, so the
# create call is guarded. An existing collection is reused as-is: delete it first
# if the schema below has changed.

job_properties = [
    wc.Property(name="name", data_type=wc.DataType.TEXT, index_searchable=True),
    wc.Property(name="job_id", data_type=wc.DataType.TEXT, index_filterable=True, index_searchable=True),
    wc.Property(name="job_description", data_type=wc.DataType.TEXT, index_searchable=True),
    wc.Property(name="job_creation_date", data_type=wc.DataType.DATE),
]

# We'll provide our own (single) vector per object
# (presumably the job description embedding -- TODO confirm),
# indexed with HNSW using cosine distance.
job_vector_config = wc.Configure.Vectors.self_provided(
    vector_index_config=wc.Configure.VectorIndex.hnsw(
        distance_metric=wc.VectorDistances.COSINE
    )
)

if client.collections.exists("Job"):
    print('Collection "Job" already exists; reusing it without changing its schema.')
    job_collection = client.collections.get("Job")
else:
    job_collection = client.collections.create(
        name="Job",
        properties=job_properties,
        vector_config=job_vector_config,
    )

# Last expression: display the collection handle as the cell output.
job_collection

<weaviate.collections.collection.sync.Collection at 0x701c59c4fbf0>

In [7]:
# Destructive helper: drops the "Job" collection together with every object stored in it.
# This cell sits after the one that creates "Job", so the deletion is opt-in --
# otherwise Restart & Run All would remove the collection that was just created.
DROP_JOB_COLLECTION = False  # set to True and re-run this cell to delete

if DROP_JOB_COLLECTION:
    client.collections.delete("Job")