# Section 2 : Load Data To Weaviate Database
- In this section you'll see:
    - Set up Weaviate, including python client, weaviate instance, connection to WCS;
    - Populate the database, including create collection, import data

In [1]:
# Import libraries
import weaviate
import os
import weaviate.classes.config as wc
from weaviate.util import generate_uuid5

import pandas as pd
import requests
from tqdm import tqdm

In [2]:
# Create credentials
os.environ['WCS_URL'] = "enter_credentials"
os.environ['WCS_API_KEY'] = "enter_credentials"
os.environ['OPENAI_APIKEY'] = "enter_credentials"

### Create the 'Tracks' collection

In [3]:
# Instantiate Weaviate client
headers = {
    "X-OpenAI-Api-Key": os.environ['OPENAI_APIKEY']}  # Replace with your OpenAI API key

client = weaviate.connect_to_wcs(
    cluster_url=os.environ['WCS_URL'],  # Replace with your WCS URL
    auth_credentials=weaviate.auth.AuthApiKey(
        os.environ['WCS_API_KEY']
    ),  # Replace with your WCS key
    headers=headers,
)

try:
    client.collections.delete("Tracks")  # Replace with your class name

    client.collections.create(
    name="Tracks",
    properties=[
        wc.Property(name="track_uri", data_type=wc.DataType.TEXT),
        wc.Property(name="track_href", data_type=wc.DataType.TEXT),
        wc.Property(name="analysis_url", data_type=wc.DataType.TEXT),
        
    ],
    # Define the vectorizer module (none, as we will add our own vectors)
    vectorizer_config=wc.Configure.Vectorizer.none(),
    # Define the generative module
    generative_config=wc.Configure.Generative.openai()
    )
finally:  # This will always be executed, even if an exception is raised
    client.close()  # Close the connection & release resources

### Import data (ps.think twice before running this cell because it'll take a long time)

In [5]:
# Instantiate your client (not shown). e.g.:
headers = {
    "X-OpenAI-Api-Key": os.environ['OPENAI_APIKEY']
}  # Replace with your OpenAI API key

client = weaviate.connect_to_wcs(
    cluster_url=os.environ['WCS_URL'],  # Replace with your WCS URL
    auth_credentials=weaviate.auth.AuthApiKey(
        os.environ['WCS_API_KEY']
    ),  # Replace with your WCS key
    headers=headers,
)

try:
    data_url = "vec_tracks.csv"
    # data_resp = requests.get(data_url)
    df = pd.read_csv(data_url)
    
    # Load the embeddings (embeddings from the previous step)
    embs_path = "scaled_vec_tracks.csv"
    # Or load embeddings from a local file (if you generated them earlier)    
    emb_df = pd.read_csv(embs_path)
    
    # Get the collection
    tracks = client.collections.get("Tracks")
    
    # Enter context manager
    with tracks.batch.dynamic() as batch:
        # Loop through the data
        for i, track in enumerate(df.itertuples(index=False)):
   
            # Build the object payload
            track_obj = {
                "track_uri": track.track_uri,
                "track_href": track.track_href,
                "analysis_url": track.analysis_url,
            }
    
            # Get the vector
            vector = emb_df.iloc[i,1:].to_list()
    
            # Add object (including vector) to batch queue
            batch.add_object(
                properties=track_obj,
                uuid=generate_uuid5(track.id),
                vector=vector  # Add the custom vector
                # references=reference_obj  # You can add references here
            )
            # Batcher automatically sends batches
    
    # Check for failed objects
    if len(tracks.batch.failed_objects) > 0:
        print(f"Failed to import {len(tracks.batch.failed_objects)} objects")
finally:  
    client.close()



Failed to import 22690 objects
