# Load talk data
Reads the CSV file containing all the talks and stores them in a Weaviate cluster.
 

In [None]:
import pandas as pd
from dotenv import load_dotenv

# Pull settings from a local .env file (presumably the WEAVIATE_* and
# OPEN_AI_* values that later cells read through os.getenv).
load_dotenv()

# Read the talk export into a DataFrame; the path is relative to the
# directory the notebook is run from.
talks = pd.read_csv(filepath_or_buffer='./data_source/all_talks_data.csv')

# Preview the first rows to confirm the file was parsed.
talks.head()

## Create LangChain Documents
Transform the talks from the CSV file into a list of LangChain `Document` objects that can be used by the chain.

In [None]:
from langchain.schema import Document

# Build one Document per CSV row: the "Title" column becomes the page
# content, while the URL, speakers and time slots are kept as metadata.
talkdocs = [
    Document(
        page_content=talk["Title"],
        metadata={
            "talk_url": talk["talk_url"],
            "speakers": talk["speakers"],
            "talk_times": talk["talk_times"],
        },
    )
    for _, talk in talks.iterrows()
]

# Show the first converted talk as a quick sanity check.
print(talkdocs[0])

## Initialize vector database (Weaviate)
You can swap this for another vector database if you prefer.

In [None]:
import weaviate
import os
from langchain.vectorstores import Weaviate


def __init_weaviate_client() -> weaviate.Client:
    """Build a Weaviate client from environment settings.

    Reads three environment variables:

    * ``WEAVIATE_URL`` - passed as the client's ``url``.
    * ``WEAVIATE_API_KEY`` - wrapped in ``AuthApiKey`` and passed as
      ``auth_client_secret``.
    * ``OPEN_AI_API_KEY`` - sent as the ``X-OpenAI-Api-Key`` header.

    The values are passed through unchecked; a variable that is not set
    reaches the client as ``None``.

    Returns:
        A new ``weaviate.Client`` instance.
    """
    env = os.environ

    endpoint = env.get('WEAVIATE_URL')
    api_key_auth = weaviate.auth.AuthApiKey(api_key=env.get('WEAVIATE_API_KEY'))
    extra_headers = {"X-OpenAI-Api-Key": env.get('OPEN_AI_API_KEY')}

    return weaviate.Client(
        url=endpoint,
        auth_client_secret=api_key_auth,
        additional_headers=extra_headers,
    )


Create the client, verify the connection, and delete any existing `DevoxxTalk` class so the import below starts from a clean index.

In [None]:
# Client used for the schema check in this cell.
wv_client = __init_weaviate_client()

# Drop the "DevoxxTalk" class when it already exists; otherwise just
# report that there was nothing to remove.
if not wv_client.schema.exists("DevoxxTalk"):
    print("Not available")
else:
    # Prints whatever delete_class returns.
    print(wv_client.schema.delete_class("DevoxxTalk"))

## Import the documents into the vector database

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model handed to the vector store below; the API key comes
# from the OPEN_AI_API_KEY environment variable.
openai_embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=os.getenv('OPEN_AI_API_KEY'),
)

# Load every talk Document into the "DevoxxTalk" index using a freshly
# created client.
# NOTE(review): by_text=False presumably makes searches use the supplied
# embeddings instead of Weaviate's own text search - confirm in the
# LangChain Weaviate documentation.
weaviate_vs = Weaviate.from_documents(
    documents=talkdocs,
    embedding=openai_embeddings,
    index_name="DevoxxTalk",
    client=__init_weaviate_client(),
    by_text=False,
)



## Verify the data

In [None]:
# Run a sample question against the vector store to check the import.
query = "What talks deal with large language models?"
results = weaviate_vs.similarity_search_with_score(query)

# Each result unpacks into a (document, score) pair; print the score
# followed by the matched page content.
for document, score in results:
    print(f"score: {score}\n {document.page_content}")