In [1]:
import os
import json
from tqdm import tqdm as tqdm

import weaviate
from weaviate.classes.init import Auth
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain.docstore.document import Document

In [2]:
# Connect to the Weaviate Cloud cluster. All credentials come from the
# environment so that nothing secret is stored in the notebook itself.
REQUIRED_ENV_VARS = ("WEAVIATE_URL", "WEAVIATE_API_KEY", "OPENAI_API_KEY")

# Fail fast with a readable message: os.getenv() returns None for an unset
# variable, and passing None into the client produces a much less obvious error.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_env_vars)
    )

weaviate_url = os.environ["WEAVIATE_URL"]
weaviate_api_key = os.environ["WEAVIATE_API_KEY"]

# NOTE: the returned client holds an open connection; call client.close()
# once the notebook no longer needs it.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
    headers={
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]  # Or any other inference API keys
    },
    # Skips the client's start-up checks against the cluster (see the Weaviate client docs).
    skip_init_checks=True
)

In [5]:
# Read the exported JIRA tickets from disk and wrap each one in a LangChain
# Document whose content is the ticket's JSON and whose metadata points back
# at the issue's REST URL.
folder_path = 'JIRA tickets'
MAX_TICKETS = 1000  # cap on how many files are ingested in one run

# os.listdir() returns names in arbitrary order; sorting makes the slice below
# pick the same files on every run.
all_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.json'))

final_docs = []

for filename in tqdm(all_files[:MAX_TICKETS]):
    file_path = os.path.join(folder_path, filename)

    # JSON text is UTF-8 by specification; say so explicitly instead of relying
    # on the platform's locale-dependent default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        results = json.load(file)

    # Everything before the first '.' in the file name is used as the ticket key.
    ticket_number = filename.split('.')[0]
    final_docs.append(
        Document(
            # Re-serialise the parsed ticket so the stored text is normalised JSON.
            page_content=json.dumps(results),
            metadata={
                "source": f"https://rubinobs.atlassian.net/rest/api/latest/issue/{ticket_number}",
                "source_key": "jira"
            }
        )
    )

100%|██████████| 1000/1000 [00:00<00:00, 5908.80it/s]


In [6]:
# Build the Weaviate-backed vector store from the loaded ticket documents,
# using OpenAI embeddings and the already-connected client.
openai_embeddings = OpenAIEmbeddings()
db = WeaviateVectorStore.from_documents(
    final_docs,
    openai_embeddings,
    client=client,
)