In [None]:
%pip install weaviate-client

In [3]:
from weaviate import Client

# Initialize Weaviate client
client = Client("http://localhost:8080")

In [21]:
def purge_weaviate_db():
    # Assuming class name is "CodeExample"
    # This will delete all objects of the class "CodeExample"
    client.data_object.delete(class_name="CodeExample", where="*")

In [22]:
import openai

def get_embeddings(text):
    response = openai.Embedding.create(input=text, engine="text-similarity-babbage-001")
    return response["data"][0]["embedding"]

In [74]:
import os
import json
import re
import yaml

# Define the path to the data directory
DATA_DIR = "../data"

# Define a function to extract tags, description, and code from a file
def extract_information(file_path):
    with open(file_path, 'r') as file:
        content = file.read()
        
        # Initialize the data dictionary
        data = {
            'file_name': os.path.basename(file_path),
            'tags': None,
            'description': None,
            'code': None
        }

        # Define the regular expression pattern
        pattern = r'"""(.*?)"""'

        # Extract the YAML block
        match = re.search(pattern, content, re.DOTALL)
        yaml_block = match.group(1)

        # Parse the YAML block
        metadata = yaml.safe_load(yaml_block)
        
        # Regular expressions to match tags and description
        data['tags'] = metadata['tags']
        data['description'] = metadata['description']
        
        # Extract code
        code_start = content.find("# ---", content.rfind("# ---")) + len("# ---")
        if code_start != -1:
            data['code'] = content[code_start:].strip()
        
        # Check for missing data
        missing_data = [key for key, value in data.items() if value is None]
        if missing_data:
            print(f"Error in file {data['file_name']}: Missing data for {', '.join(missing_data)}.")
        
        return data

# Define the main function to iterate through the files and extract information
def main():
    for file_name in os.listdir(DATA_DIR):
        # Check if the file is a Python file and not a subdirectory
        if file_name.endswith('.py') and os.path.isfile(os.path.join(DATA_DIR, file_name)):
            file_path = os.path.join(DATA_DIR, file_name)
            file_data = extract_information(file_path)
            print(json.dumps(file_data, indent=2))

# Run the main function
main()

../data/example_4.py
<re.Match object; span=(0, 270), match='"""\ntags: [langchain]\ndescription: |\n    Creat>
{
  "file_name": "example_4.py",
  "tags": [
    "langchain"
  ],
  "description": "Create a chain that does the following and streams the response:\n- Accept a string as input\n- Format messages from System and Human as a prompt\n- Pass messages to OpenAI\n- Parse the OpenAI response as a string\n",
  "code": "tags: [langchain]\ndescription: |\n    Create a chain that does the following and streams the response:\n    - Accept a string as input\n    - Format messages from System and Human as a prompt\n    - Pass messages to OpenAI\n    - Parse the OpenAI response as a string\n\"\"\"\n\nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.prompts import ChatPromptTemplate\nfrom langchain.schema.messages import HumanMessage, SystemMessage\nfrom langchain.schema.output_parser import StrOutputParser\n\n# Generate system and human messages\nmessages = [\n    SystemMessage(