In [None]:
import uuid
from datetime import datetime

import nbformat
from openai import OpenAI
from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

In [None]:
# SQLAlchemy connection URL passed to create_engine() in create_resource():
# user "postgres" (no password in the URL), host localhost, port 5432,
# database "my-rag-example".
DATABASE_URL = "postgresql://postgres@localhost:5432/my-rag-example"

# Shared OpenAI client used by get_embedding(). No arguments are passed here;
# NOTE(review): presumably credentials come from the environment
# (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()


def read_notebook(file_path):
    """Load the Jupyter notebook stored at *file_path*.

    The file is opened as UTF-8 text and parsed with ``nbformat.read`` using
    ``as_version=4``; the parsed notebook object is returned.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return nbformat.read(handle, as_version=4)

def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for *text* from the module-level OpenAI client.

    Every newline in *text* is replaced by a single space before the request
    is sent. The text is submitted as a one-element ``input`` list, and the
    ``embedding`` attribute of the first entry in the response's ``data`` is
    returned.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

def extract_chunks(notebook, chunk_size=500):
    """Slice the source of every markdown and code cell into fixed-size pieces.

    Cells are visited in notebook order; cells of any other type are skipped.
    Each cell's source is cut into consecutive substrings of at most
    *chunk_size* characters (the last piece of a cell may be shorter), and
    pieces never span two cells. An empty cell contributes nothing.
    """
    pieces = []
    for cell in notebook.cells:
        # Guard clause: only markdown and code cells are chunked.
        if cell.cell_type not in ('markdown', 'code'):
            continue
        body = cell.source
        pieces.extend(
            body[start:start + chunk_size]
            for start in range(0, len(body), chunk_size)
        )
    return pieces

def generate_chunks(input_text):
    """Split *input_text* on ``'.'`` into trimmed, non-empty fragments.

    The text is stripped, split at every period, and each fragment is
    stripped again; fragments that end up empty are dropped. The periods
    themselves are not kept in the returned fragments.
    """
    fragments = (piece.strip() for piece in input_text.strip().split('.'))
    return [fragment for fragment in fragments if fragment]

def generate_embeddings_with_chunks(chunks, embedding_model="text-embedding-ada-002"):
    """Embed every chunk and pair each chunk with its embedding.

    Args:
        chunks: Iterable of text chunks. It is consumed in a single pass, so
            lists and one-shot iterators/generators are both supported.
        embedding_model: Model name forwarded to ``get_embedding``. Note that
            this default is not the same as ``get_embedding``'s own default.

    Returns:
        A list of ``{"content": chunk, "embedding": embedding}`` dicts, one
        per chunk, in input order.
    """
    # Single pass on purpose: iterating `chunks` a second time (e.g. to zip it
    # with a separately built embeddings list) yields nothing when `chunks` is
    # a one-shot iterator, which would silently produce an empty result.
    return [
        {"content": chunk, "embedding": get_embedding(chunk, model=embedding_model)}
        for chunk in chunks
    ]

def create_resource(content):
    """Persist *content* as a resource together with one embedding row per chunk.

    A new UUID is generated for the resource row. The content is then split
    with ``generate_chunks``, embedded with
    ``generate_embeddings_with_chunks``, and one ``embeddings`` row (with its
    own UUID) is inserted per chunk. All inserts share one session and are
    committed together only after every insert succeeded.

    The SQL statements are built with ``sqlalchemy.text``, which must be
    imported at module level.

    Args:
        content: Text to store and embed.

    Returns:
        A status string; ``Exception`` subclasses are reported through the
        return value instead of being raised:

        * ``"Resource successfully created."`` on success.
        * ``"Insertion Error: ..."`` when anything fails after the session
          was opened (the session is rolled back first).
        * ``"SQLAlchemyError: ..."`` or ``"Unexpected Error: ..."`` when a
          failure happens outside that inner handler (engine/session setup
          or teardown).
    """
    try:
        engine = create_engine(DATABASE_URL)
        try:
            with Session(engine) as session:
                try:
                    # Build both statements once instead of once per row.
                    insert_resource_sql = text(
                        "INSERT INTO resources (id, content, created_at, updated_at) VALUES (:id, :content, now(), now())"
                    )
                    insert_embedding_sql = text(
                        "INSERT INTO embeddings (id, resource_id, content, embedding) VALUES (:id, :resource_id, :content, :embedding)"
                    )

                    resource_id = str(uuid.uuid4())
                    session.execute(
                        insert_resource_sql,
                        {"id": resource_id, "content": content},
                    )

                    chunks = generate_chunks(content)
                    for item in generate_embeddings_with_chunks(chunks):
                        session.execute(
                            insert_embedding_sql,
                            {
                                "id": str(uuid.uuid4()),
                                "resource_id": resource_id,
                                "content": item["content"],
                                "embedding": item["embedding"],
                            },
                        )

                    session.commit()
                    return "Resource successfully created."

                except Exception as inner_e:
                    # Undo the partially written resource/embedding rows.
                    session.rollback()
                    return f"Insertion Error: {str(inner_e)}"
        finally:
            # A new engine is created on every call; dispose of it so its
            # connection pool does not linger after the call returns.
            engine.dispose()

    except SQLAlchemyError as e:
        return f"SQLAlchemyError: {str(e)}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"

            
# Main script: read a notebook, join its markdown cells into one text, and
# store that text plus its embeddings via create_resource().
if __name__ == "__main__":
    file_path = "sample.ipynb"

    # Step 1: Read the notebook
    notebook = read_notebook(file_path)

    # Step 2: Extract Markdown content (ignore code cells)
    markdown_content = "\n".join(
        cell.source for cell in notebook.cells if cell.cell_type == 'markdown'
    )

    # Step 3: Save resource and embeddings to PostgreSQL
    result = create_resource(markdown_content)

    # create_resource() reports both success and failure only through its
    # returned status string, so show it; otherwise errors go unnoticed.
    print(result)

Starting
