# Ask the docs anything about SuperDuperDB

In [None]:
!pip install superduperdb

In [None]:
import os
os.environ['OPENAI_API_KEY'] = '<YOUR-OPENAI-API-KEY>'

In [1]:
import os
from superduperdb import superduper
from superduperdb.db.mongodb.query import Collection

# Uncomment one of the following lines to use a bespoke MongoDB deployment
# For testing the default connection is to mongomock

mongodb_uri = os.getenv("MONGODB_URI","mongomock://test")
# mongodb_uri = "mongodb://localhost:27017"
# mongodb_uri = "mongodb://superduper:superduper@mongodb:27017/documents"
# mongodb_uri = "mongodb://<user>:<pass>@<mongo_cluster>/<database>"
# mongodb_uri = "mongodb+srv://<username>:<password>@<atlas_cluster>/<database>"

# Super-Duper your Database!
from superduperdb import superduper
db = superduper(mongodb_uri)

collection = Collection('questiondocs')



In [2]:
import glob

STRIDE = 5       # stride in numbers of lines
WINDOW = 10       # length of window in numbers of lines

content = sum([open(file).readlines() for file in glob.glob('../*/*.md') + glob.glob('../*.md')], [])
chunks = ['\n'.join(content[i: i + WINDOW]) for i in range(0, len(content), STRIDE)]

In [3]:
from IPython.display import Markdown
Markdown(chunks[2])

```python

backend

├── ai  # RAG app-specific code

│   ├── ...

├── documents  # routes for our app

│   ├── __init__.py

│   ├── models.py  # pydantic models

│   └── routes.py  # AI-enhanced CRUD logic

├── __init__.py

├── app.py  # events that occur at app startup/shutdown


In [6]:
from superduperdb.container.document import Document

db.execute(collection.insert_many([Document({'txt': chunk}) for chunk in chunks]))

INFO:root:found 0 uris
INFO:root:Adding model text-embedding-ada-002 to db
INFO:root:Done.
INFO:root:Adding model text-embedding-ada-002 to db
INFO:root:Done.


Computing chunk 0/0


0it [00:00, ?it/s]


(<pymongo.results.InsertManyResult at 0x157063fa0>,
 TaskWorkflow(database=<superduperdb.db.base.db.DB object at 0x156aaa150>, G=<networkx.classes.digraph.DiGraph object at 0x15740a710>))

In [7]:
db.execute(collection.find_one())

Document({'_id': ObjectId('652459fb412eff4a8f12f259'), 'txt': "# Building a RAG application with FastAPI, MongoDB and SuperDuperDB\n\n\n\nIn this use-case, we'll use FastAPI to serve SuperDuperDB on [fly.io](https://fly.io/) using MongoDB as a databackend.\n\nThe task we'll be implementing is a retrieval augmented text-generation (RAG) app for answering\n\nquestions about a particular trove of documents. Read more [on our blog](/blog/...).\n\n\n\n## Create the FastAPI app file structure\n\n\n\nThere are many choices here. Please refer to the FastAPI [documentation](https://fastapi.tiangolo.com/tutorial/bigger-applications/) for other possible choices. The structure that we chose looks like the following:\n\n\n", '_fold': 'train'})

In [8]:
from superduperdb.container.vector_index import VectorIndex
from superduperdb.container.listener import Listener
from superduperdb.ext.openai.model import OpenAIEmbedding

db.add(
    VectorIndex(
        identifier='my-index',
        indexing_listener=Listener(
            model=OpenAIEmbedding(model='text-embedding-ada-002'),
            key='txt',
            select=collection.find(),
        ),
    )
)

INFO:root:Adding model text-embedding-ada-002 to db
INFO:root:Done.
443it [00:00, 739.03it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00,  1.62s/it]


[]

In [9]:
from superduperdb.ext.openai.model import OpenAIChatCompletion

chat = OpenAIChatCompletion(
    model='gpt-3.5-turbo',
    prompt=(
        'Use the following description and code-snippets aboout SuperDuperDB to answer this question about SuperDuperDB\n'
        'Do not use any other information you might have learned about other python packages\n'
        'Only base your answer on the code-snippets retrieved\n'
        '{context}\n\n'
        'Here\'s the question:\n'
    ),
)

db.add(chat)

print(db.show('model'))

['gpt-3.5-turbo', 'splitter', 'text-embedding-ada-002']


In [10]:
db.show('model', 'gpt-3.5-turbo')

[0]

In [12]:
from superduperdb.container.document import Document
from IPython.display import display, Markdown


q = 'Can you give me a code-snippet to set up a `VectorIndex`?'

output, context = db.predict(
    model_name='gpt-3.5-turbo',
    input=q,
    context_select=(
        collection
            .like(Document({'txt': q}), vector_index='my-index', n=5)
            .find()
    ),
    context_key='txt',
)

Markdown(output.content)

Sure! Here's a code-snippet to set up a `VectorIndex` in SuperDuperDB:

```python
from superduperdb.container.vector_index import VectorIndex
from superduperdb.container.listener import Listener

# Define the VectorIndex
vector_index = VectorIndex(
    identifier='my-index',
    indexing_listener=Listener(
        model=OpenAIEmbedding(model='text-embedding-ada-002'),
        key='txt',
        select=collection.find(),
    ),
)

# Add the VectorIndex to the database
db.add(vector_index)
```

This code creates a `VectorIndex` object named 'my-index' and sets up a listener using the model 'text-embedding-ada-002' from OpenAI. The 'txt' key is used to extract text data from the collection during indexing. Finally, the `VectorIndex` is added to the SuperDuperDB database using the `db.add()` method.