### Imports

In [73]:
import os
import pandas as pd

from langchain.vectorstores import pinecone
from langchain.embeddings.openai import OpenAIEmbeddings

import openai

import pinecone
from pinecone import Pinecone

from dotenv import load_dotenv
# Load settings from a .env file into the environment; later cells read
# OPENAI_API_KEY and PINECONE_INDEX_NAME through os.getenv.
# env_loaded keeps load_dotenv()'s return value for inspection.
env_loaded = load_dotenv()

In [75]:
# Load the training dataset; its "ABSTRACT" column is the text embedded further down.
df = pd.read_csv("./data/train.csv")

In [76]:
# Connect to an existing Pinecone index (create it first on https://app.pinecone.io/).
# Pinecone() is called without arguments, so the client has to locate its own
# credentials -- presumably the PINECONE_API_KEY environment variable; confirm
# against the installed client version.
pc = Pinecone()

pinecone_index_name = os.getenv("PINECONE_INDEX_NAME")
if not pinecone_index_name:
    # Fail here with an actionable message instead of handing None to the client
    # and to the ingestion step that reuses this name.
    raise RuntimeError(
        "PINECONE_INDEX_NAME is not set; add it to your environment or .env file."
    )

index = pc.Index(name=pinecone_index_name)

In [None]:
def create_docstore_from_texts(texts: list[str]) -> None:
    """Embed the given texts with OpenAI and insert them into our Pinecone index.

    Args:
        texts (list[str]): list of texts to insert. An empty list is a no-op.
    """
    if not texts:
        # Nothing to embed; skip the embeddings request entirely.
        return

    # Local, aliased import: at notebook level the name `pinecone` ends up bound to
    # the Pinecone client package (the later `import pinecone` wins), and that
    # package's `Pinecone` class is the API client, which has no `from_texts`.
    # The LangChain vector store class is the one that provides it.
    from langchain.vectorstores.pinecone import Pinecone as PineconeVectorStore

    PineconeVectorStore.from_texts(
        texts=texts,  # insert exactly what the caller passed, not the whole dataframe
        embedding=OpenAIEmbeddings(
            openai_api_key=os.getenv("OPENAI_API_KEY"),
            model="text-embedding-3-large" # this will have dimensions of 3072, small will have 1536
        ),
        index_name=pinecone_index_name
    )

# Run this once, after creating the Pinecone index on https://app.pinecone.io/ --
# re-running repeats the embedding requests and the insert.
SAMPLE_SIZE = 1000  # maximum number of abstracts to insert
SAMPLE_SEED = 42  # fixed seed so the same rows are drawn on every run

abstracts_sample = df.sample(
    n=min(SAMPLE_SIZE, len(df)),  # sample() raises if n exceeds the number of rows
    random_state=SAMPLE_SEED,
)["ABSTRACT"].to_list()

create_docstore_from_texts(abstracts_sample)

### Query the Index and get some results

In [78]:
def query_index(query: str, top_k: int = 3) -> None:
    """Embed a query, search the Pinecone index and print the closest matches.

    Each match is printed as ``Result <n>: <first 70 characters of its text>``.

    Args:
        query (str): free-text search query.
        top_k (int, optional): number of matches to request and print. Defaults to 3.

    Raises:
        ValueError: if ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"'top_k' must be at least 1, got {top_k}")

    # Use the same embedding model as the ingestion step so the query vector is
    # comparable with the vectors stored in the index.
    query_embedding = openai.embeddings.create(
        input=[query],
        model="text-embedding-3-large"
    ).data[0].embedding

    assert isinstance(query_embedding, list), "'query_embedding' is not of type list!"

    result = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True  # the matched text is read from each match's metadata
    )

    for idx, match in enumerate(result["matches"]):
        # Only a 70-character preview of the stored text is shown.
        print(f"Result {idx}: {match['metadata']['text'][:70]}")
    
# Example: search the index for "MRI" and print the closest matches.
query_index("MRI")

Result 0:   Magnetic Resonance Imaging (MRI) and Positron Emission Tomography (P
Result 1:   Obtaining magnetic resonance images (MRI) with high resolution and g
Result 2:   Purpose: Magnetic Resonance Fingerprinting (MRF) is a relatively new


#### See app.py for a Streamlit chatbot that interfaces with the Pinecone embeddings