### Imports

In [37]:
import os
import random
import itertools
import pandas as pd

import langchain
from langchain.vectorstores import pinecone as LCPC
from langchain.embeddings.openai import OpenAIEmbeddings

import openai
from openai import OpenAI

import pinecone
from pinecone import Pinecone, ServerlessSpec, PodSpec

from dotenv import load_dotenv
env_loaded = load_dotenv()

In [50]:
# Read ./data/train.csv into `df`, then preview the ABSTRACT column of
# 20 randomly sampled rows (no seed is set, so the rows differ per run).
df = pd.read_csv("./data/train.csv")
abstract_sample = df.sample(n=20)[["ABSTRACT"]]
abstract_sample.head(20)

Unnamed: 0,ABSTRACT
17657,This article introduces the notion of arbitr...
7415,Magnetic resonance image (MRI) reconstructio...
19690,Parity and time-reversal violating electric ...
17803,We show how an ensemble of $Q^*$-functions c...
12466,We present a general-purpose method to train...
9715,The origin of colossal magnetoresistance (CM...
18724,This paper considers the joint design of use...
16402,As a large-scale instance of dramatic collec...
5255,Follicle-stimulating hormone (FSH) and lutei...
8671,"A bounce universe model, known as the couple..."


In [55]:
# Name of the Pinecone index used by the rest of the notebook.
pinecone_index_name = "openai"

# No credentials are passed explicitly here.
# NOTE(review): presumably the client reads its API key from the environment
# populated by load_dotenv() — confirm.
pc = Pinecone()

# Handle used to query the index.
index = pc.Index(name=pinecone_index_name)

In [54]:
def create_docstore_from_texts(texts: list[str]):
    """Embed ``texts`` and upload them to the Pinecone index as a LangChain vector store.

    Args:
        texts: The documents to embed and upload.

    Returns:
        The vector store object returned by LangChain's
        ``Pinecone.from_texts`` for the index named by the module-level
        ``pinecone_index_name``.
    """
    # Embeddings use the same model as the query cell so that stored vectors
    # and query vectors are comparable. The key is read from OPENAI_API_KEY.
    embedding_model = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        model="text-embedding-3-large"
    )

    # `from_texts` lives on LangChain's Pinecone vector store (imported as
    # LCPC), not on the Pinecone SDK client class `pinecone.Pinecone`.
    # Use the caller-supplied `texts` rather than the global DataFrame.
    doc_store = LCPC.Pinecone.from_texts(
        texts=list(texts),
        embedding=embedding_model,
        index_name=pinecone_index_name
    )

    return doc_store

In [64]:
# Free-text search term to look up in the vector index.
query = "MRI"

# Embed the query with the same model name used when building the doc store.
embedding_response = openai.embeddings.create(
    input=[query],
    model="text-embedding-3-large"
)
query_embedding = embedding_response.data[0].embedding

assert isinstance(query_embedding, list), "'query_embedding' is not of type list!"

# Fetch the three nearest neighbours, including stored metadata so the
# original text can be printed.
result = index.query(
    vector=query_embedding,
    top_k=3,
    include_metadata=True
)

# Show the first 70 characters of each matched text.
matches = result["matches"]
for idx, match in enumerate(matches):
    print(f"Result {idx}: {match['metadata']['text'][:70]}")

Result 0:   Magnetic Resonance Imaging (MRI) and Positron Emission Tomography (P
Result 1:   Obtaining magnetic resonance images (MRI) with high resolution and g
Result 2:   Purpose: Magnetic Resonance Fingerprinting (MRF) is a relatively new
