In [1]:
import os 
from dotenv import load_dotenv

load_dotenv() 
os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [2]:
pip install langchain_core langchain_pinecone

[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
index_name = os.getenv("PINECONE_INDEX_ONE")


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


In [73]:
#load fake data
all_docs = {
    "doc1": "the effect of shrimp chips on honey badgers",
    "doc2": "analysis of vending machine snack trends in Canadian high schools",
    "doc3": "how humidity affects instant ramen texture over time",
    "doc4": "behavioral study of seagulls when offered barbecue chips",
    "doc5": "historical overview of bubble tea flavor popularity in North America",
    "doc6": "sociological implications of limited edition oreo flavor releases",
    "doc7": "nutritional breakdown of instant noodles versus fresh ramen",
    "doc8": "correlation between soft drink carbonation level and customer satisfaction",
    "doc9": "historical timeline of convenience store chain expansions in japan",
    "doc10": "study on potato chip crispness under varying storage temperatures",
    "doc11": "behavioral economics of buy-one-get-one snack promotions",
    "doc12": "comparison of candy aisle layouts in high vs low income neighborhoods",
    "doc13": "impact of popcorn seasoning variety on movie theater sales",
    "doc14": "analysis of iced coffee popularity trends across north america",
    "doc15": "texture analysis of mochi under different freezing conditions",
    "doc16": "survey on snack preference differences between millennials and gen z",
    "doc17": "experimental study on cheese puff structural integrity under pressure",
    "doc18": "consumer psychology behind energy drink branding colors",
    "doc19": "logistical challenges of distributing fresh donuts to remote areas",
    "doc20": "comparative study of chip bag air volume by brand",
    "doc21": "economic effects of pumpkin spice seasonal product releases",
    "doc22": "analysis of gum flavor longevity across major brands",
    "doc23": "how store lighting impacts snack purchase decisions",
    "doc24": "effects of spicy snack consumption on short-term cognitive performance",
    "doc25": "survey of most stolen items in convenience stores",
    "doc26": "price elasticity of premium chocolate bar purchases",
    "doc27": "relationship between coffee aroma intensity and impulse buying",
    "doc28": "behavioral patterns of customers waiting in checkout snack lanes",
    "doc29": "study on gum wrapper disposal habits in urban vs rural settings",
    "doc30": "impact of product placement height on snack selection"
}


In [74]:
#create embeddings
embeddings = OpenAIEmbeddings()

#create vector store (using pinecone)
vectorstore = PineconeVectorStore.from_texts(
    list(all_docs.values()), embeddings, index_name = "rag-fusion-1536"
)

In [None]:
#Create Retriever
vectorstore = PineconeVectorStore.from_existing_index(index_name=", OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

In [14]:
#define query generator

#define a chain to do query generation

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI


In [15]:
pip install langchainhub

Collecting langchainhub
  Downloading langchainhub-0.1.21-py3-none-any.whl.metadata (659 bytes)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.32.4.20250913-py3-none-any.whl.metadata (2.0 kB)
Downloading langchainhub-0.1.21-py3-none-any.whl (5.2 kB)
Downloading types_requests-2.32.4.20250913-py3-none-any.whl (20 kB)
Installing collected packages: types-requests, langchainhub
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [langchainhub]
[1A[2KSuccessfully installed langchainhub-0.1.21 types-requests-2.32.4.20250913
[0mNote: you may need to restart the kernel to use updated packages.


In [41]:
#loading prompt from langchainhub
from langchain import hub

prompt = hub.pull("langchain-ai/rag-fusion-query-generation")

In [42]:
#returning a list of strings
#called a runnable

#define query generation chain
generate_queries = (
    prompt | ChatOpenAI(temperature=0) | StrOutputParser() | (lambda x: x.split("\n"))
)

In [44]:
Define the full chain

chain will 
1. generate queries
2. look up each query in the retriever
3. joins all the results together using reciprocal rank fusion

**does not do a final genration step

SyntaxError: invalid syntax (1172543766.py, line 1)

In [68]:
#query

original_query = "studies relating to aquatic animals"

In [69]:
#create retriever

vectorstore = PineconeVectorStore.from_existing_index("rag-fusion-1536", OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

In [70]:
#Define Reciprocal Rank Fusion Function

from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}
    for docs in results:
        #assumes docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            previous_score = fused_scores[doc_str]
            fused_scores[doc_str] += 1  / (rank + k)

    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

In [71]:
#define full chain

chain = generate_queries | retriever.map() | reciprocal_rank_fusion

# invoke chain
chain.invoke({"original_query": original_query})

In [72]:
 chain.invoke({"original_query": original_query})

[(Document(id='bb8e7a62-6959-4cab-868c-6fcc841daa6c', metadata={}, page_content='behavioral study of seagulls when offered barbecue chips'),
  0.06506215742069787),
 (Document(id='a0027755-ec76-4f57-b4e7-17d42dd59db5', metadata={}, page_content='behavioral study of seagulls when offered barbecue chips'),
  0.06506215742069787),
 (Document(id='0834a919-5705-4802-afae-2619a66a9f76', metadata={}, page_content='the effect of shrimp chips on honey badgers'),
  0.049189141547682),
 (Document(id='0da7d9d0-ca64-49b9-bb8d-92b348660c12', metadata={}, page_content='the effect of shrimp chips on honey badgers'),
  0.04893312516263336),
 (Document(id='96cca58e-3d9f-41f1-a2f5-4cbf1efb3790', metadata={}, page_content='analysis of vending machine snack trends in Canadian high schools'),
  0.016129032258064516),
 (Document(id='d65290d8-d6a8-457c-bebe-797e7e86cfbf', metadata={}, page_content='analysis of vending machine snack trends in Canadian high schools'),
  0.015873015873015872)]