In [1]:
import os
import pandas as pd
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import OpenAIEmbeddings, ChatOpenAI #

# 1. Read the OpenAI API key from the environment.
# The key is deliberately NOT assigned in source: a hard-coded value is easy to
# commit by accident, and overwriting os.environ would clobber a valid key that
# is already exported. Set it outside the code, e.g. `export OPENAI_API_KEY=...`.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your environment before "
        "running this script instead of hard-coding it in the source."
    )

# 2. Load your CSV file (CSVLoader produces one Document per CSV row)
loader = CSVLoader(file_path="wwii_aircraft_production.csv")  # Replace with your file path and csv
documents = loader.load()
if not documents:
    # Fail early with a clear message rather than letting the vector store
    # be built from an empty list of documents.
    raise ValueError("No rows were loaded from the CSV file; nothing to index.")

# 3. Create embeddings and vector store
# OpenAIEmbeddings picks up OPENAI_API_KEY from the environment.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# 4. Configure how many rows the retriever passes to the LLM per question.
# 4 is also the retriever's default; raise it (e.g. to 20) when an answer needs
# more rows of context, at the cost of a longer prompt.
RETRIEVER_TOP_K = 4
retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

# 5. Build the QA chain using GPT-3.5
llm = ChatOpenAI(model_name="gpt-3.5-turbo")
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,  # result also carries 'source_documents'
)

# 6. Ask a question
query = "How many Albemarle were produced in 1942?"  # Replace with a question relevant to your dataset
result = qa_chain.invoke({"query": query})

# 7. Output the answer
print("Answer:", result["result"])


Answer: 165 Albemarle aircraft were produced in 1942.
