In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
print(f"Loaded API Key starts with: {OPENAI_API_KEY[:8]}")

Loaded API Key starts with: sk-proj-


In [9]:
from langchain_openai.chat_models import ChatOpenAI

model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-4.1-mini")

In [None]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

'The MLB team that won the World Series during the COVID-19 pandemic in 2020 was the Los Angeles Dodgers. They defeated the Tampa Bay Rays to claim the championship.'

In [13]:
import fitz

doc = fitz.open("David Ahn_Cover Letter_Federal Board Reserve.pdf") 
text = ""

for i, page in enumerate(doc):
    text += page.get_text()

In [14]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
documents = text_splitter.split_text(text) 

documents

["David Ahn  \nCollege Park, MD, 20742, \ndavidahn1108@gmail.com | (213) 447-5377  \nDear Hiring Committee, \nI am writing to express my strong interest in the 2026 Summer Internship Program at the Federal \nReserve Bank of Richmond. As a Computer Science major at the University of Maryland with a \nconcentration in machine learning, I bring a strong technical foundation, extensive research \nexperience, and genuine passion for applying data and technology to serve the public good. I am \nespecially drawn to the Federal Reserve's mission to strengthen and protect the economy, and I \nwould be honored to contribute to that purpose. \n \nThrough the First-Year Innovation and Research Experience (FIRE) program at UMD, I am \ncurrently developing a machine learning model that predicts stock market behavior using \nsentiment analysis from employee reviews and financial news, which is work that involves \ngathering, cleaning, and analyzing large datasets to produce meaningful insights. My ea

In [18]:
from langchain_pinecone import PineconeVectorStore
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

index_name = "risk-assessment"

pinecone = PineconeVectorStore.from_texts(
    documents, embeddings, index_name=index_name
)

In [None]:
from pymongo import MongoClient
from typing import Optional

MONGODB_URI = os.getenv("MONGODB_URI")  
DB_NAME = os.getenv("MONGODB_DB")     
COLLECTION_NAME = os.getenv("MONGODB_COLLECTION")  

client = MongoClient(MONGODB_URI)
db = client[DB_NAME]
users_collection = db[COLLECTION_NAME]

def get_investing_type(email: str) -> Optional[str]:
    user = users_collection.find_one({"email": email}, {"risk_category": 1, "_id": 0})
    if user:
        return user["risk_category"]
    return None


TypeError: name must be an instance of str, not <class 'NoneType'>

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain.prompts import ChatPromptTemplate

template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know". Keep in mind that the user's investing type is {investType}

Context: {context}

Question: {question}

"""

prompt = ChatPromptTemplate.from_template(template)
chain = (
    {"context": pinecone.as_retriever(), "question": RunnablePassthrough(), "investType": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

user_id = "placeholder" #this should be retrived from the login session, either from frontend or the authorization apge
question = "What investment strategy should I follow?"

investing_type = get_investing_type(user_id)

if investing_type is None:
    response = "I don't know"
else:
    response = chain.invoke({
        "question": question,
        "investType": investing_type
    })

print(response)


"I don't know"