# Libraries

In [48]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain
import asyncio
from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv

from IPython.display import display, Markdown

# Configuration

In [8]:
load_dotenv()

True

# Data stuff

In [15]:
loader = CSVLoader('../data/jobs/aug3.csv',     metadata_columns=["job_title", "company_name", "location", "industry", "date_posted"],)
data = loader.load()

In [16]:
print(f"Loaded {len(data)} documents, example metadata:", data[0].metadata)

Loaded 79 documents, example metadata: {'source': '../data/jobs/aug3.csv', 'row': 0, 'job_title': 'Software Engineer', 'company_name': 'Coinbase', 'location': 'United States (Remote)', 'industry': 'Financial Services', 'date_posted': 'Reposted 1 week ago'}


In [17]:

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50
)
chunked_docs = text_splitter.split_documents(data)
print(f"After chunking: {len(chunked_docs)} chunks (one per ~800 characters)")


After chunking: 385 chunks (one per ~800 characters)


In [18]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

vector_store = FAISS.from_documents(
    documents=chunked_docs,
    embedding=embeddings
)

In [19]:
vector_store.save_local("faiss_jobs_index")


vector_store = FAISS.load_local(
    "faiss_jobs_index",
    embeddings,
    allow_dangerous_deserialization=True
)

In [20]:
results = vector_store.similarity_search(
    query="Django",
    k=5
)

In [24]:
res = vector_store.similarity_search_with_score("Django", k=20)
for doc, score in res:
    print(f"{score:.3f} │ {doc.metadata['company_name']} – {doc.metadata.get('job_title', '<no title>')}")

1.179 │ Otter.ai – Software Engineer, Backend (Product)
1.225 │ Salt AI – Senior Fullstack Platform Engineer (Backend Focus)
1.254 │ MassMutual – Python AI Developer - Investment Management Technology
1.260 │ SMBC Group – Full Stack Developer - React / Python (Django)
1.260 │ SMBC Group – Full Stack Developer - React / Python (Django)
1.260 │ SMBC Group – Full Stack Developer - React / Python (Django)
1.276 │ Salt AI – Senior Fullstack Platform Engineer (Backend Focus)
1.296 │ Carry – Senior Software Engineer - Backend (Python)
1.327 │ Acorns – Senior Backend Engineer, AI Team
1.351 │ MassMutual – Python AI Developer - Investment Management Technology
1.383 │ SMBC Group – Full Stack Developer - React / Python (Django)
1.383 │ SMBC Group – Full Stack Developer - React / Python (Django)
1.383 │ SMBC Group – Full Stack Developer - React / Python (Django)
1.388 │ Pragmatike – Product Engineer (Fullstack) - Remote
1.400 │ Forage – Senior Software Engineer
1.403 │ Sardine – Software Engineer

In [29]:
retriever = vector_store.as_retriever()

# Agent Stuff

In [46]:
def sync_chat(q, history):
    handler = StreamingStdOutCallbackHandler()
    llm = ChatOpenAI(
        streaming=True,
        callbacks=[handler],
        temperature=0.0,
        model_name="gpt-4.1"
    )

    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=False
    )

    result = qa.invoke({"question": q, "chat_history": history})
    return result["answer"], result.get("source_documents", [])


In [60]:
res = sync_chat("Find me job descriptions required for Fintech Django Positions. Write the name of the company and summarize JD properly", [])

Based on the provided context, here are summarized job descriptions for Fintech Django positions, including company size and required qualifications. The company names are not specified in the context, but the summaries reflect typical requirements for such roles:

---

### 1. **Fintech Company (11-50 employees) – Senior Full Stack Engineer (Python/Django & React)**

**Summary of Job Description:**
- **Role:** Senior Full Stack Engineer focusing on Python (Django) and React.
- **Responsibilities:**  
  - Build and maintain enterprise applications with complex workflow logic, approvals, adjustments, and audit requirements.
  - Work directly with business users and cross-functional teams to deliver solutions.
  - Ensure applications meet financial reporting and regulatory standards.
  - Implement CI/CD pipelines, Docker, and cloud-native development practices.
  - Collaborate in Agile development environments.
- **Requirements:**  
  - 8+ years of full stack software development experien

In [61]:
display(Markdown(res[0]))

Based on the provided context, here are summarized job descriptions for Fintech Django positions, including company size and required qualifications. The company names are not specified in the context, but the summaries reflect typical requirements for such roles:

---

### 1. **Fintech Company (11-50 employees) – Senior Full Stack Engineer (Python/Django & React)**

**Summary of Job Description:**
- **Role:** Senior Full Stack Engineer focusing on Python (Django) and React.
- **Responsibilities:**  
  - Build and maintain enterprise applications with complex workflow logic, approvals, adjustments, and audit requirements.
  - Work directly with business users and cross-functional teams to deliver solutions.
  - Ensure applications meet financial reporting and regulatory standards.
  - Implement CI/CD pipelines, Docker, and cloud-native development practices.
  - Collaborate in Agile development environments.
- **Requirements:**  
  - 8+ years of full stack software development experience.
  - Strong Python (Django) and React skills.
  - Solid SQL and experience with relational databases and ORM tools.
  - Experience with Databricks or Spark.
  - Exposure to financial products, product control functions, and financial reporting platforms.
  - Strong communication skills.
  - Experience in highly regulated environments is a plus.

---

### 2. **Fintech Startup (11-50 employees) – Senior Python Developer**

**Summary of Job Description:**
- **Role:** Senior Python Developer with Django expertise.
- **Responsibilities:**  
  - Own and maintain live services, including monitoring, alerting, and deployment.
  - Integrate with third-party APIs, especially in the Fintech domain.
  - Build and optimize money movement or payment systems (bonus).
  - Work with PostgreSQL and other relational databases.
  - Collaborate with a small, agile team to deliver high-quality software.
- **Requirements:**  
  - 5+ years of Python development experience in production environments.
  - Strong experience with PostgreSQL.
  - Experience in Fintech or with third-party API integrations.
  - Bonus: direct experience with payment systems.
  - Technologies used: Python, Django, TypeScript, React, Next.js, Chakra UI, NestJS, PostgreSQL, Memcached, AWS (EKS, SQS, SNS, Lambda).

---

**Note:**  
The company names are not provided in the context. If you need actual company names, please provide more specific information or job postings. The above summaries are based on the qualifications and requirements listed in the context for Fintech Django positions.

### Async Agent Function (Good for FastAPI Backend Streaming Responses)

In [34]:
async def async_chat(q, history, retriever):
    handler = AsyncIteratorCallbackHandler()
    llm = ChatOpenAI(
        streaming=True,
        callbacks=[handler],
        temperature=0.0,
        model_name="gpt-4"
    )
    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
    )

    fut = asyncio.create_task(qa.ainvoke({"question": q, "chat_history": history}))

    async for token in handler.aiter():
        yield {"type":"token", "content": token}

    result = await fut
    yield {
        "type": "result",
        "answer": result["answer"],
        "sources": getattr(result, "source_documents", None),
    }
