##### Import dependencies


In [48]:
import os
from dotenv import load_dotenv

load_dotenv()

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


import psycopg2
from pgvector.psycopg2 import register_vector
from psycopg2.extras import Json

##### LangSmith Tracing Test


In [None]:
# Smoke test: send one prompt to gpt-4o through ChatOpenAI and display the reply.
# NOTE(review): presumably LangSmith tracing is enabled through variables loaded
# by load_dotenv() — confirm that this run actually appears in LangSmith.
llm = ChatOpenAI(model="gpt-4o")
llm.invoke("LangSmith tracing test")

AIMessage(content="It sounds like you’re trying to perform or learn about tracing tests in LangSmith. However, without specific details on what aspect you’re interested in, here’s a generalized approach:\n\n1. **Understanding LangSmith**: LangSmith is a tool or framework (hypothetical in this context since I don't have specific up-to-date information about a product by this name) that might be dedicated to linguistic processing, language model training, or computational linguistics.\n\n2. **Set Up Environment**:\n   - Ensure you have your development environment set up with the necessary installations. This might include Python, relevant libraries, and LangSmith-specific packages.\n\n3. **Data Preparation**:\n   - Gather and prepare your data for testing. This involves cleaning the data and formatting it according to the requirements of a tracing test.\n\n4. **Create Tracing Functions**:\n   - Define the tracing functions or scripts. These would track specific linguistic features or pr

##### PSQL Connection


In [28]:
# Open the shared PostgreSQL connection and cursor that the later cells rely on.
try:
    postgres_uri = os.getenv("POSTGRES_URI")
    if not postgres_uri:
        # Fail with an actionable message instead of the driver's generic DSN error.
        raise RuntimeError(
            "POSTGRES_URI is not set; add it to the environment or the .env file"
        )

    conn = psycopg2.connect(postgres_uri)
    # Register the pgvector adapters on this connection.
    register_vector(conn)
    cursor = conn.cursor()

    # Simple ping
    cursor.execute("SELECT 1;")
    result = cursor.fetchone()

    print("✅ PostgreSQL connection successful:", result)

except Exception as e:
    print("❌ PostgreSQL connection failed:")
    print(e)
    # Re-raise so "Run All" stops here. Swallowing the error would leave
    # `conn`/`cursor` undefined (or stale from an earlier run) and make the
    # following cells fail with a misleading error.
    raise

✅ PostgreSQL connection successful: (1,)


##### PDF Ingestion pipeline


In [None]:
def ingest_pdf(pdf_path):
    """Load a PDF with ``PyPDFLoader`` and return the loaded documents.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader(pdf_path).load()`` returns (in this notebook,
        a list of ``Document`` objects carrying page metadata).
    """
    return PyPDFLoader(pdf_path).load()

In [7]:
# Load the resume PDF and display the loaded documents.
# NOTE(review): the rendered output includes personal contact details (phone,
# e-mail) — consider clearing the output before sharing the notebook.
docs = ingest_pdf("Swapnil_Resume_Nov.pdf")
docs

[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-03-26T10:09:11+05:30', 'author': 'Austin, Heather', 'moddate': '2025-03-26T10:09:11+05:30', 'source': 'Swapnil_Resume_Nov.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Swapnil Katiyar  \n+91-7503533658     •     swapnil240695@gmail.com    •     Noida, India \nFront-End Developer \nNext.js    |    React.js    |    JavaScript (ES6+)    |    Git   |    Jira \nPassionate Front-End Developer skilled in HTML, CSS, JavaScript, React, and Next.js, with hands-on experience in \nbuilding responsive and interactive web applications. Strong focus on delivering high-quality, maintainable code and \ncrafting exceptional user experiences through collaboration and innovation. \nPROFESSIONAL SKILLS \n• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js \n• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS \n• Version Control

##### Clean Documents


In [None]:
import re


def clean_text(text):
    """Normalise extracted page text.

    Removes ``Page <n>`` markers, squeezes every run of newlines into a single
    newline, and strips leading/trailing whitespace.

    The markers are removed *before* the newlines are collapsed: a marker that
    sits on its own line would otherwise leave an empty line behind, and a
    second pass over the text would give a different result.

    Args:
        text: Raw text of one page.

    Returns:
        The cleaned text.
    """
    text = re.sub(r"Page \d+", "", text)
    text = re.sub(r"\n+", "\n", text)
    return text.strip()


# Clean every loaded document in place and echo the cleaned text.
for doc in docs:
    cleaned = clean_text(doc.page_content)
    doc.page_content = cleaned
    print(cleaned)

Swapnil Katiyar  
+91-7503533658     •     swapnil240695@gmail.com    •     Noida, India 
Front-End Developer 
Next.js    |    React.js    |    JavaScript (ES6+)    |    Git   |    Jira 
Passionate Front-End Developer skilled in HTML, CSS, JavaScript, React, and Next.js, with hands-on experience in 
building responsive and interactive web applications. Strong focus on delivering high-quality, maintainable code and 
crafting exceptional user experiences through collaboration and innovation. 
PROFESSIONAL SKILLS 
• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js 
• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS 
• Version Control & Collaboration: Git, Jira 
• Testing: React Testing Library, Jest, Vitest 
WORK EXPERIENCE 
Treeroot Informatics – Ahmedabad, Gujarat, India January 2023 – April 2024 
Front End Developer 
• Worked closely with a team of 8 developers to deliver high-quality front-end features for projects, utilizing 
React and 

##### Text splitter


In [None]:
def split_texts(docs, chunk_size=300, chunk_overlap=80):
    """Split documents into overlapping chunks.

    Args:
        docs: Documents to split (passed to ``split_documents``).
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
            Defaults to 300, the value previously hard-coded here.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to 80,
            the value previously hard-coded here.

    Returns:
        The chunks produced by ``splitter.split_documents(docs)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return splitter.split_documents(docs)

In [14]:
# Split the cleaned documents into chunks and display them.
chunks = split_texts(docs)
chunks

[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-03-26T10:09:11+05:30', 'author': 'Austin, Heather', 'moddate': '2025-03-26T10:09:11+05:30', 'source': 'Swapnil_Resume_Nov.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Swapnil Katiyar  \n+91-7503533658     •     swapnil240695@gmail.com    •     Noida, India \nFront-End Developer \nNext.js    |    React.js    |    JavaScript (ES6+)    |    Git   |    Jira \nPassionate Front-End Developer skilled in HTML, CSS, JavaScript, React, and Next.js, with hands-on experience in'),
 Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-03-26T10:09:11+05:30', 'author': 'Austin, Heather', 'moddate': '2025-03-26T10:09:11+05:30', 'source': 'Swapnil_Resume_Nov.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='building responsive and interactive web applications. Strong focus on delivering high

##### Create Embeddings and store them into database


In [20]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed all chunks in one batched call rather than one API round-trip per chunk.
vectors = embeddings.embed_documents([chunk.page_content for chunk in chunks])

try:
    for i, (chunk, vector) in enumerate(zip(chunks, vectors)):
        cursor.execute(
            """
            INSERT INTO documents (content, embedding, metadata)
            VALUES (%s, %s, %s)
            """,
            (
                chunk.page_content,
                vector,  # pgvector accepts list[float]
                Json(
                    {
                        "page": chunk.metadata.get("page"),
                        # Read the source from the chunk so this cell is not
                        # tied to one hard-coded file name.
                        "source": chunk.metadata.get("source"),
                        "chunk_id": i,
                    }
                ),
            ),
        )

    # Commit once, after every row has been inserted.
    conn.commit()
except Exception:
    # A failed INSERT aborts the open transaction; roll back so the shared
    # connection stays usable and no partial batch is left pending.
    conn.rollback()
    raise

##### Vector Search


In [31]:
def vector_search(query: str, k: int = 10):
    """Return the ``k`` rows of ``documents`` closest to ``query``.

    The query is embedded with the notebook-level ``embeddings`` client and
    compared against the stored ``embedding`` column using pgvector's ``<=>``
    operator; rows are ordered by that distance.

    Args:
        query: Natural-language search text.
        k: Maximum number of rows to return.

    Returns:
        A list of ``(content, similarity)`` tuples, where ``similarity`` is
        ``1 - (embedding <=> query_vector)``.

    Raises:
        psycopg2.Error: If the query fails. The transaction is rolled back
            first so the shared connection is not left in an aborted state.
    """
    query_vector = embeddings.embed_query(query)

    try:
        cursor.execute(
            """
            SELECT content,
                   1 - (embedding <=> %s::vector) AS similarity
            FROM documents
            ORDER BY embedding <=> %s::vector
            LIMIT %s
            """,
            (query_vector, query_vector, k),
        )
        return cursor.fetchall()
    except psycopg2.Error:
        # Reset the connection; otherwise every later statement on it fails
        # until someone rolls back manually.
        conn.rollback()
        raise

In [None]:
# If an earlier statement failed, the connection may be stuck in an aborted
# transaction; rolling back (only while the connection is still open) resets it.
if not conn.closed:
    conn.rollback()

query = "list all the professional skills"
results = vector_search(query, k=10)
results

[('building responsive and interactive web applications. Strong focus on delivering high-quality, maintainable code and \ncrafting exceptional user experiences through collaboration and innovation. \nPROFESSIONAL SKILLS \n• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js',
  0.4176468600245209),
 ('work focused on front-end development technologies. \n• Mentored 8+ aspiring developers, offering guidance and support to accelerate their skill development. \n• Completed diverse self-learning projects, showcasing practical skills and a commitment to improvement.',
  0.385278983678939),
 ('• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js \n• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS \n• Version Control & Collaboration: Git, Jira \n• Testing: React Testing Library, Jest, Vitest \nWORK EXPERIENCE',
  0.3338577150360862),
 ('EDUCATION   \nABES Engineering College – Ghaziabad, Uttar Pradesh, India \nBachelor of T

#### Generate answers with LLM


In [None]:
def rag_query(query, k=10):
    """Answer ``query`` with gpt-4o using context retrieved for that query.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve as context (passed to
            ``vector_search``).

    Returns:
        The message object returned by ``llm.invoke``; the answer text is in
        its ``content`` attribute.
    """
    llm = ChatOpenAI(model="gpt-4o", temperature=0, streaming=True)

    # Retrieve for *this* query. Reading the notebook-global `results` made the
    # answer depend on whichever search happened to run last.
    matches = vector_search(query, k=k)

    context = "\n\n".join(
        f"[Score: {round(score, 3)}]\n{text}" for text, score in matches
    )

    prompt = f"""
    Answer strictly from the context below.

    Context:
    {context}

    Question:
    {query}

    Answer:
    """

    response = llm.invoke(prompt)
    return response

In [45]:
# Ask the question and print the answer, one output line per answer line.
response = rag_query(query)

print(*response.content.split("\n"), sep="\n")

Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js  
Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS  
Version Control & Collaboration: Git, Jira  
Testing: React Testing Library, Jest, Vitest


##### Using ChatPromptTemplate

In [51]:
import uuid

# Model and parser for the chain.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
output_parser = StrOutputParser()

# The system message restricts the model to the supplied context; the human
# message carries that context together with the question.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. "
            "Answer strictly using the provided context. "
            "If the answer is not in the context, say you don't know.",
        ),
        ("human", "Context:\n{context}\n\nQuestion:\n{question}"),
    ]
)

# prompt -> model -> plain string
chain = prompt | llm | output_parser

# Build the context from the rows of the most recent vector search (`results`).
scored_chunks = [f"[Score: {round(score, 3)}]\n{text}" for text, score in results]
context = "\n\n".join(scored_chunks)

# Tag the run with an explicit run_id and print it alongside the answer.
my_uuid = uuid.uuid4()
result = chain.invoke(
    {"question": query, "context": context},
    config={"run_id": my_uuid},
)
print(my_uuid)
print(result)

b468b99f-90e4-4bf5-aecf-44d14f379326
- Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js
- Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS
- Version Control & Collaboration: Git, Jira
- Testing: React Testing Library, Jest, Vitest
