In [24]:
import requests
import json
from langchain_openai import ChatOpenAI
import os
from pydantic import BaseModel
from typing import List, Dict, Any
from langchain_core.prompts import ChatPromptTemplate
import os
import pathlib
from bs4 import BeautifulSoup
import re
from bs4 import BeautifulSoup
import re
from langchain_openai import ChatOpenAI
from pydantic import Field
from typing import Optional, Literal
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain.schema import HumanMessage, AIMessage
from __future__ import print_function
import sib_api_v3_sdk
from sib_api_v3_sdk.rest import ApiException
from IPython.display import Markdown, display

In [43]:
with open(".env", "r") as f:
  for line in f:
    key, value = line.strip().split("=")
    os.environ[key] = value


In [None]:
search_terms = ["Entry level Software Engineer Jobs", "University Graduate Software Engineer Jobs", "Junior Software Developer Jobs", "AI Engineer Jobs", "LangChain Jobs"]

In [4]:
class ResultRelevance(BaseModel):
  explanation: str
  id: str

class RelevanceCheckOutput(BaseModel):
  relevant_results: List[ResultRelevance]

def search_serper(search_query):
  url = "https://google.serper.dev/search"
    
  payload = json.dumps({
    "q": search_query,
    "gl": "us", 
    "num": 30,
    "tbs": "qdr:d"
  })

  headers = {
    'X-API-KEY': '0130e7da8283ec8a8d9721c6c38a97d557867923',
    'Content-Type': 'application/json'
  }

  response = requests.request("POST", url, headers=headers, data=payload)
  results = json.loads(response.text)
  results_list = results['organic']

  all_results = []
  for id, result in enumerate(results_list, 1):
    result_dict = {
      'title': result['title'],
      'link': result['link'],
      'snippet': result['snippet'],
      'search_term': search_query,
      'id': id
    }
    all_results.append(result_dict)
  return all_results

def load_prompt(prompt_name):
  with open(f"prompts/{prompt_name}.md", "r") as file:
    return file.read()
  
def check_search_relevance(search_results: Dict[str, Any]) -> RelevanceCheckOutput:
  prompt = load_prompt("relevance_check")
  prompt_template = ChatPromptTemplate.from_messages(
    ("system_prompt", prompt)
  )

  llm = ChatOpenAI(model="gpt-4o").with_structured_output(RelevanceCheckOutput)
  llm_chain = prompt_template | llm
  return llm_chain.invoke({'input_search_results': search_results})

In [5]:
relevant_results = []
for search_term in search_terms:
  python_results = search_serper(search_term)
  results = check_search_relevance(python_results)
  
  # Get the relevant result IDs from the LLM output
  relevant_ids = [r.id for r in results.relevant_results]
  
  # Filter original results to only include those with matching IDs
  filtered_results = [r for r in python_results if str(r['id']) in relevant_ids]
  
  relevant_results.extend(filtered_results)
relevant_results

[{'title': 'Software Engineer jobs at Y Combinator startups',
  'link': 'https://www.ycombinator.com/jobs/role/software-engineer',
  'snippet': 'Many YC startups are seeing breakout growth, and are actively hiring for software engineers. Find some of the top YC companies at Y Combinator.',
  'search_term': 'Software Engineer New Grad',
  'id': 1},
 {'title': 'Software Engineer, New Grad & Entry Level - LinkedIn',
  'link': 'https://www.linkedin.com/jobs/view/software-engineer-new-grad-entry-level-at-jobright-ai-4269426530',
  'snippet': 'Learning & Development: Participate in code reviews, mentorship opportunities, and continuous learning to deepen your expertise in full-stack development.',
  'search_term': 'Software Engineer New Grad',
  'id': 2},
 {'title': '2025 New Grad Computer Science Jobs (NOW HIRING) - ZipRecruiter',
  'link': 'https://www.ziprecruiter.com/Jobs/2025-New-Grad-Computer-Science',
  'snippet': 'Software Engineer, New Grad (2025). Sentry. San Francisco, CA. $130K -

In [11]:
def convert_html_to_markdown(html_content):
    # Create BeautifulSoup object
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Convert common HTML elements to markdown
    
    # Headers
    for h in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        level = int(h.name[1])
        h.replace_with('#' * level + ' ' + h.get_text() + '\n\n')
    
    # Links
    for a in soup.find_all('a'):
        href = a.get('href', '')
        text = a.get_text()
        if href and text:
            a.replace_with(f'[{text}]({href})')
    
    # Bold
    for b in soup.find_all(['b', 'strong']):
        b.replace_with(f'**{b.get_text()}**')
    
    # Italic
    for i in soup.find_all(['i', 'em']):
        i.replace_with(f'*{i.get_text()}*')
    
    # Lists
    for ul in soup.find_all('ul'):
        for li in ul.find_all('li'):
            li.replace_with(f'- {li.get_text()}\n')
    
    for ol in soup.find_all('ol'):
        for i, li in enumerate(ol.find_all('li'), 1):
            li.replace_with(f'{i}. {li.get_text()}\n')
    
    # Get text and clean up
    text = soup.get_text()
    
    # Remove excess whitespace/newlines
    text = re.sub(r'\n\s*\n', '\n\n', text)
    text = text.strip()
    
    return text

def scrape_and_save_markdown(relevant_results):
    """
    Scrapes HTML content from URLs in relevant_results and saves as markdown files.
    
    Args:
        relevant_results: List of dictionaries containing search results with URLs
        
    Returns:
        List of dictionaries containing markdown content and metadata
    """
    # Create scraped_html directory if it doesn't exist
    pathlib.Path("scraped_markdown").mkdir(exist_ok=True)

    markdown_contents = []
    for result in relevant_results:
        if 'link' in result:
            payload = {
                "api_key": "zZwEqATYReQC4ogVNYnaLUYdm47ITPMNiFbRtvw1o7IpQbTubwZUvnkCycrWx2wCWt9wuA4aUIizsSvtQH", 
                "url": result['link'],
                "render_js": "true"
            }

            response = requests.get("https://scraping.narf.ai/api/v1/", params=payload)
            if response.status_code == 200:
                # Create filename from ID or URL if ID not available
                filename = f"{result.get('id', hash(result['link']))}.md"
                filepath = os.path.join("scraped_markdown", filename)
                
                # Convert HTML to markdown
                markdown_content = convert_html_to_markdown(response.content.decode())
                
                # Save markdown content to file
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write(markdown_content)
                
                markdown_contents.append({
                    'url': result['link'],
                    'filepath': filepath,
                    'markdown': markdown_content,
                    'title': result.get('title', ''),
                    'id': result.get('id', '')
                })
            else:
                print(f"Failed to fetch {result['link']}: Status code {response.status_code}")

    print(f"Successfully downloaded and saved {len(markdown_contents)} pages as markdown to scraped_markdown/")
    return markdown_contents

markdown_contents = scrape_and_save_markdown(relevant_results)

Failed to fetch https://www.linkedin.com/jobs/view/software-engineer-new-grad-entry-level-at-jobright-ai-4269426740: Status code 500
Successfully downloaded and saved 39 pages as markdown to scraped_markdown/


In [12]:
temp = markdown_contents

In [13]:
markdown_contents = temp
markdown_contents

[{'url': 'https://www.ycombinator.com/jobs/role/software-engineer',
  'filepath': 'scraped_markdown/1.md',
  'markdown': "Software Engineer jobs at Y Combinator startups | Y Combinator\n\n[About](/about)[What Happens at YC?](/about)[Apply](/apply)[YC Interview Guide](/interviews)[FAQ](/faq)[People](/people)[YC Blog](/blog)[Companies](/companies)[Startup Directory](/companies)[Founder Directory](/companies/founders)[Launch YC](/launches)[Startup Jobs](/jobs)[All Jobs](/jobs)[◦ Engineering](/jobs/role/software-engineer)[◦ Operations](/jobs/role/operations)[◦ Marketing](/jobs/role/marketing)[◦ Sales](/jobs/role/sales)[Startup Job Guide](/startup-job-guide)[YC Startup Jobs Blog](/blog/jobs)[Find a Co-Founder](/cofounder-matching)[Library](/library)[SAFE](/documents)[Resources](/library)[Startup School](https://startupschool.org?utm_source=yc&utm_campaign=ycdc_header)[Newsletter](/subscribe)[Requests for Startups](/rfs)[For Investors](/investors)[Hacker News](https://news.ycombinator.com/)[

In [14]:
class Details(BaseModel):
  company_name: str = Field(description="This indicates the name of the company in which the role is in.", examples = ["Meta", "Amazon"])
  role_name: str = Field(description="This indicates the name of the role in the company.", examples = ["Software Developer", "AI Engineer"])
  description: str = Field(description="A short description on what the work will involve.")
  location: Optional[str] = Field(description="This is the location or locations in which the job is available.")
  skills: Optional[str] = Field(description="Note down the skills the role is demanding or is mentioned as minimum requirements.", examples = ["Node, React", "LangChain, LangGraph"])
  experience: Optional[str] = Field(description="How many years of experience is the job looking for")
  sponsors: Optional[bool] = Field(description = "Whether it sponsors or not. True if it does False if it doesn't.")
  


def generate_summaries(markdown_contents):
  pathlib.Path("markdown_summaries").mkdir(exist_ok=True)

  summary_prompt = load_prompt("summaries_markdown_page")

  summary_template = ChatPromptTemplate.from_messages([
    ("system", summary_prompt)
  ])

  llm = ChatOpenAI(model="gpt-4o").with_structured_output(Details)
  summary_chain = summary_template | llm

  summaries = []
  for content in markdown_contents:
    try:
      summary = summary_chain.invoke({
        'markdown_input': ' '.join(content['markdown'].split()[:2000])
      })

      # Create filename for summary
      summary_filename = f"summary_{content['id']}.md"
      summary_filepath = os.path.join("markdown_summaries", summary_filename)
      
      # Save summary to file
      temp = f"""### Job Summary
      - **Company Name:** {summary.company_name}
      - **Role Name:** {summary.role_name}
      - **Description:** {summary.description}
      - **Location:** {summary.location or 'Not specified'}
      - **Skills Required:** {summary.skills or 'Not specified'}
      - **Experience Level:** {summary.experience or 'Not specified'}
      - **Sponsorship Available:** {"Yes" if summary.sponsors else "No"}
      """
      with open(summary_filepath, 'w', encoding='utf-8') as f:
        f.write(f"{temp}")
      
      # Add to summaries list
      summaries.append({
        'markdown_summary': temp,
        'url': content['url']
      })

    except Exception as e:
      print(f"Failed to summarize {content['filepath']}:{str(e)}")
  
  print(f"Successfully generated summaries for {len(markdown_contents)} pages in markdown_summaries/")
  return summaries


summaries = generate_summaries(markdown_contents)


Successfully generated summaries for 39 pages in markdown_summaries/


In [19]:
summaries

[{'markdown_summary': '### Job Summary\n      - **Company Name:** Aviator\n      - **Role Name:** Software Engineer, Recent Grad\n      - **Description:** The role involves working on the development of Google-level engineering productivity tools that enhance efficiency and facilitate workflows.\n      - **Location:** San Francisco, CA, US\n      - **Skills Required:** Frontend development skills\n      - **Experience Level:** Recent graduates\n      - **Sponsorship Available:** No\n      ',
  'url': 'https://www.ycombinator.com/jobs/role/software-engineer'},
 {'markdown_summary': '### Job Summary\n      - **Company Name:** Jobright.ai\n      - **Role Name:** Software Engineer, New Grad & Entry Level\n      - **Description:** Assist in building, maintaining, and enhancing user interfaces, APIs, and core components of the platform using Python, Typescript, and Go Lang, with a primary focus on Python and Typescript. Collaborate with cross-functional teams and contribute to open-source SD

In [20]:
class State(TypedDict):
  messages: Annotated[list, add_messages]
  summaries: List[dict]
  approved: bool
  created_summaries: Annotated[List[dict], Field(description="The summaries that have been created by the summariser")]

graph_builder = StateGraph(State)

In [21]:
llm = ChatOpenAI(model="gpt-4o")

In [22]:
with open("email_template.md", "r") as f:
  email_template = f.read()

class SummariserOutput(BaseModel):
  email_summary: str = Field(description="The summary email of the content")
  message: str = Field(description="A message to the reviewer, asking for feedback on the summary")

summariser_prompt = ChatPromptTemplate.from_messages([
  ("system", load_prompt("summariser")),
  ("placeholder", "{messages}")
])

llm_summariser = summariser_prompt | llm.with_structured_output(SummariserOutput)

def summariser(state: State):
  summariser_output = llm_summariser.invoke({"messages": state["messages"], "list_of_summaries": state["summaries"], "input_template": email_template})
  new_messages = [AIMessage(content=summariser_output.email_summary), AIMessage(content=summariser_output.message)]
  return {"messages": new_messages, "created_summaries": [summariser_output.email_summary]}

In [23]:
class ReviewerOutput(BaseModel):
  approved: bool = Field(description="Whether the summary is approved or not")
  message: str = Field(description="A message to the reviewer, asking for feedback on the summary")

reviewer_prompt = ChatPromptTemplate.from_messages([
  ("system", load_prompt("reviewer")),
  ("placeholder", "{messages}"),
])

llm_reviewer = reviewer_prompt | llm.with_structured_output(ReviewerOutput)


def reviewer(state: State):
  # Convert AIMessages to HumanMessages and vice versa
  converted_messages = []
  for msg in state["messages"]:
    if isinstance(msg, AIMessage):
      converted_messages.append(HumanMessage(content=msg.content))
    elif isinstance(msg, HumanMessage):
      converted_messages.append(AIMessage(content=msg.content))
    else:
      converted_messages.append(msg)
  state["messages"] = converted_messages
  reviewer_output = llm_reviewer.invoke({"messages": state["messages"]})
  new_messages = [HumanMessage(content=reviewer_output.message)]
  return {"messages": new_messages, "approved": reviewer_output.approved}

In [25]:
def conditional_edge(state: State) -> Literal["summariser", END]:
  if state["approved"]:
    return END
  else:
    return "summariser"


# Create and configure the graph
graph_builder.add_node("summariser", summariser)
graph_builder.add_node("reviewer", reviewer)
graph_builder.add_edge(START, "summariser")
graph_builder.add_edge("summariser", "reviewer")
graph_builder.add_conditional_edges('reviewer', conditional_edge)

# Compile and run the graph
graph = graph_builder.compile()

In [36]:
output = graph.invoke({"summaries": summaries})

In [37]:
final_summary = output["created_summaries"][-1]

display(Markdown(final_summary))

<h1>New Jobs</h1>
<h2>Key Job Highlights</h2>
<p>Here's a curated selection of the most relevant job opportunities based on your interests and profile:</p>
<h3>Software Engineer at Tech Innovators</h3>
<ul>
    <li><strong>Company:</strong> Tech Innovators</li>
    <li><strong>Role:</strong> Software Engineer</li>
    <li><strong>Short Description:</strong> Lead the development of cutting-edge software solutions. 🌟</li>
    <li><strong>Location:</strong> San Francisco, CA</li>
    <li><strong>Skills:</strong> Java, Python, Agile methodologies</li>
    <li><strong>Experience:</strong> 3+ years</li>
    <li><strong>Whether it Sponsors:</strong> Yes</li>
    <li><a href="https://www.techinnovators.com/careers/software-engineer">Job Link</a></li>
</ul>
<h3>Marketing Specialist at Creative Solutions</h3>
<ul>
    <li><strong>Company:</strong> Creative Solutions</li>
    <li><strong>Role:</strong> Marketing Specialist</li>
    <li><strong>Short Description:</strong> Develop marketing strategies for new product launches. 🚀</li>
    <li><strong>Location:</strong> Remote</li>
    <li><strong>Skills:</strong> Digital marketing, Content Creation</li>
    <li><strong>Experience:</strong> 2+ years</li>
    <li><strong>Whether it Sponsors:</strong> No</li>
    <li><a href="https://www.creativesolutions.com/jobs/marketing-specialist">Job Link</a></li>
</ul>
<h3>Data Analyst at HealthTech</h3>
<ul>
    <li><strong>Company:</strong> HealthTech</li>
    <li><strong>Role:</strong> Data Analyst</li>
    <li><strong>Short Description:</strong> Analyze healthcare data to improve patient outcomes. 🏥</li>
    <li><strong>Location:</strong> New York, NY</li>
    <li><strong>Skills:</strong> SQL, Statistical Analysis</li>
    <li><strong>Experience:</strong> 1+ years</li>
    <li><strong>Whether it Sponsors:</strong> Yes</li>
    <li><a href="https://www.healthtech.com/careers/data-analyst">Job Link</a></li>
</ul>
<h3>Project Manager at Green Energy Co</h3>
<ul>
    <li><strong>Company:</strong> Green Energy Co</li>
    <li><strong>Role:</strong> Project Manager</li>
    <li><strong>Short Description:</strong> Oversee renewable energy projects. 🌍</li>
    <li><strong>Location:</strong> Austin, TX</li>
    <li><strong>Skills:</strong> Project Management, Renewable Energy</li>
    <li><strong>Experience:</strong> 5+ years</li>
    <li><strong>Whether it Sponsors:</strong> No</li>
    <li><a href="https://www.greenenergyco.com/careers/project-manager">Job Link</a></li>
</ul>
<h3>Online Course Developer at EduInspire</h3>
<ul>
    <li><strong>Company:</strong> EduInspire</li>
    <li><strong>Role:</strong> Online Course Developer</li>
    <li><strong>Short Description:</strong> Create engaging online content for learners worldwide. 📚</li>
    <li><strong>Location:</strong> Remote</li>
    <li><strong>Skills:</strong> Instructional Design, LMS platforms</li>
    <li><strong>Experience:</strong> 2+ years</li>
    <li><strong>Whether it Sponsors:</strong> Yes</li>
    <li><a href="https://www.eduinspire.com/careers/course-developer">Job Link</a></li>
</ul>
<h2>Other Jobs Related to your Resume</h2>
<p>Explore additional positions that align with your experience and skill set:</p>
<h3>Accountant at FinanceCo</h3>
<ul>
    <li><strong>Company:</strong> FinanceCo</li>
    <li><strong>Role:</strong> Accountant</li>
    <li><strong>Short Description:</strong> Manage company accounts and financial reports. 💼</li>
    <li><a href="https://www.financeco.com/careers/accountant">Job Link</a></li>
</ul>
<h3>Graphic Designer at ArtHub</h3>
<ul>
    <li><strong>Company:</strong> ArtHub</li>
    <li><strong>Role:</strong> Graphic Designer</li>
    <li><strong>Short Description:</strong> Design visuals for a variety of digital platforms. 🎨</li>
    <li><a href="https://www.arthub.com/careers/graphic-designer">Job Link</a></li>
</ul>
<h3>Network Engineer at WorldConnect</h3>
<ul>
    <li><strong>Company:</strong> WorldConnect</li>
    <li><strong>Role:</strong> Network Engineer</li>
    <li><strong>Short Description:</strong> Maintain and enhance network infrastructure. 🔌</li>
    <li><a href="https://www.worldconnect.com/careers/network-engineer">Job Link</a></li>
</ul>
<h3>Research Scientist at NanoTech</h3>
<ul>
    <li><strong>Company:</strong> NanoTech</li>
    <li><strong>Role:</strong> Research Scientist</li>
    <li><strong>Short Description:</strong> Conduct research on advanced nano-materials. 🔬</li>
    <li><a href="https://www.nanotech.com/careers/research-scientist">Job Link</a></li>
</ul>
<h3>Cybersecurity Analyst at SecureSafe</h3>
<ul>
    <li><strong>Company:</strong> SecureSafe</li>
    <li><strong>Role:</strong> Cybersecurity Analyst</li>
    <li><strong>Short Description:</strong> Develop security protocols to protect data. 🔐</li>
    <li><a href="https://www.securesafe.com/careers/cybersecurity-analyst">Job Link</a></li>
</ul>
<h3>Travel Consultant at TravelAdventures</h3>
<ul>
    <li><strong>Company:</strong> TravelAdventures</li>
    <li><strong>Role:</strong> Travel Consultant</li>
    <li><strong>Short Description:</strong> Plan and book travel arrangements for clients. ✈️</li>
    <li><a href="https://www.traveladventures.com/careers/travel-consultant">Job Link</a></li>
</ul>
<h3>Event Planner at BrightSkies</h3>
<ul>
    <li><strong>Company:</strong> BrightSkies</li>
    <li><strong>Role:</strong> Event Planner</li>
    <li><strong>Short Description:</strong> Organize and execute high-profile events. 📅</li>
    <li><a href="https://www.brightskies.com/careers/event-planner">Job Link</a></li>
</ul>
<h3>Chef at FoodiesUnited</h3>
<ul>
    <li><strong>Company:</strong> FoodiesUnited</li>
    <li><strong>Role:</strong> Chef</li>
    <li><strong>Short Description:</strong> Prepare gourmet meals for our upscale clientele. 🍽️</li>
    <li><a href="https://www.foodiesunited.com/careers/chef">Job Link</a></li>
</ul>
<h3>Educational Consultant at LearningPartners</h3>
<ul>
    <li><strong>Company:</strong> LearningPartners</li>
    <li><strong>Role:</strong> Educational Consultant</li>
    <li><strong>Short Description:</strong> Advise schools on curriculum development. 📑</li>
    <li><a href="https://www.learningpartners.com/careers/educational-consultant">Job Link</a></li>
</ul>
<h3>Product Manager at InnovateCH</h3>
<ul>
    <li><strong>Company:</strong> InnovateCH</li>
    <li><strong>Role:</strong> Product Manager</li>
    <li><strong>Short Description:</strong> Lead the lifecycle of new tech products. 📈</li>
    <li><a href="https://www.innovatech.com/careers/product-manager">Job Link</a></li>
</ul>

In [47]:
def send_email(email_content: str):
    """Send email using Sendinblue API"""
    configuration = sib_api_v3_sdk.Configuration()
    configuration.api_key['api-key'] = "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"
    print(configuration.api_key['api-key'])
    
    api_instance = sib_api_v3_sdk.TransactionalEmailsApi(sib_api_v3_sdk.ApiClient(configuration))
    
    email_params = {
        "subject": "Jobs",
        "sender": {"name": "Dhiraj Shah", "email": "dhirajsanjayshah@gmail.com"},
        "html_content": email_content,
        "to": [{"email": "dhirajssh@gmail.com", "name": "Dhiraj Shah"}],
        "params": {"subject": "Jobs"}
    }
    
    send_smtp_email = sib_api_v3_sdk.SendSmtpEmail(**email_params)
    
    try:
        api_response = api_instance.send_transac_email(send_smtp_email)
        print(api_response)
    except ApiException as e:
        print(f"Exception when calling SMTPApi->send_transac_email: {e}\n")



send_email(final_summary)


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
{'message_id': '<202507191818.14492062898@smtp-relay.mailin.fr>',
 'message_ids': None}
