In [33]:
from typing import List
from dotenv import load_dotenv

from IPython.display import display, Markdown, HTML


from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_community.document_loaders import SeleniumURLLoader, UnstructuredURLLoader, PlaywrightURLLoader



from prompt import JOB_DESCRIPTION_EXTRACTOR
from config import MODEL_NAME

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (presumably the project-local `prompt` and `config`) are picked up
# live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Load variables from a .env file into the process environment.
# Presumably this is where the credentials used by ChatOpenAI come from — TODO confirm.
load_dotenv()

True

In [39]:
# Job postings to scrape; only the Microsoft posting is active.
# Alternative kept for reference (swap it in to try a different careers site):
#   "https://www.google.com/about/careers/applications/jobs/results/111711038985904838-software-engineering-manager-ii-site-reliability-engineering-shopping"
urls = ["https://jobs.careers.microsoft.com/global/en/job/1761526/Principal-Applied-Machine-Learning-Engineer"]


In [14]:
# Attempt 1: load the pages listed in `urls` with UnstructuredURLLoader.
loader = UnstructuredURLLoader(urls=urls)

data = loader.load()

# Inspect the first Document.
# NOTE(review): the saved output of this cell has an empty page_content and its
# source is the Google Careers URL that is commented out in `urls`, so it was
# produced before `urls` got its current value — re-run to refresh it.
data[0]

Document(metadata={'source': 'https://www.google.com/about/careers/applications/jobs/results/111711038985904838-software-engineering-manager-ii-site-reliability-engineering-shopping'}, page_content='')

In [15]:
# Attempt 2: load the same pages with PlaywrightURLLoader, passing "header" and
# "footer" as selectors for the loader to remove from each page.
loader = PlaywrightURLLoader(urls=urls, remove_selectors=["header", "footer"])

# Cell-level `await`: this is not valid in a plain Python script, it relies on
# the notebook kernel allowing await at the top level of a cell.
data = await loader.aload()

# Inspect the first Document. The saved output (Google Careers URL) also shows
# an empty page_content.
data[0]

Document(metadata={'source': 'https://www.google.com/about/careers/applications/jobs/results/111711038985904838-software-engineering-manager-ii-site-reliability-engineering-shopping'}, page_content='')

In [16]:
# Attempt 3: load the same pages with SeleniumURLLoader.
loader = SeleniumURLLoader(urls=urls)

data = loader.load()
# Inspect the first Document. In the saved output (Google Careers URL) the
# metadata carries a title and language, but page_content is still empty.
data[0]

Document(metadata={'source': 'https://www.google.com/about/careers/applications/jobs/results/111711038985904838-software-engineering-manager-ii-site-reliability-engineering-shopping', 'title': 'Software Engineering Manager II, Site Reliability Engineering, Shopping — Google Careers', 'description': 'No description found.', 'language': 'en-US'}, page_content='')

In [18]:
from selenium import webdriver

In [19]:
# Start a Firefox session through Selenium; later cells use it to open the job
# page and read back its HTML.
# NOTE(review): no driver.quit() appears in the visible cells, so the browser
# presumably stays open until the kernel exits — consider quitting it once
# page_source has been read.
driver = webdriver.Firefox()

In [40]:
# Point the browser at the first entry of `urls`; the resulting page is read
# back later through driver.page_source.
driver.get(urls[0])

In [41]:
from bs4 import BeautifulSoup

In [42]:
def soup_to_text(soup: BeautifulSoup) -> str:
    """Return the text of ``soup`` as newline-separated, non-empty chunks.

    ``<script>`` and ``<style>`` elements are removed from ``soup`` in place
    before the text is read, because their contents are code/CSS rather than
    page text.

    Args:
        soup: Parsed HTML document. It is modified: every script/style tag is
            dropped from the tree.

    Returns:
        The document text where each line is stripped, split on double spaces
        into separate phrases, each phrase stripped again, empty phrases
        discarded, and the remainder joined with ``"\n"``.
    """
    for tag in soup(["script", "style"]):
        # The removed tags are never used again, so destroy them outright.
        tag.decompose()

    chunks = []
    for line in soup.get_text().splitlines():
        # A run of two spaces is treated as a separator between phrases that
        # were laid out side by side on the same line.
        for phrase in line.strip().split("  "):
            phrase = phrase.strip()
            if phrase:
                chunks.append(phrase)
    return "\n".join(chunks)


# Parse the HTML currently loaded in the Selenium browser and reduce it to text.
soup = BeautifulSoup(driver.page_source, features="html.parser")
text = soup_to_text(soup)


In [59]:
# Character count of the cleaned page text that is sent to the model as the
# human message.
len(text)

9962

In [46]:
# Chat model used for the extraction step. MODEL_NAME is imported from `config`;
# temperature=0 presumably keeps the output as repeatable as possible.
model = ChatOpenAI(
    model=MODEL_NAME,
    temperature=0,
    verbose=True,
)


In [None]:
import html2text


In [47]:

# Two-message conversation: the extraction prompt as the system message and the
# cleaned page text as the human message.
messages = [
    SystemMessage(content=JOB_DESCRIPTION_EXTRACTOR),
    HumanMessage(content=text),
]

# Send the conversation to the chat model and keep its reply.
response = model.invoke(messages)

In [51]:
# Render the model's reply as Markdown rather than showing the raw string.
display(Markdown(response.content))

**Job Title:** Principal Applied Machine Learning Engineer  
**Location:** Redmond, Washington, United States  
**Date Posted:** August 28, 2024  
**Job Number:** 1761526  
**Work Site:** Up to 100% work from home  
**Travel:** 0-25%  
**Role Type:** Individual Contributor  
**Profession:** Research, Applied, & Data Sciences  
**Discipline:** Applied Sciences  
**Employment Type:** Full-Time  

### Overview
Microsoft Security is seeking a Principal Applied Machine Learning Engineer to join a dedicated team focused on cybersecurity. The role involves leveraging machine learning and artificial intelligence to address the evolving challenges in digital security. The team aims to create innovative solutions that enhance security for users, customers, and developers.

### Responsibilities
- Develop and execute advanced research projects in machine learning and AI for cybersecurity, particularly in cloud security.
- Stay updated on industry trends and advancements, providing expertise to the team.
- Communicate research findings to senior leadership and external audiences through publications and presentations.
- Write production-ready code to implement research projects into AI products.

### Qualifications

#### Required/Minimum Qualifications
- **Education:** 
  - Bachelor's Degree in Statistics, Econometrics, Computer Science, Electrical or Computer Engineering, or related field AND 6+ years of related experience
  - OR Master's Degree in the same fields AND 4+ years of related experience
  - OR Doctorate in the same fields AND 3+ years of related experience
  - OR equivalent experience.
  
- **Experience:**
  - 5+ years of hands-on experience in programming languages such as C# or Python.
  - Proven problem-solving and architecture design skills with the ability to write high-quality production code.
  - Experience working with cross-functional teams and strong communication skills.

#### Additional or Preferred Qualifications
- **Education:** 
  - Master's Degree AND 9+ years of related experience
  - OR Doctorate AND 6+ years of related experience.
  
- **Experience:**
  - 5+ years of experience in creating publications (patents, peer-reviewed papers).
  - 3+ years of experience developing and deploying live production systems.
  - 2+ years of experience presenting at conferences as an invited speaker.
  - Prior experience in AI product development and familiarity with LLM/NLP.
  - Knowledge of large language models, deep learning, and cloud technologies.

### Compensation
- **Base Pay Range (U.S.):** $137,600 - $267,000 per year.
- **Base Pay Range (San Francisco Bay Area and New York City):** $180,400 - $294,000 per year.

### Benefits
- Industry-leading healthcare
- Educational resources
- Discounts on products and services
- Generous time away
- Maternity and paternity leave
- Opportunities to network and connect

### Application Information
Microsoft will accept applications for this role until September 18, 2024. Microsoft is an equal opportunity employer and encourages applications from all qualified individuals.

For more details and to apply, visit the Microsoft Careers website.