# AgentJo CV Generator
- Uses Selenium headless web browser to get information via query (generated by agent)
- Able to reformat output to CV format as required by user

Created by John Tan Chong Min in Nov 2024, Modified 4 Dec 2024 for better CV generationn

In [1]:
from agentjo import *

In [2]:
import os
import requests
import openai
import tiktoken
from langchain.text_splitter import TokenTextSplitter
import json
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import pandas as pd
import pprint
import re
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# AZURE OPENAI 

In [3]:
### Put in your Azure OpenAI keys here ###
AZURE_OPENAI_KEY = ""
AZURE_OPENAI_ENDPOINT = ""
API_VERSION = ""

In [5]:
from openai import AzureOpenAI

azure_open_ai_client = AzureOpenAI(
    api_key = AZURE_OPENAI_KEY,
    azure_endpoint = AZURE_OPENAI_ENDPOINT,
    api_version = API_VERSION
)
 
def azure_llm(system_prompt: str, user_prompt: str) -> str:
    response = azure_open_ai_client.chat.completions.create(
        model = "gpt-4o-mini",
        messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
        temperature = 0
    )
 
    return response.choices[0].message.content

In [6]:
azure_llm('Give me 5 items of a category', 'names')

'Sure! Here are five names:\n\n1. Emily\n2. Michael\n3. Sophia\n4. James\n5. Olivia'

# AWS BEDROCK

In [7]:
### Put in your AWS keys here ###
MY_KEY = ""
MY_SECRET = ""

In [9]:
import boto3
import json

def claude(system_prompt: str, user_prompt: str):
    bedrock = boto3.client(service_name="bedrock-runtime",
                        aws_access_key_id=MY_KEY,
                        aws_secret_access_key=MY_SECRET,
                        region_name="us-west-2",
                        )
    
    body = json.dumps({
    "max_tokens": 16000,
    "temperature": 0,
    "system": system_prompt,
    "messages": [{"role": "user", "content": user_prompt}],
    "anthropic_version": "bedrock-2023-05-31"
    })
    

    response = bedrock.invoke_model(body=body, 
                                    # modelId="anthropic.claude-3-haiku-20240307-v1:0"
                                    modelId = "anthropic.claude-3-5-sonnet-20241022-v2:0"
                                    # modelId="anthropic.claude-3-sonnet-20240229-v1:0"
                                    # modelId="anthropic.claude-3-opus-20240229-v1:0"
                                   )
    response_body = json.loads(response.get("body").read())

    response_text = response_body.get("content")[0].get("text")
    return response_text

def llama(system_prompt: str, user_prompt: str):
    bedrock = boto3.client(service_name="bedrock-runtime",
                        aws_access_key_id=MY_KEY,
                        aws_secret_access_key=MY_SECRET,
                        region_name="us-west-2",
                        )
    
    # Define the user message to send.
    user_message = 'System Prompt:' + system_prompt + '\nUser Prompt' + user_prompt

    # Embed the message in Llama 3's prompt format.
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>
{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
    
    body = json.dumps({
    "max_gen_len": 2048,
    "temperature": 0,
    "prompt": prompt,
    })

    response = bedrock.invoke_model(body=body, 
                                    # modelId = "meta.llama3-1-405b-instruct-v1:0"
                                    modelId = "meta.llama3-1-70b-instruct-v1:0"
                                   )
    
    # Decode the native response body.
    model_response = json.loads(response["body"].read())

    # Extract and print the generated text.
    response_text = model_response["generation"]
    
    return response_text

In [10]:
claude('Give me 5 items of a category', 'names')

'Here are 5 names:\n\n1. Emma\n2. Alexander\n3. Sofia\n4. Lucas\n5. Isabella'

# OPENAI

In [11]:
### Put in your OpenAI API key here ###
os.environ['OPENAI_API_KEY'] = ""

In [13]:
def llm(system_prompt: str, user_prompt: str) -> str:
    ''' Here, we use OpenAI for illustration, you can change it to your own LLM '''
    # ensure your LLM imports are all within this function
    from openai import OpenAI
    
    # define your own LLM here
    client = OpenAI()
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        temperature = 0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    return response.choices[0].message.content

In [14]:
llm('You are a friendly assistant', 'Tell me a joke')

'Sure! Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!'

# Custom Search API
- https://developers.google.com/custom-search/v1/introduction

In [15]:
### Fill in your api_key and search engine (cx) here ###
api_key = ""
cx = ""

In [17]:
def spaced_text(soup):
    return " ".join(t.strip() for t in soup.findAll(string=True) if t.parent.name not in ['style', 'script', 'head', 'title', 'meta', '[document]'])

def start_driver():
    try:
        driver.quit()
        del driver
        print('Deleting existing driver')
    except Exception as e:
        print('No existing driver to delete')
        
    print('Initializing new driver')
    options = Options()
    options.add_argument("--headless")
    
    # Set the User-Agent
    options.add_argument("user-agent=Mozilla")

    # Set the option to accept all SSL certificates by default
    options.add_argument('--ignore-certificate-errors')

    # initialize a browser
    driver = webdriver.Firefox(options=options)
    print('Driver initialized:', driver)
    return driver

In [18]:
def view_url(url, driver = start_driver(), timeout = 2):
    ''' Views how GPT would see a webpage '''
    try:
        # Get the webpage
        driver.get(url)

        # Let's wait for the JavaScript to run with an implicit wait
        # Here we wait up to timeout seconds for the elements to become available
        driver.implicitly_wait(timeout)

        # Get the page source and parse it with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        
        # Extract metadata information
        text_content = 'Metadata:\n'
        meta_tags = soup.find_all("meta")
        for meta_tag in meta_tags:
            # Get the 'name' and 'content' attributes if present
            name = meta_tag.get("name")
            contents = meta_tag.get("content")
            if name and contents:
                text_content += f"{name}: {contents}\n"
                
        text_content = text_content + '\nMain Text:\n' + spaced_text(soup)

        # Do space processing
        text_content = re.sub('\n+', '\n', text_content)
        text_content = re.sub('\ {2,}', ' ', text_content)

    except Exception as e:
        print(e)
        return 'Unable to retrieve data'

    return text_content

No existing driver to delete
Initializing new driver
Driver initialized: <selenium.webdriver.firefox.webdriver.WebDriver (session="2c49a676-1d03-4bb7-a4fa-5fb86007bbeb")>


# Agent Functions - Search Web

In [19]:
def search_web(query: str) -> str:
    ''' Searches the web based on query to give response '''
    datalist = []
    # this is for the first 10 sites for each search term
    for search_term in [query]:
        # Send a GET request to the Custom Search API
        response = requests.get(f'https://www.googleapis.com/customsearch/v1?key={api_key}&cx={cx}&q={search_term}')

        # Extract the relevant information from the response
        data = response.json()

        datalist.append(data)
        
    # Use the Google results and populate the URL dictionary
    urldict = {}
    mainurllist = []
    for num, data in enumerate(datalist):
        print(f'Doing split {num+1} out of {len(datalist)}, search term: {query}')
        # Process the search results and get list of secondary sources
        items = data.get('items', [])
        for item in items:
            title = item.get('title')
            snippet = item.get('snippet')
            url = item.get('link')
            # if url has been done before, skip it
            if url in urldict: continue

            urldict[url] = title
            mainurllist.append(url)
            
    # just read the first url from mainurllist
    url = mainurllist[0]
    out = view_url(url)
    return out

In [20]:
search_web('John Tan Chong Min google scholar')

Doing split 1 out of 1, search term: John Tan Chong Min google scholar


'Metadata:\nreferrer: origin-when-cross-origin\nviewport: width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=2\nformat-detection: telephone=no\ndescription: \u202aNational University of Singapore\u202c - \u202a\u202aCited by 80\u202c\u202c - \u202aLearning Abilities of LLMs\u202c - \u202aMemory and Cognition\u202c - \u202aMulti-Agent Learning\u202c\ntwitter:card: summary\nMain Text:\nLoading... The system can\'t perform the operation now. Try again later. Citations per year Duplicate citations The following articles are merged in Scholar. Their combined citations are counted only for the first article. Merged citations This "Cited by" count includes citations to the following articles in Scholar. The ones marked * may be different from the article in the profile. Add co-authors Co-authors Follow New articles by this author New citations to this author New articles related to this author\'s research Email address for updates Done [if lte IE 9]><div class="gs_alrt" style="p

In [29]:
agent = Agent('Person Profile Builder', 
              '''Writes a professional CV based on profile.
Prioritise google scholar and linkedin profiles
Use only information found on the web''',
              default_to_llm = False,
              llm = llm).assign_functions(search_web)

In [30]:
agent.print_functions()

Name: end_task
Description: Passes the final output to the user
Input: []
Output: {}

Name: search_web
Description:  Searches the web based on <query: str> to give response 
Input: ['query']
Output: {'output_1': 'str'}



In [31]:
agent.reset()
output = agent.run('John Tan Chong Min')

[1m[30mObservation: No subtasks have been completed yet for the assigned task of building a professional CV for John Tan Chong Min.[0m
[1m[32mThoughts: To create a professional CV, I need to gather relevant information about John Tan Chong Min from his Google Scholar and LinkedIn profiles. Since no information has been collected yet, the next step is to search the web for his profiles.[0m
[1m[34mSubtask identified: Search the web for John Tan Chong Min's Google Scholar and LinkedIn profiles to gather information for the CV.[0m
Calling function search_web with parameters {'query': 'John Tan Chong Min Google Scholar profile'}
Doing split 1 out of 1, search term: John Tan Chong Min Google Scholar profile
> {'output_1': 'Metadata:\nreferrer: origin-when-cross-origin\nviewport: width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=2\nformat-detection: telephone=no\ndescription: \u202aNational University of Singapore\u202c - \u202a\u202aCited by 80\u202c\u202c - \u202aLea

In [32]:
output = agent.reply_user("Write a CV for an assistant professor position")

John Tan Chong Min
National University of Singapore
Email: [Verified email at nus.edu.sg]

**Objective**
To secure an assistant professor position where I can leverage my expertise in learning abilities of large language models (LLMs), memory and cognition, and multi-agent learning to contribute to academic research and teaching.

**Education**
- PhD in [Field of Study] - National University of Singapore, [Year of Completion]

**Research Interests**
- Learning Abilities of LLMs
- Memory and Cognition
- Multi-Agent Learning

**Publications**
1. Tan, C. M. J., & Motani, M. (2020). Dropnet: Reducing neural network complexity via iterative pruning. International Conference on Machine Learning, 9356-9366. Cited by 59.
2. Tan, J. C. M., & Motani, M. (2023). Large language model (LLM) as a system of multiple expert agents: An approach to solve the abstraction and reasoning corpus (ARC) challenge. arXiv preprint arXiv:2310.05146. Cited by 9.
3. Tan, J. C. M., & Motani, M. (2024). LLMs as a sys

## How about a more targeted CV
- https://openai.com/careers/machine-learning-engineer-applied-ai/

In [34]:
output = agent.reply_user('''Write a CV for this job profile:
```
About the Team

OpenAI is at the forefront of artificial intelligence, driving innovation and shaping the future with cutting-edge research. Our mission is to ensure that AI's benefits reach everyone. We are looking for visionary Machine Learning Engineers to join our Applied Group, where you'll transform groundbreaking research into real-world applications that can change industries, enhance human creativity, and solve complex problems.

About the Role

As a Machine Learning Engineer in OpenAI's Applied Group, you will have the opportunity to work with some of the brightest minds in AI. You'll contribute to deploying state-of-the-art models in production environments, helping turn research breakthroughs into tangible solutions. If you're excited about making AI technology accessible and impactful, this role is your chance to make a significant mark.

In this role, you will:

Innovate and Deploy: Design and deploy advanced machine learning models that solve real-world problems. Bring OpenAI's research from concept to implementation, creating AI-driven applications with a direct impact.

Collaborate with the Best: Work closely with researchers, software engineers, and product managers to understand complex business challenges and deliver AI-powered solutions. Be part of a dynamic team where ideas flow freely and creativity thrives.

Optimize and Scale: Implement scalable data pipelines, optimize models for performance and accuracy, and ensure they are production-ready. Contribute to projects that require cutting-edge technology and innovative approaches.

Learn and Lead: Stay ahead of the curve by engaging with the latest developments in machine learning and AI. Take part in code reviews, share knowledge, and lead by example to maintain high-quality engineering practices.

Make a Difference: Monitor and maintain deployed models to ensure they continue delivering value. Your work will directly influence how AI benefits individuals, businesses, and society at large.

You might thrive in this role if you:

Master's/  PhD degree in Computer Science, Machine Learning, Data Science, or a related field. 

Demonstrated experience in deep learning and transformers models

Proficiency in frameworks like PyTorch or Tensorflow

Strong foundation in data structures, algorithms, and software engineering principles.

Experience with search relevance, ads ranking  or LLMs is a plus.

Are familiar with methods of training and fine-tuning large language models, such as distillation, supervised fine-tuning, and policy optimization

Excellent problem-solving and analytical skills, with a proactive approach to challenges.

Ability to work collaboratively with cross-functional teams.

Ability to move fast in an environment where things are sometimes loosely defined and may have competing priorities or deadlines

Enjoy owning the problems end-to-end, and are willing to pick up whatever knowledge you're missing to get the job done

About OpenAI

OpenAI is an AI research and deployment company dedicated to ensuring that general-purpose artificial intelligence benefits all of humanity. We push the boundaries of the capabilities of AI systems and seek to safely deploy them to the world through our products. AI is an extremely powerful tool that must be created with safety and human needs at its core, and to achieve our mission, we must encompass and value the many different perspectives, voices, and experiences that form the full spectrum of humanity. 

We are an equal opportunity employer and do not discriminate on the basis of race, religion, national origin, gender, sexual orientation, age, veteran status, disability or any other legally protected status. 

OpenAI Affirmative Action and Equal Employment Opportunity Policy Statement

For US Based Candidates: Pursuant to the San Francisco Fair Chance Ordinance, we will consider qualified applicants with arrest and conviction records.

We are committed to providing reasonable accommodations to applicants with disabilities, and requests can be made via this link.

OpenAI Global Applicant Privacy Policy

At OpenAI, we believe artificial intelligence has the potential to help people solve immense global challenges, and we want the upside of AI to be widely shared. Join us in shaping the future of technology.```''')

John Tan Chong Min
National University of Singapore
Email: [Verified email at nus.edu.sg]

**Objective**
To secure a Machine Learning Engineer position at OpenAI where I can leverage my expertise in deep learning, large language models (LLMs), and innovative problem-solving to contribute to the development of AI-driven applications that have a significant impact on industries and society.

**Education**
- PhD in [Field of Study] - National University of Singapore, [Year of Completion]

**Research Interests**
- Learning Abilities of LLMs
- Memory and Cognition
- Multi-Agent Learning

**Relevant Experience**
- Demonstrated experience in deep learning and transformers models through various research projects, including the development of models for memory-augmented reinforcement learning and multi-agent systems.
- Proficient in frameworks like PyTorch and TensorFlow, as evidenced by my publications and research work.
- Strong foundation in data structures, algorithms, and software enginee