---
format:
    html:
        embed-resources: true
---

---
format:
    html:
        embed-resources: true
---

# Optional Bonus: Resume and cover letter creator 

This is not required, but will earn you up to +5 bonus points of extra credit. 

Using a combination of an LLM API wrapper and Quarto, write a pipeline that will create a resume PDF (or HTML), and a cover letter, for each of the given job descriptions. 

If you are doing this, for privacy reasons, you MUST do it for a fake "Jane-doe" (e.g. fake education, fake experience, etc.). 

Here is something to get you started, it is partially complete and quite hacky.

https://jfh.georgetown.domains/centralized-lecture-content/content/general-resources/share/resume-tool/resume-tool%202.zip

In [16]:
import pandas as pd
from openai import OpenAI
import os
import yaml
from pathlib import Path
import subprocess
import json
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()

True

In [17]:

class ResumeGenerator:
    def __init__(self, openai_api_key):
        """Initialize the resume generator with OpenAI API key"""
        self.client = OpenAI(api_key=openai_api_key)
        
        # Load Jane Doe's base information
        self.candidate_info = {
            "name": "Jane Doe",
            "email": "jane.doe@email.com",
            "phone": "(555) 123-4567",
            "linkedin": "linkedin.com/in/jane-doe",
            "github": "github.com/jane-doe",
            "education": [
                {
                    "degree": "Master of Science in Data Science",
                    "institution": "State University",
                    "location": "Springfield, ST",
                    "graduation": "May 2023",
                    "gpa": "3.92/4.0"
                },
                {
                    "degree": "Bachelor of Science in Computer Science",
                    "institution": "Tech University",
                    "location": "Techville, ST",
                    "graduation": "May 2021",
                    "gpa": "3.85/4.0"
                }
            ],
            "experience": [
                {
                    "title": "Data Scientist",
                    "company": "Tech Solutions Inc.",
                    "location": "Remote",
                    "dates": "June 2023 - Present",
                    "highlights": [
                        "Led development of ML models improving customer retention by 25%",
                        "Implemented automated data pipeline reducing processing time by 60%",
                        "Mentored 3 junior data scientists in ML best practices"
                    ]
                },
                {
                    "title": "Data Science Intern",
                    "company": "Innovation Corp",
                    "location": "Springfield, ST",
                    "dates": "May 2022 - August 2022",
                    "highlights": [
                        "Developed predictive models for customer behavior",
                        "Created interactive dashboards using Python and Streamlit",
                        "Optimized ETL processes improving efficiency by 40%"
                    ]
                }
            ],
            "skills": {
                "programming": ["Python", "R", "SQL", "Java"],
                "tools": ["TensorFlow", "PyTorch", "scikit-learn", "Docker"],
                "databases": ["PostgreSQL", "MongoDB", "Redis"],
                "cloud": ["AWS", "GCP", "Azure"],
                "other": ["Git", "CI/CD", "Agile"]
            }
        }

    def _generate_tailored_content(self, job_description):
        """Generate tailored resume content and cover letter using OpenAI API"""
        
        # First, analyze the job description to identify key requirements
        analysis_prompt = f"""
        Analyze this job description and identify:
        1. Key technical skills required
        2. Soft skills emphasized
        3. Main responsibilities
        4. Company values and culture indicators
        
        Job Description:
        {job_description}
        """
        
        analysis_response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": analysis_prompt}
            ]
        )
        
        job_analysis = analysis_response.choices[0].message.content
        
        # Generate tailored resume content
        resume_prompt = f"""
        Using the job analysis below and Jane Doe's information, generate:
        1. A tailored professional summary (2-3 sentences)
        2. Prioritized and tailored versions of her experience bullet points
        3. A relevant skills section
        
        Job Analysis:
        {job_analysis}
        
        Jane's Information:
        {json.dumps(self.candidate_info, indent=2)}
        
        Format the response as JSON with keys: 'summary', 'experience_bullets', 'relevant_skills'
        """
        
        resume_response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": resume_prompt}
            ]
        )
        
        resume_content = json.loads(resume_response.choices[0].message.content)
        
        # Generate cover letter
        cover_letter_prompt = f"""
        Write a professional cover letter for Jane Doe using the job analysis and her background.
        The letter should be engaging, specific to the role, and highlight relevant experiences.
        
        Job Analysis:
        {job_analysis}
        
        Jane's Background:
        {json.dumps(self.candidate_info, indent=2)}
        """
        
        cover_letter_response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": cover_letter_prompt}
            ]
        )
        
        cover_letter = cover_letter_response.choices[0].message.content
        
        return resume_content, cover_letter

    def generate_documents(self, job_df, output_dir):
        """Generate resume and cover letter for each job description"""
        output_path = Path(output_dir)
        output_path.mkdir(exist_ok=True)
        
        for idx, row in job_df.iterrows():
            company_name = row['Company Name'].replace(' ', '_')
            job_title = row['Job Title'].replace(' ', '_')
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            base_filename = f"{company_name}_{job_title}_{timestamp}"
            
            # Generate content
            resume_content, cover_letter = self._generate_tailored_content(row['Job Responsibilities/Duties'])
            
            # Create resume Quarto document
            resume_yaml = {
                "title": "Jane Doe",
                "format": {
                    "pdf": {
                        "documentclass": "article",
                        "geometry": "margin=1in",
                        "fontsize": "11pt"
                    }
                }
            }
            
            resume_qmd = f"""---
{yaml.dump(resume_yaml)}
---

::: {{.content-visible when-format="pdf"}}
| {self.candidate_info['email']} | {self.candidate_info['phone']} |
| {self.candidate_info['linkedin']} | {self.candidate_info['github']} |
:::

## Professional Summary

{resume_content['summary']}

## Education

{self._format_education()}

## Professional Experience

{self._format_experience(resume_content['experience_bullets'])}

## Skills

{self._format_skills(resume_content['relevant_skills'])}
"""
            
            resume_file = output_path / f"{base_filename}_resume.qmd"
            resume_file.write_text(resume_qmd)
            
            # Create cover letter Quarto document
            cover_letter_yaml = {
                "title": f"Cover Letter - {row['Job Title']} at {row['Company Name']}",
                "format": {
                    "pdf": {
                        "documentclass": "article",
                        "geometry": "margin=1in",
                        "fontsize": "11pt"
                    }
                }
            }
            
            cover_letter_qmd = f"""---
{yaml.dump(cover_letter_yaml)}
---

{cover_letter}
"""
            
            cover_letter_file = output_path / f"{base_filename}_cover_letter.qmd"
            cover_letter_file.write_text(cover_letter_qmd)
            
            # Render documents using Quarto
            subprocess.run(['quarto', 'render', str(resume_file)], check=True)
            subprocess.run(['quarto', 'render', str(cover_letter_file)], check=True)

    def _format_education(self):
        """Format education section for Quarto document"""
        edu_text = ""
        for edu in self.candidate_info['education']:
            edu_text += f"""### {edu['institution']}
* {edu['degree']}
* {edu['location']}
* Graduated: {edu['graduation']}
* GPA: {edu['gpa']}

"""
        return edu_text
    
    
    def _format_experience(self, tailored_bullets):
        """Format experience section for Quarto document"""
        exp_text = ""
        for exp in self.candidate_info['experience']:
            exp_text += f"""### {exp['title']} | {exp['company']}
    * {exp['location']} | {exp['dates']}
    """
            # If tailored_bullets is a list, just iterate through it
            if isinstance(tailored_bullets, list):
                for bullet in tailored_bullets:
                    exp_text += f"* {bullet}\n"
            else:  # assuming it's a dict and the title is a key
                for bullet in tailored_bullets.get(exp['title'], exp['highlights']):
                    exp_text += f"* {bullet}\n"
            exp_text += "\n"
        return exp_text


    def _format_skills(self, relevant_skills):
        """Format skills section for Quarto document"""
        skills_text = ""
        for category, skills in relevant_skills.items():
            skills_text += f"* **{category}:** {', '.join(skills)}\n"
        return skills_text

In [18]:
def main():
    try:
        job_df = pd.read_csv('data/processed-jobs-1.csv')
    except UnicodeDecodeError:
        job_df = pd.read_csv('data/processed-jobs-1.csv', encoding='latin-1')
    except:
        job_df = pd.read_csv('data/processed-jobs-1.csv', encoding='cp1252')
    
    # Extract the first 3 rows from job_df as a small sample for testing
    job_df_new = job_df.iloc[0:3]

    # Initialize generator with your OpenAI API key
    openai_api_key = os.getenv('OPENAI_API_KEY')
    generator = ResumeGenerator(openai_api_key)
    
    # Generate documents
    generator.generate_documents(job_df_new, 'job_documents')

if __name__ == "__main__":
    main()

[1mpandoc [22m
  to: latex
  output-file: 'Cardinal_Health_Data_Scientist,_Data_Science_20241109_195416_resume.tex'
  standalone: true
  pdf-engine: xelatex
  variables:
    graphics: true
    tables: true
  default-image-extension: pdf
  
[1mmetadata[22m
  block-headings: true
  title: Jane Doe
  documentclass: article
  fontsize: 11pt
  geometry: margin=1in
  
[1m[34m
Rendering PDF[39m[22m
[1m[34mrunning xelatex - 1[39m[22m
  This is XeTeX, Version 3.141592653-2.6-0.999996 (TeX Live 2024) (preloaded format=xelatex)
   restricted \write18 enabled.
  entering extended mode
  
[1m[34mrunning xelatex - 2[39m[22m
  This is XeTeX, Version 3.141592653-2.6-0.999996 (TeX Live 2024) (preloaded format=xelatex)
   restricted \write18 enabled.
  entering extended mode
  

Output created: Cardinal_Health_Data_Scientist,_Data_Science_20241109_195416_resume.pdf

[1mpandoc [22m
  to: latex
  output-file: 'Cardinal_Health_Data_Scientist,_Data_Science_20241109_195416_cover_letter.tex'