In [13]:
from kaggle_secrets import UserSecretsClient
from openai import OpenAI
import pandas as pd
import chardet
import os

In [4]:
# Fetch the "OPENAI_API_KEY" secret through kaggle_secrets and expose it as an
# environment variable; the OpenAI client below is built from os.environ.
os.environ['OPENAI_API_KEY'] = UserSecretsClient().get_secret("OPENAI_API_KEY")

In [69]:
# Module-level OpenAI client; the XYZ and Poptart classes below reference this
# global directly when they issue chat-completion requests.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

In [70]:
class XYZ():
    """Rewrite resume bullet points into the "XYZ" formula via a chat model.

    The system prompt is built once per instance and embeds the job
    description, so every rewrite request carries the same instructions.
    """

    def __init__(self, job_desc):
        """Keep ``job_desc`` and build the system message that embeds it."""
        self.job_desc = job_desc
        instructions = f"""The XYZ formula is “Did [X] as measured by [Y] by doing [Z].”
                If this bullet point is not in XYZ formatting, then rewrite the following bullet point to match the formula and highlight the technical work, challenges faced and overcame, the impact of their work.
                If you do not have enough context, ask the user for some after giving suggestions to improve it. Whenever you are providing a number metric, replace it with 'XX' so that the user can fill it in.
                The output should be 1-2 lines long with just one sentence and do not end it with a period.
                Don't use personal pronouns.
                Avoid using apostrophes ', ampersands &, and slashes /.
                Avoid the excessive use of adjectives and adverbs.
                Use digits instead of spelling out numbers.
                Each bullet should begin with a strong, past-tense action verb: 
                Good examples of action verbs: analyzed, architected, automated, built, created, decreased, designed, developed, implemented, improved, optimized, published, reduced, refactored
                Try your best to make it relevant to the following needs: {self.job_desc}."""
        self.prompt = {"role": "system", "content": instructions}

    def write_to_db(self):
        """Persistence hook; currently a no-op."""
        # will do later when I have a database
        return None

    def input_bullet_point(self, bullet_point, model="gpt-4o-mini"):
        """Ask the chat model to rewrite one bullet point.

        Args:
            bullet_point: Text of the bullet, sent as the user message.
            model: Name of the chat model to request.

        Returns:
            The message content of the first completion choice.
        """
        user_turn = {"role": "user", "content": bullet_point}
        conversation = [self.prompt, user_turn]

        # Relies on the module-level ``client`` created earlier in the notebook.
        completions_api = client.chat.completions
        reply = completions_api.create(model=model, messages=conversation)

        rewritten = reply.choices[0].message.content
        self.write_to_db()
        return rewritten

In [71]:
class Poptart(): # meow
    """Stateful chat session driven by system instructions loaded from a file.

    The running conversation is kept in ``self.messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts and is re-sent in full on every
    request. Requests go through the module-level ``client``.
    """

    # Default location of the instruction text read by load_model_instructions.
    INSTRUCTIONS_PATH = "/kaggle/input/poptart-instructions/instructions.txt"

    def __init__(self):
        """Load the instruction file, then open the conversation."""
        self.load_model_instructions()
        self.initialize_model()

    def initialize_model(self):
        """Start a fresh conversation and print the model's introduction.

        Resets ``self.messages`` to the system instructions followed by an
        "Introduce yourself." user turn, requests a reply, records it as an
        assistant turn and prints it.
        """
        self.messages = [
            {"role": "system", "content": self.model_instructions},
            {"role": "user", "content": "Introduce yourself."}
        ]
        response = self.chat_with_model(self.messages)
        self.messages.append({"role": "assistant", "content": response})
        print(response)

    def load_model_instructions(self, path=None):
        """Read the system instructions into ``self.model_instructions``.

        Args:
            path: File to read; defaults to ``INSTRUCTIONS_PATH``.
        """
        if path is None:
            path = self.INSTRUCTIONS_PATH
        # Context manager closes the handle even if read() raises.
        with open(path, "r", encoding="utf-8") as f:
            self.model_instructions = f.read()

    def chat_with_model(self, messages, model="gpt-4o"):
        """Send ``messages`` to the chat completions endpoint.

        Args:
            messages: Full conversation as a list of role/content dicts.
            model: Name of the chat model to request.

        Returns:
            The message content of the first completion choice.
        """
        completion = client \
            .chat \
            .completions \
            .create(model=model, messages=messages)

        return completion.choices[0].message.content

    def add_chat(self, input_text, model="gpt-4o-mini"):
        """Append a user turn, fetch the reply, record it, print it, return it.

        Args:
            input_text: Content of the new user message.
            model: Name of the chat model to request for this turn.

        Returns:
            The assistant's reply text.
        """
        self.messages.append({"role": "user", "content": input_text})
        # Forward ``model``: it used to be accepted but ignored, so every turn
        # silently ran on chat_with_model's default instead of the one given.
        response = self.chat_with_model(self.messages, model=model)

        self.messages.append({"role": "assistant", "content": response})
        print(response)
        return response

    def add_prompt_instructions(self, input_text):
        """Append an extra system message; no request is sent."""
        self.messages.append({"role": "system", "content": input_text})

In [94]:
class Section():
    """One titled resume section: a title, a date string and a list of items."""

    def __init__(self, title):
        """Create an empty section named ``title`` with no dates set."""
        self.title = title
        self.dates = ""
        self.content = []

    def __repr__(self):
        # Makes nested sections readable when they are printed or inspected.
        return (
            f"{type(self).__name__}(title={self.title!r}, "
            f"dates={self.dates!r}, content={self.content!r})"
        )

    def get_title(self):
        """Return the section title."""
        return self.title

    def get_dates(self):
        """Return the date string ("" until set_dates is called)."""
        return self.dates

    def get_content(self):
        """Return the live list of items (not a copy)."""
        return self.content

    def set_dates(self, dates):
        """Replace the date string."""
        self.dates = dates

    def add_content(self, item):
        """Append ``item`` to the section's content list."""
        self.content.append(item)

    def display_content(self):
        """Print every content item on its own line."""
        # Plain loop: the previous list comprehension built a list of None
        # values only to discard it.
        for item in self.content:
            print(item)

In [73]:
# Constructing Poptart reads its instruction file and immediately sends the
# "Introduce yourself." request; the reply is printed as this cell's output.
poptart = Poptart()

Well, Human, allow me to grace you with my presence. I am Poptart, a cat of refined taste and even more refined waistline. I'm here to sprinkle a bit of my exquisite expertise on the art of resume writing, particularly for tech professionals. My mom, a rather brilliant human, created this nifty app to aid in her job search, and I've decided to step down from my throne to assist you in yours.

Now, since you're here seeking my guidance, why don't you tell me a bit about the job you're applying for? I suppose that's a start, even for someone who might not fully comprehend the intricacies of a properly prepared tuna. Oh, did I mention I'm a bit peckish? A lovely pate would certainly hit the spot.


In [74]:
# Raw text of the target job posting. It is interpolated into the chat request
# below and later passed to XYZ so bullet rewrites are steered toward it.
job_desc = """Minimum qualifications:

    Master's degree in Statistics, Data Science, Mathematics, Physics, Economics, Operations Research, Engineering, or a related quantitative field.
    8 years of work experience using analytics to solve product or business problems, coding (e.g., Python, R, SQL), querying databases or statistical analysis, or 6 years of work experience with a PhD degree.


Preferred qualifications:

    10 years of work experience using analytics to solve product or business problems, coding (e.g., Python, R, SQL), querying databases or statistical analysis, or 8 years of work experience with a PhD degree.

About the job

Google is and always will be an engineering company. We hire people with a broad set of technical skills who are ready to take on some of technology's greatest challenges and make an impact on millions, if not billions, of users. At Google, data scientists not only revolutionize search, they routinely work on massive scalability and storage solutions, large-scale applications and entirely new platforms for developers around the world. From Google Ads to Chrome, Android to YouTube, Social to Local, Google engineers are changing the world one technological achievement after another. As a Data Scientist, you will evaluate and improve Google's products. You will collaborate with a multi-disciplinary team of engineers and analysts on a wide range of problems. This position will bring scientific rigor and statistical methods to the challenges of product creation, development and improvement with an appreciation for the behaviors of the end user.

The US base salary range for this full-time position is $197,000-$291,000 + bonus + equity + benefits. Our salary ranges are determined by role, level, and location. Within the range, individual pay is determined by work location and additional factors, including job-related skills, experience, and relevant education or training. Your recruiter can share more about the specific salary range for your preferred location during the hiring process.

Please note that the compensation details listed in US role postings reflect the base salary only, and do not include bonus, equity, or benefits. Learn more about benefits at Google.
Responsibilities

    Collaborate with stakeholders in cross-projects and team settings to identify and clarify business or product questions to answer. Provide feedback to translate and refine business questions into tractable analysis, evaluation metrics, or mathematical models.
    Use custom data infrastructure or existing data models as appropriate, using specialized knowledge. Design and evaluate models to mathematically express and solve defined problems with limited precedent.
    Gather information, business goals, priorities, and organizational context around the questions to answer, as well as the existing and upcoming data infrastructure.
    Own the process of gathering, extracting, and compiling data across sources via relevant tools (e.g., SQL, R, Python). Independently format, re-structure, and/or validate data to ensure quality, and review the dataset to ensure it is ready for analysis."""

# First append a system message describing how to treat a job description
# (no request is sent yet), then send the posting as a user turn. add_chat
# prints the reply and returns it, so job_keys holds the model's reply text.
poptart.add_prompt_instructions("The user will then give you the job description or a job title. If the job description is given to you, you will search for main keywords and points that they would want to see on a resume. List them out.")
job_keys = poptart.add_chat(f"Give me only the important information. No need for any other commentary, I am saving this content as a variable: {job_desc}")

Minimum Qualifications:

- Master's degree in Statistics, Data Science, Mathematics, Physics, Economics, Operations Research, Engineering, or related quantitative field
- 8 years of experience using analytics for product or business problems, coding (e.g., Python, R, SQL), querying databases, or statistical analysis, or 6 years with a PhD

Preferred Qualifications:

- 10 years of experience using analytics for product or business problems, coding (e.g., Python, R, SQL), querying databases, or statistical analysis, or 8 years with a PhD

Responsibilities:

- Collaborate with stakeholders to clarify business or product questions and provide feedback to refine these into analysis, metrics, or models
- Utilize custom or existing data models; design and evaluate models for solving problems
- Gather business goals, priorities, and organizational context for questions, leveraging data infrastructure
- Manage the data gathering, extraction, and compilation process using tools like SQL, R, and 

In [75]:
# Single source of truth for the resume CSV location (it was previously
# spelled out twice, once for detection and once for parsing).
RESUME_CSV_PATH = '/kaggle/input/resume-data/resume-data(ResumeContent).csv'

# Detect the file's text encoding from its raw bytes so pandas can decode it.
with open(RESUME_CSV_PATH, 'rb') as f:
    result = chardet.detect(f.read())

# Keep only the four columns the rest of the notebook uses.
df = pd.read_csv(RESUME_CSV_PATH, encoding=result['encoding'])[['section', 'content', 'org', 'dates']]

# One frame per resume section, selected by the value in the 'section' column.
experience_df = df[df['section'] == 'Experience']
projects_df = df[df['section'] == 'Projects']
education_df = df[df['section'] == 'Education']
skills_df = df[df['section'] == 'Skills']

In [96]:
class ProjectsSection(Section):
    """Section whose title is fixed to "Projects".

    Nothing else is customised: ``add_content`` and the other accessors come
    from ``Section``. The former override of ``add_content`` was a verbatim
    copy of the inherited method and has been dropped.
    """

    def __init__(self):
        super().__init__("Projects")

# Container that the per-project Section objects are appended to further down.
projects_section = ProjectsSection()
# Every value of the projects 'content' column, stringified and joined with "; ".
projects_content = "; ".join(map(str, projects_df['content']))

['Redesigned an undergraduate traffic prediction model as measured by accuracy and efficiency by implementing advanced algorithms and optimizing data processing techniques',
 'Applied machine learning techniques to optimize traffic congestion forecasting as measured by improved prediction accuracy by developing and validating complex predictive models',
 'Improved model generalization and reliability by integrating real-world data sources as measured by validation accuracy by optimizing data processing techniques',
 'Leveraged Python and scikit-learn to improve model predictions by conducting time series analysis, enabling enhanced accuracy in forecasting outcomes',
 'Implemented feature engineering techniques to extract relevant traffic-related signals as measured by improved model accuracy through the application of advanced statistical methods and data transformation techniques']

In [111]:
# Bullet rewriter primed with the target job posting.
xyz_helper = XYZ(job_desc)
# Extra system guidance for how the projects section should be written.
poptart.add_prompt_instructions("""You will be writing the projects section of the resume. 
    This section is for personal projects, student design teams, and extracurricular/hobbyist projects, not projects from work. Do not include anything from the Experience section.
    Don't use the word "project" in your project titles, it's redundant.
    There's no need to disclose "Personal Project", "Academic Project", or "Group Project" beside your project title.
    Each project should consist of multiple bullet points, not paragraphs.
    Order projects and bullet points based on relevance to the specific job and general impressiveness. Put your best stuff first and grab the reader's attention!""")

# Build one Section per distinct project ('org'). dropna() skips rows with no
# org: a NaN key matches no rows, which used to make the dates lookup raise
# IndexError on an empty list.
for item in projects_df['org'].dropna().unique():
    org_df = projects_df[projects_df['org'] == item]
    section = Section(item)

    # The first row's dates stand for the whole project; a missing value
    # becomes "" (explicit check instead of stripping the text 'nan').
    first_dates = org_df['dates'].iloc[0]
    dates = "" if pd.isna(first_dates) else str(first_dates)
    section.set_dates(dates)

    # Rewrite each raw bullet of this project; reuses org_df rather than
    # filtering projects_df a second time.
    # NOTE(review): the result is a list wrapped in another list and is
    # interpolated into the request below in that form - confirm intended.
    content = [[xyz_helper.input_bullet_point(x) for x in org_df['content'].tolist()]]

    poptart_response = poptart.add_chat(f"Choose the most relevant points from this experience {content}, return as a string separated by new lines with the new guidelines. Keep the XYZ format.").split('\n')

    # Skip blank lines in the reply so no empty bullet is sent back to the
    # model (one wasted request each) or stored in the section.
    response_clean = [
        xyz_helper.input_bullet_point(line.strip().lower())
        for line in poptart_response
        if line.strip()
    ]
    section.add_content(response_clean)
    projects_section.add_content(section)

Conducted statistical analysis on DNA methylation patterns, investigating potential epigenetic markers associated with cancer risk by using robust statistical methods  
Explored large-scale genomic datasets, uncovering patterns in methylation variations to assess their potential impact on gene expression with statistical methods  
Collaborated with a research team to analyze and interpret data, presenting findings that contributed to scientific discussions on the role of epigenetics in disease susceptibility using statistical methods  
Analyzed DNA methylation patterns in cancer patients by developing bespoke analytical frameworks, uncovering potential biomarkers and improving patient outcomes  
Identified biomarkers linked to cancer risk using R and statistical methods by overcoming data complexity and improving model accuracy  
Validated findings by collaborating with a research partner, ensuring accurate analysis and interpretation through statistical rigor  
Developed classificatio