<a href="https://colab.research.google.com/github/adarshlearnngrow/StepUp-AI/blob/main/Skill_Gap_Analysis_and_Action_Plan_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing Required Libraries

In [None]:
# !pip install PyPDF2 openai
import openai
import json
import time
import re
from openai import OpenAI
from google.colab import userdata
import pandas as pd
pd.set_option('display.max_colwidth', None)
from PyPDF2 import PdfReader
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Shared OpenAI client used by the generation code in this notebook; the API key
# is fetched through google.colab's `userdata` under the name "openai_key".
client = OpenAI(api_key=userdata.get("openai_key"))

### The five targeted roles

In [None]:
# Target roles; the resume-generation loop iterates over this list and tags
# each generated resume with the role it was produced for.
roles = [
    "Artificial Intelligence",
    "Business Analyst",
    "Business Intelligence Analyst",
    "Data Analyst",
    "Machine Learning"
]

### Simulating 50 Resumes (10 per role)

In [None]:
def generate_student_resume(role: str) -> str:
    """Ask the chat model for one simulated student resume targeting *role*.

    Args:
        role: Job role the simulated candidate is applying for; it is
            interpolated into the prompt.

    Returns:
        The model's reply as a stripped string, expected to be a JSON object
        with the keys described in the prompt. An empty string is returned
        when the API reply carries no text content, so the caller's
        ``json.loads`` fails with a clear decode error instead of an
        ``AttributeError``.
    """
    # Doubled braces ({{ }}) are literal braces inside this f-string; only
    # {role} is interpolated.
    prompt = f"""
You are generating a **realistic and anonymized resume** for a computer science student or recent graduate (0–2 years of experience) applying for a role as a {role}.

Return strictly valid JSON in the following format:
{{
  "name": "Candidate_<ID>",
  "education": "<Degree, Masked University (e.g., 'Top Engineering Institute'), Graduation Years>",
  "work_experience": [
    {{
      "role": "<Internship or research title>",
      "company": "<Realistic but anonymized organization (e.g., 'NeuroTech AI', 'InsightSoft Labs')>",
      "duration": "<Month Year – Month Year>",
      "description": "Describe the responsibilities in a detailed, narrative style. Mention the purpose of the project, specific tasks completed, tools/technologies used, and measurable impact. Be authentic — include dataset sizes, model names, APIs, or product features if relevant."
    }},
    {{
      "role": "<Optional second experience (lab assistant, part-time dev, open-source contributor)>",
      "company": "<Another anonymized organization (e.g., 'Top Tech University - DataLab')>",
      "duration": "<Month Year – Month Year>",
      "description": "Follow the same format: outcome-focused, rich in tech and context. Avoid vague phrasing."
    }}
  ],
  "personal_projects": [
    {{
      "title": "<Unique, interesting project title>",
      "description": "Explain the project’s motivation (e.g. coursework, curiosity, hackathon), what problem it solved, what tech was used, and the result. Include metrics if applicable. Avoid generic projects like 'movie recommender'."
    }},
    {{
      "title": "<Second project (can be unrelated to the role)>",
      "description": "Still provide depth. For example, a photography app, social impact tool, or game with real features. Mention full-stack tools or libraries used."
    }}
  ],
   "technical_skills": ["<Realistic tech skills: some aligned, some adjacent>"],
   "soft_skills": ["<Soft skills like teamwork, leadership, communication, etc.>"]
}}

STRICT INSTRUCTIONS:
- **No markdown**, **no explanations**, only valid, well-formatted JSON.
- **Do not reuse phrases** like “worked with a team” or “improved accuracy.”
- **Never use placeholder names** like Tech_Startup_123. Instead, use fictional but plausible names like “CortexAI” or “Nuvem Data.”
- Use **varied and creative projects** — not all should be recommendation systems!
- Projects can be from coursework, hackathons, personal exploration, or clubs
- Leave **some skill gaps** compared to what a real job would require
- Descriptions must sound like they were written by a strong student applying to a competitive job — technically sharp, reflective, and specific.
- You may simulate imperfections (e.g., slightly over-descriptive soft skills or overuse of buzzwords) occasionally for realism, but keep overall quality high.
"""

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.8,
        # JSON mode: the reply is constrained to a single JSON object, so the
        # caller's json.loads is not tripped up by markdown fences or prose.
        response_format={"type": "json_object"},
    )
    # `content` may be None (e.g. on a refusal); normalise to "" before strip.
    content = response.choices[0].message.content
    return (content or "").strip()

# Accumulates one dict per successfully generated and parsed resume.
resumes = []

for role in roles:
    print(f"Generating resumes for role: {role}")
    for i in range(10):
        try:
            # Generate and parse in one step; any failure (API error, invalid
            # JSON, non-dict payload) is reported below and this slot is skipped.
            parsed_resume = json.loads(generate_student_resume(role=role))
            parsed_resume["target_role"] = role
            resumes.append(parsed_resume)
            print(f"Resume {i+1} for {role}")
            # Brief pause between successful API calls.
            time.sleep(1)
        except Exception as e:
            print(f"Error at resume {i+1} for {role}: {e}")

# Persist every collected resume as a single UTF-8 JSON document.
with open("all_roles_student_resumes.json", "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(resumes, indent=2, ensure_ascii=False))

print("All resumes saved to all_roles_student_resumes.json")

Generating resumes for role: Artificial Intelligence
Resume 1 for Artificial Intelligence
Resume 2 for Artificial Intelligence
Resume 3 for Artificial Intelligence
Resume 4 for Artificial Intelligence
Resume 5 for Artificial Intelligence
Resume 6 for Artificial Intelligence
Resume 7 for Artificial Intelligence
Resume 8 for Artificial Intelligence
Resume 9 for Artificial Intelligence
Resume 10 for Artificial Intelligence
Generating resumes for role: Business Analyst
Resume 1 for Business Analyst
Resume 2 for Business Analyst
Resume 3 for Business Analyst
Resume 4 for Business Analyst
Resume 5 for Business Analyst
Resume 6 for Business Analyst
Resume 7 for Business Analyst
Resume 8 for Business Analyst
Resume 9 for Business Analyst
Resume 10 for Business Analyst
Generating resumes for role: Business Intelligence Analyst
Resume 1 for Business Intelligence Analyst
Resume 2 for Business Intelligence Analyst
Resume 3 for Business Intelligence Analyst
Resume 4 for Business Intelligence Analys

### Loading simulated student resumes

In [None]:
# The resume file is written as UTF-8 with ensure_ascii=False, so it must be
# read back as UTF-8 explicitly; relying on the platform default encoding
# corrupts non-ASCII characters on systems whose locale is not UTF-8.
with open("all_roles_student_resumes.json", "r", encoding="utf-8") as f:
    resumes = json.load(f)

# Job descriptions used for comparison against the simulated resumes.
jobs_df = pd.read_csv("req_job_desc.csv")

### Summarise Resume and Job Description

In [None]:
def _join_skills(skills):
    """Render a skills field as a comma-separated string.

    Accepts a list of skills, a single already-joined string, or a missing or
    null value (rendered as an empty string).
    """
    if not skills:
        return ""
    if isinstance(skills, str):
        # A bare string must not be joined character by character.
        return skills
    return ", ".join(str(skill) for skill in skills)


def summarize_resume(resume):
    """Condense a resume dict into a single " | "-separated summary line.

    Sections appear in the order education, experience roles, technical
    skills, soft skills; a section is omitted when its data is missing or
    empty.

    Args:
        resume: Mapping with optional keys "education", "work_experience"
            (list of dicts that may carry a "role"), "technical_skills" and
            "soft_skills".

    Returns:
        The summary string, or "" when no section has content.
    """
    parts = []
    if resume.get("education"):
        parts.append(f"Education: {resume['education']}")

    # Resumes are model-generated, so tolerate a null list and entries that
    # are not dicts instead of raising.
    experience_roles = [
        str(entry["role"])
        for entry in resume.get("work_experience") or []
        if isinstance(entry, dict) and entry.get("role")
    ]
    if experience_roles:
        parts.append("Experience roles: " + ", ".join(experience_roles))

    tech_skills = _join_skills(resume.get("technical_skills"))
    if tech_skills:
        parts.append("Technical Skills: " + tech_skills)

    soft_skills = _join_skills(resume.get("soft_skills"))
    if soft_skills:
        parts.append("Soft Skills: " + soft_skills)

    return " | ".join(parts)

def summarize_job(job):
    """Build a one-line summary of a job posting.

    Reads "Job Title" (falling back to "Job"), "IT Skills" and "Soft Skills"
    (each falling back to an empty string) from *job* via ``.get`` and formats
    them into a single " | "-separated string.
    """
    lookups = (("Job Title", "Job"), ("IT Skills", ""), ("Soft Skills", ""))
    title, it_skills, soft_skills = (
        job.get(field, fallback) for field, fallback in lookups
    )
    return f"Job Title: {title} | Required Technical Skills: {it_skills} | Required Soft Skills: {soft_skills}"

### Displaying the job descriptions (120 per role, 600 in total)

In [None]:
# Bare expression: the notebook renders the DataFrame read from req_job_desc.csv.
jobs_df

Unnamed: 0,Query,Description
0,AI Engineer,"Location: Schlumberger-Doll Research, Cambridge, MA, USA At Schlumberger-Doll Research (SDR) in Cambridge, MA, the robotics department is evaluating and developing cutting-edge technology for the automation of unstructured oil and gas applications. We are looking for talented PhD and Master‚Äôs level roboticists specializing in collaborative manipulation to join a multidisciplinary team working on stimulating problems that will lead to disruptive differentiation in functions spanning manufacturing, maintenance and field operations. The successful candidate will closely collaborate with experienced SDR team members and Schlumberger product development entities to design, develop, simulate and test algorithms and software prototypes against harsh oilfield environments. Taking advantage of the vibrant Boston robotic cluster, he/she will participate in strategic engagements with academic research institutions and worldwide industry partners. Qualifications PhD or Master‚Äôs degree with 3 years of industry experience with emphasis on human-robot collaborative manipulation Deep theoretical foundation in relevant artificial intelligence disciplines Working knowledge of Robot Operating System (ROS) framework Demonstrated proficiency with relevant programming languages, such as Python and C++, in a Linux environment Preferred experience Motion planning and inverse kinematics Task planning under uncertainty Supervised, reinforcement/demonstration learning Excellent problem-solving, organizational and communication skills Candidates must have the legal right to work in US Schlumberger is the world‚Äôs leading supplier of technology, integrated project management and information solutions to customers working in the oil and gas industry worldwide. 
Employing more than 105,000 people representing over 140 nationalities and working in approximately 85 countries, Schlumberger provides the industry‚Äôs widest range of products and services from exploration through production. Schlumberger has always invested significant time and money on R&E; as a long-term strategy to support and grow its technology leadership. Schlumberger invests more each year in R&E; than all other oilfield services companies combined. Schlumberger is an equal employment opportunity employer. Qualified applicants are considered without regard to race, color, religion, sex, national origin, age, disability, status as a protected veteran or other characteristics protected by law. Schlumberger is a VEVRAA Federal Contractor - priority referral Protected Veterans requested."
1,AI Engineer,"About DENSO DENSO is one of the largest global automotive suppliers of advanced technology, systems and components in the areas of thermal, powertrain control, electronics and information and safety. From automated driving to hybrid and electric vehicle components, we‚Äôre crafting the core technologies of modern mobility. We draw on the strengths of individual inventors who are joining together to become an unstoppable force for global good. With our North American headquarters located in Michigan, DENSO employs more than 23,000 people at 28 consolidated subsidiaries and 4 affiliates across the North American region. Worldwide, the company has more than 200 subsidiaries and affiliates in 38 countries and regions and employs more than 150,000 people. Consolidated global sales for the fiscal year ending March 31, 2017, totaled US$40.4 billion. As a Software Engineer you will be part of a small R &D Innovation team developing products and services utilizing future technology. We are looking for a candidate to work with us on innovative products and services inside and outside the vehicle spectrum. Candidate must be creative and collaborative. If you're passionate about building technologies that are going to make a difference in the future, this could be the right job for you! What you'll be doing: Develop software for a variety of automotive applications, including infotainment systems, AR, heads-up display and haptic devices. Develop optimized software to run on variety of platforms and environments including mobile, tablet and laptops. Develop innovative solutions for challenging problems with scalable, production quality software. Work with internal cross functional teams to develop a working model or PoC (proof of concept). Work with customers to build integrations and troubleshoot issues. Learn constantly and dive into new areas with unfamiliar technologies. 
DENSO will not provide any US immigration sponsorship for this position BS in Computer Science or Computer Engineering. 2-4 years of experience as a software engineer. Strong development experience in C, C , Java. Developing systems applications on Linux/Android. Comfortable in Python language. Experience or knowledge in Artificial Intelligence Ability to take ideas from concept to prototype. Ability to vet technical requirements from cross-functional R&D groups. Ability to work in cross-functional teams with a strong sense of self-direction. Pro-active attitude."
2,AI Engineer,"As a Director of Strategy & Strategy at Xperi, you will participate in industry standardization activities with 2 strategic goals: Maintain a continuous awareness of industry trends in the Audio/Video content creation and distribution spaces, including AR and VR, and other emerging technology areas such as imaging, machine learning and artificial intelligence Consistent with 1, establish DTS technologies in the technical standards and industry consortia publications that are impactful to Xperi business overall In furtherance of these goals, the successful candidate will participate in regular internal meetings with groups around the company to discuss Xperi standards and consortia participation, in light of industry trends and developments, to determine appropriate levels of participation, specific goals and schedules, weighing in available resources, commitments and company priorities. In concert with these regular meetings, the successful candidate will participate in assigned standards and industry consortia activities. Such participation will include travel, which may be substantial, to various f2f meetings and events. Primary work will involve authoring and reviewing technical specifications in the broad range of areas listed above. Qualifications & Skills Undergraduate degree in Engineering or Science. Post graduate degree a definite plus. 
Technical competence in one or more of the following areas: audio/video codec, media streaming and storage formats, AR/VR, machine learning, artificial intelligence Experience participating in international standards, consortia, or industry fora Experience working in an international and distributed environment Technical skills necessary to author and develop technical specifications Strong interpersonal skills a must Excellent communication skills, both written and oral Computer skills: Outlook, Word, Excel and PowerPoint required Willing to travel frequently and regularly to meetings in domestic and international locations Able to work in a team environment, and independently"
3,AI Engineer,"If you have a strategic mindset and expertise in the latest mobility technological trends, then look no further than P3! As a global consulting, management and engineering services company, we develop and implement innovative solutions to today‚Äôs complex technology challenges and industry changes that help our clients succeed at every stage. As the Business Strategy Consultant, you will have the opportunity to work with leading companies in the automotive industry to help drive business initiatives. As a key driver for developing strategy for leading automotive companies, you will be responsible for bringing transparency and insights to complex technology challenges. This will include working on a broad range of exciting technologies such as autonomous driving, connectivity, cybersecurity, vehicle electrification and artificial intelligence. _A Day in the Life:_ Consult with decision makers and leaders at major mobility companies and support the development of strategies regarding emerging technologies and products Invest time and effort to become a thought leader in a broad array of technologies and their implications on the mobility industry Moderate high level decision making workshops while providing technical and strategic input to guide the discussion Develop and use innovative frameworks and methodologies for solving complex technology planning challenges Support the strategic plan, business development and overall growth of projects, programs and initiatives Be open to challenges that involve new and unfamiliar technologies, complex and ambiguous situations, tight timelines and high client expectations. 
Build and maintain strong relationships with stakeholders while expanding your network to develop new opportunities _What You Have:_ Bachelor‚Äôs or Master‚Äôs degree in Mechanical Engineering, Electrical Engineering, Mechatronics with courses in Business Economics, Engineering Management, finance or related fields of study Drive, confidence and competence to manage critical work streams and responsibilities independently and to exceed expectations of clients and co-workers 2+ years of experience working in a strategic role focusing on automotive software, electronics, telematics, infotainment, connected vehicle and electrification A consulting mindset with an exposure to a wide variety of technology topics Ability to dive deep into a technology domain to understand basics and fundamentals but also big picture implications such as cost, differentiators, risks and improvement opportunities. Significant experience working and communicating directly with upper level management including C-Level executives Willingness to travel up to 50% of the time German language skills are a plus"
4,AI Engineer,"Mission Who We Are Founded and continuously led by inventor and entrepreneur Tony Aquila, Solera is a global leader in digital technologies that connect and secure life's most important assets: our cars, homes and identities. Since its inception in 2005 as a garage-based startup, Solera has grown aggressively with over 50 acquisitions across its platforms The company's current product solutions include Audatex, Autodata, AutoPoint, CAP/HPI, Colimbra, Digidentity, Enservio, Explore Data, Hollander, Identifix, Inpart, LYNX and TitleTec, as well as the company's flagship Digital Garage application. Today, Solera processes over 300 million transactions annually for approximately 235,000 partners and customers in over 80 countries. Unified by a strong culture that values uncommon entrepreneurial thinking and continuous ""do-it-different"" innovation, Solera's global workforce of 6,700+ associates come from diverse forward-thinking industries that include automotive technology, artificial intelligence, software development, data sciences, cybersecurity, cognitive design, and digital identity protection. For more information, please visit solera.com. The Solera Way Solera‚Äôs uncommon culture is based on three simple principles: Think 80/20 (Focus), Act 30/30 (Efficiency), and Live 90/10 (Accountability). We define our mindset using Our 3H‚Äôs: Humility, a Hunger to succeed, and a desire to Hunt for opportunities to win. We train our volunteers to engage with each other modulating between their intellect (IQ) and emotional intelligence (EQ) using our 3Fs: Facts, Finesse, and Force. Solera has become a global technology leader that is constantly growing in the double digits. The principles, drills, and values associated with the Solera Way have been fundamental to Solera‚Äôs success and our ability to grow, continuously change, and innovate. Are You Uncommon? 
We‚Äôre on the hunt for an experienced Software Quality Analyst who ranks in the top quartile among their peers. Someone who has a highly competitive and entrepreneurial mindset that is wired with a team-first attitude, has no problem rolling up their sleeves to execute their missions, and can modulate between leading and following as needed. You will serve as the Software Quality Analyst. Position 80s What You‚Äôll Be Doing At Solera, we believe in providing clarity and focus to what are our volunteers are responsible for ‚Äì their mission. The 80‚Äôs, as we call them at Solera, are the employee‚Äôs priorities associated with their role or mission. The mission 80‚Äôs for this role, are: Understand the business flows, design test strategies, specifications and test plans through user stories. Use bug tracking tools to report bugs (JIRA if possible), analyses the test results and report them to SCRUM roles. Partner with automation team and adhere to automation principles and drive automation for the assigned product. Additional responsibilities of the role include: Raise an alert when an important issue is likely to put in jeopardy the whole project Work closely with Quality Assurance roles and other departments to ensure quality products with clear project deliverables and timelines, and to ensure timely completion of deliverables. Participate in a small, experienced, energetic team on a rapid, agile delivery schedule. Design test cases and test scenarios to ensure both new and previous features Execute test cases to ensure business scenario Be hands on in writing, executing and monitoring automated tests Build tools to support development and business functions Participate in new project development to facilitate automation in conjunction with development Qualifications Are You Qualified? 1-3 years of experience in QA role. Strong technical and troubleshooting expertise and mind set. Familiarity with at least one programming language and can write automation. 
Familiarity with agile software development methodologies Strong Software Testers mindset, good understanding of the software development lifecycle Must be able to finish tasks with minimum supervision Strong written and verbal communication skills including technical writing skill. Our Space We believe that offices should be destinations where people want to be‚Ä¶ places where people want to work, play and, collaborate. That is why we spend a lot of time on enhancing our workspaces. Solera‚Äôs work environments are intersections of the old and the new‚Ä¶ of the past, present, and future. They are spaces designed to stimulate creative thinking, innovation, and cross- functional collaboration. We also embrace wellness. Whether it is our on-site or off-site workout centers, nutritional meals and beverages, or endless snacks for focus and energy. This wellness plan is our commitment to healthy living, which spans across our office environments."
...,...,...
595,Machine Learning,"Are you results-driven? Love challenges? Do you enjoy working in a fast-paced environment? Are you passionate about finding those hidden gem candidates? If yes, keep reading about an exciting new opportunity with Wayfair! Wayfair is looking for technical sourcers with excellent search, relationship management and communication skills, experience in Technical, Software or Data Science Recruiting and who are passionate about working with the Software, Machine Learning or Infrastructure teams. The sourcer will be responsible for helping to advance the talent strategy by identifying and building candidate slates for all levels of technical positions across the business. This is a unique opportunity to join a $5B+ eCommerce company as a true business partner and make a significant and immediate impact working with the departments that drive the continued growth of our company. Responsibilities: Source candidates through a variety of search resources (social media, internal database, job boards, networking, internet searches, company targets, referrals, etc...) Create robust sourcing strategies Build active and passive candidate pipelines aligned to recruitment strategy: hire top talent in a highly competitive market Track metrics and use data for strategic analysis of recruiting campaigns Qualifications: Bachelor‚Äôs Degree or equivalent experience required Minimum 2 + years‚Äô experience sourcing and/or recruiting in a fast-paced and high hiring-bar environment Direct sourcing expertise utilizing a range of methods and approaches Exceptional written and verbal communication skills About Wayfair Inc. Wayfair believes everyone should live in a home they love. Through technology and innovation, Wayfair makes it possible for shoppers to quickly and easily find exactly what they want from a selection of more than 10 million items across home furnishings, d√©cor, home improvement, housewares and more. 
Committed to delighting its customers every step of the way, Wayfair is reinventing the way people shop for their homes - from product discovery to final delivery. The Wayfair family of sites includes: Wayfair, an online destination for all things home Joss & Main, where beautiful furniture and finds meet irresistible savings AllModern, unbelievable prices on everything modern Birch Lane, a collection of classic furnishings and timeless home d√©cor Perigold, unparalleled access to the finest home d√©cor and furnishings Wayfair generated $5.2 billion in net revenue for the twelve months ended March 31, 2018. Headquartered in Boston, Massachusetts with operations throughout North America and Europe, the company employs more than 8,700 people."
596,Machine Learning,"zulily is a retailer obsessed with bringing our customers something special every day. Unique products from up-and-coming brands are featured alongside favorites from top brands, including clothing, home decor, accessories, toys and gifts, giving customers something new to discover each morning at an incredible price. We are fast-paced, innovative and metric-driven, with a team passionate for delighting our customers. Whether you want to work with incredible product selection, develop smart marketing plans, help us in our pursuit of faster fulfillment, or do something else awesome in between, we have a place for you. With locations in Seattle, Reno, Columbus and Bethlehem, our team is excited to be changing the way people shop every day - and we are just getting started. We have an immediate need for a Software Engineer for our Machine Learning team. As an Engineer at zulily, you are chartered with building the technology that powers the second fastest growing online retailer in history. Our incredible growth means we face new challenges on a consistent basis. In this position, you must explore new technologies and collaborate with other engineers to design and build world-class technical services. Your team deploys production code rapidly to keep pace with zulily‚Äôs changing business, you are mindful of avoiding technical debt, and you take ownership of your team's roadmap and everything you do. Essential Job Functions Collaborate across business teams (stakeholders) Operate and provide 24/7 services to internal and external customers (mom and zuteams) Research new technologies to support build vs. buy and rapid growth of the business (build vs. buy) Design in a mindset of reducing technical debt Consistently delivers work with started timelines QUALIFICATIONS Bachelors Degree in Computer Science or related field required. A combination of education plus equivalent work experience may be considered. 
Exposure to software development experience building and operating high traffic web services and platforms Past history with Amazon Web Services a plus Ability to work in a fast paced, evolving, growing and dynamic environment Excellent written and verbal communication skills Demonstrated personal initiative and strong ownership of deliverables Ability to explore new ideas and have a passion to make them happen Highly Preferred: experience building large-scale machine learning services a plus zulily is an Equal Opportunity Employer zulily participates in E-Verify. zulily will provide the Social Security Administration (SSA) and, if necessary, the Department of Homeland Security (DHS) with information from each new employee's I-9 to confirm work authorization. Federal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. Please tell us if you require a reasonable accommodation to apply for a job. Examples of reasonable accommodation include making a change to the application process, providing documents in an alternate format, using a sign language interpreter, or using specialized equipment. You may reach us at (206) 388-0920."
597,Machine Learning,"Careers that Change Lives The Patient Care Software team is one of the most cutting edge groups within Medtronic, defining and executing on technology and architectural strategy using skills that span a variety of domains including Mobile Apps (Android), cloud, data analytics, web services, 2d/3d visualization, machine learning, and interfacing with embedded software for management and control of interconnected medical devices. As a Senior Software Test Engineer , you will be working with motivated, diverse, and knowledgeable development teams to deliver world-class products. You will be responsible for designing, implementing, executing and documenting test automation software for the next generation of Patient Care Software products. This role requires the ability to work as part of a team to realize complex, innovative, high reliability systems that improve the quality of life of people around the world. This is a highly trusted position that requires an individual to thoughtfully develop testing strategies and protocols with state of the art software practices, energy, and a passion to win. You will also be responsible to bring state of the art tools and methodologies used in today‚Äôs cloud- and mobile-based enterprise systems to integrate test automation throughout the software development lifecycle, including the DevOps pipeline, that complies with the Medtronic Quality System and that scales to meet future needs. 
A Day in the Life Lead a global test team to strive for functional excellence in all aspects of daily work Collaborate with leads and development engineers to build testability into the products Collaborate with DevOps to streamline the development and test process Share automation best practices and set high standards for the entire team to build consistent and maintainable tests and test automation frameworks Understand the testing effort by analyzing the requirements of the project Develop the test plan for the tasks, dependencies and participants required to mitigate the risks to system quality and obtain stakeholder support for the plan Track and prepare reports of testing activities Responsibilities may include the following and other duties may be assigned. Designs, develops, and implements testing methods and equipment. Plans and arranges the labor, schedules, and equipment required for testing and evaluating standard and special devices. Provides test area with parameters for sample testing and specifies tests to be performed. Compiles data and defines changes required in testing equipment, testing procedures, manufacturing processes, or new testing requirements. Responsible for testing all customer samples and for special tests that cannot be performed in the test area. 
Must Have: Minimum Requirements Bachelor‚Äôs degree in Software Engineering, Computer Science, or related field 4+ years professional experience Nice to Have Proficiency in Java Experience in Continuous Integration and Continuous Deployment (CI/CD) Experience in developing test automation tools and frameworks with hands-on technical and programming skills Ability to organize and lead testing efforts Excellent problem solving and analytical skills Clarity and precision in verbal and written communication Experience working with geographically distributed teams Design/test experience with regulated medical devices Experience leading a team of engineers About Medtronic Together, we can change healthcare worldwide. At Medtronic, we push the limits of what technology, therapies and services can do to help alleviate pain, restore health and extend life. We challenge ourselves and each other to make tomorrow better than yesterday. It is what makes this an exciting and rewarding place to be. We want to accelerate and advance our ability to create meaningful innovations - but we will only succeed with the right people on our team. Let‚Äôs work together to address universal healthcare needs and improve patients‚Äô lives. Help us shape the future. Physical Job Requirements The above statements are intended to describe the general nature and level of work being performed by employees assigned to this position, but they are not an exhaustive list of all the required responsibilities and skills of this position. The physical demands described within the Day in the Life section of this job description are representative of those that must be met by an employee to successfully perform the essential functions of this job. Reasonable accommodations may be made to enable individuals with disabilities to perform the essential functions."
598,Machine Learning,"Looking for a company that inspires passion, courage and imagination, where you can be part of the team shaping the future of global commerce? Want to shape how millions of people buy, sell, connect, and share around the world? If you‚Äôre interested in joining a purpose driven community that is dedicated to creating an ambitious and inclusive workplace, join eBay ‚Äì a company you can be proud to be a part of. Do you want to have an huge impact on the largest eCommerce website? Are you interested in solving cutting edge research problems while impacting all eBay advertising channels? Does working with Big Data, cloud computing, large-scale optimization, probabilistic inference, and machine learning excite you? If you answered yes, the Marketing Science team at eBay is the right place for you. We are looking for rockstar Data Scientists to join our team. You will be directly responsible for improving eBay's algorithms that drive customer traffic via Marketing channels. You will work on cutting-edge ML/optimization algorithms against one of the biggest datasets in the world. You‚Äôll work with world-class data scientists and engineers. You‚Äôll solve problems that have a direct impact on a multi-billion dollar business. Bring your ideas, energy, and dedication to reach and engage the next 100M eBay users. We are developing state-of-the-art personalization technologies and optimization strategies that have a direct impact on eBay's users as well as the company's bottom line. You will be expected to research state-of-the- art machine learning, optimization, natural language processing, text mining, and other techniques, and apply them to eBay's marketing platforms. You will roll your solutions to production, analyze results offline and online, and measure site impact. 
Responsibilities Aggregate huge amount of data and information from large numbers of sources to discover patterns and features necessary to build machine learning models that match eBay‚Äôs inventory with customers‚Äô demand via Marketing channels. Design and implement end-to-end solutions using Machine Learning, Optimization, and other advanced computer science technologies, and own live deployments to drive customer traffic to eBay. Qualifications PhD or MS in computer science, engineering or related field . Excellent understanding of computer science fundamentals, data structures, and algorithms. Strong programming skills (one or more of Java, C/C++, Python). Expertise in specialized areas such as Optimization, NLP, Reinforcement Learning, Probabilistic Inference, Machine Learning, Information Retrieval, Recommendation Systems. Proven experience with large data sets and related technologies, e.g., Hadoop, Pig, Spark. Knowledge of SQL and NoSQL is required. Expert level experience in at least one of the ML learning software (R/Python/Scala/Tensor flow) is required. Good communication skills, ability to work with large cross-functional teams of technical and non-technical members. eBay is a Subsidiary of eBay. This website uses cookies to enhance your experience. By continuing to browse the site, you agree to our use of cookies View our privacy policy View our accessibility info eBay Inc. is an equal opportunity employer. All qualified applicants will receive consideration for employment without regard to race, color, religion, national origin, sex, sexual orientation, gender identity, veteran status, and disability, or other legally protected status. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at talent@ebay.com . We will make every effort to respond to your request for disability assistance as soon as possible. 
For more information see: EEO is the Law Poster EEO is the Law Poster Supplement"


### Extracting the common technical and soft skills required for each role

In [None]:
# Read the job postings file into a DataFrame.
jobs_df = pd.read_csv(filepath_or_buffer="req_job_desc.csv")

# Bucket the postings on their "Query" column (one group per target role).
grouped_roles = jobs_df.groupby(by="Query")

def extract_skills_from_descriptions(role, descriptions):
    """Ask the chat model for the transferable skills shared by a role's job postings.

    Args:
        role: Role name; interpolated into the prompt and echoed in the result.
        descriptions: Iterable of job-description strings for that role.

    Returns:
        A dict that always contains the keys "role", "technical_skills" and
        "soft_skills". If the API call fails or the reply is not a JSON
        object, the error is printed and both skill lists are empty.
    """
    combined_text = "\n\n---\n\n".join(descriptions)

    prompt = f"""
      You are a highly skilled AI career advisor. You are given a set of job descriptions for the role of "{role}".

      Your task is to extract the **common transferable skills** required across these jobs ‚Äî categorize them as:
      1. Technical Skills
      2. Soft Skills

      Avoid any industry-specific or domain-specific skills (e.g., "healthcare compliance", "lab equipment use", "insurance claim processing"). Focus only on **transferable, job-agnostic skills**.

      Output must be in strict JSON format like this:
      {{
        "role": "{role}",
        "technical_skills": ["skill1", "skill2", ...],
        "soft_skills": ["skillA", "skillB", ...]
      }}

      Here are the job descriptions:
      {combined_text}
      """

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant for extracting transferable skills."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.2
        )

        raw_output = response.choices[0].message.content.strip()
        # Remove a surrounding Markdown code fence whether or not it carries a
        # language tag ("```json", "```JSON" or a bare "```").
        raw_output = re.sub(r"^```[a-zA-Z]*\s*", "", raw_output)
        raw_output = re.sub(r"\s*```$", "", raw_output)

        parsed = json.loads(raw_output)
        if not isinstance(parsed, dict):
            raise ValueError("model reply is not a JSON object")

        # Guarantee the keys that later cells index directly.
        parsed.setdefault("role", role)
        parsed.setdefault("technical_skills", [])
        parsed.setdefault("soft_skills", [])
        return parsed

    except Exception as e:
        # Best-effort batch step: report the failure and keep going with an
        # empty entry for this role.
        print(f"Error for role '{role}': {e}")
        return {
            "role": role,
            "technical_skills": [],
            "soft_skills": []
        }

# Query the model once per role; roles without any non-null description are
# skipped entirely (no API call, no pause).
results = []
for role, group in grouped_roles:
    print(f"Extracting skills for role: {role}")
    descriptions = group["Description"].dropna().tolist()
    if descriptions:
        results.append(extract_skills_from_descriptions(role, descriptions))
        # Pause between consecutive requests (presumably for rate limiting).
        time.sleep(1)

# Persist the per-role summaries as both CSV and JSON.
df_out = pd.DataFrame(results)
df_out.to_csv("role_transferable_skills.csv", index=False)
with open("role_transferable_skills.json", "w") as f:
    f.write(json.dumps(results, indent=2))

print("\n Skill extraction complete. Files saved: role_transferable_skills.csv and .json")


Extracting skills for role: AI Engineer
Extracting skills for role: Business Analyst
Extracting skills for role: Business Intelligence Analyst
Extracting skills for role: Data Analyst
Extracting skills for role: Machine Learning

 Skill extraction complete. Files saved: role_transferable_skills.csv and .json


### File containing all common skills required for a particular role

In [None]:
# Read the role skills file back in and pretty-print it for inspection.
# NOTE(review): despite its name, `resumes_data` is loaded from
# role_skills.json, not from a resume file.
with open("role_skills.json", "r") as f:
    resumes_data = json.loads(f.read())

print(json.dumps(resumes_data, indent=2))

[
  {
    "role": "AI Engineer",
    "technical_skills": [
      "Machine Learning",
      "Artificial Intelligence",
      "Data Analysis",
      "Programming (Python, C++, Java)",
      "Statistical Analysis",
      "Data Visualization",
      "Natural Language Processing",
      "Computer Vision",
      "Big Data Technologies",
      "Cloud Computing",
      "Software Development",
      "Agile Methodologies",
      "Data Management",
      "Algorithm Development",
      "Embedded Systems"
    ],
    "soft_skills": [
      "Problem Solving",
      "Communication",
      "Collaboration",
      "Project Management",
      "Analytical Thinking",
      "Creativity",
      "Attention to Detail",
      "Adaptability",
      "Leadership",
      "Interpersonal Skills",
      "Time Management",
      "Customer Focus",
      "Teamwork"
    ]
  },
  {
    "role": "Business Analyst",
    "technical_skills": [
      "Data Analysis",
      "SQL",
      "Business Process Modeling",
      "Requirem

### Identifying skill gaps in the simulated resumes based on desired role

In [None]:
# Load Files
# sim_resume.json is read as JSON Lines: one resume object per line. Blank
# lines are skipped so that stray empty lines do not abort the load with a
# JSONDecodeError.
with open("sim_resume.json", "r") as f:
    resumes = [json.loads(line) for line in f if line.strip()]

with open("role_skills.json", "r") as f:
    raw_roles = json.load(f)
# Index the role entries by their "role" name for direct lookup.
role_skills = {r["role"]: r for r in raw_roles}

# Helper Functions
def summarize_resume(resume):
    """Condense a resume dict into a single " | "-separated summary line.

    Recognised keys, in output order: "target_role", "education",
    "work_experience" (only each entry's "role" is used), "technical_skills"
    and "soft_skills". Keys that are absent or whose value is None are
    skipped. A skills field given as one string is kept as a single item
    instead of being joined character by character.

    Args:
        resume: Dict holding the resume fields.

    Returns:
        The summary string; empty when no recognised key is present.
    """
    def _as_items(value):
        # A bare string is one item; str.join on it would interleave ", "
        # between its characters.
        if isinstance(value, str):
            return [value]
        return [str(item) for item in value]

    summary = []

    if resume.get("target_role") is not None:
        summary.append(f"Target Role: {resume['target_role']}")
    if resume.get("education") is not None:
        summary.append(f"Education: {resume['education']}")

    experience = resume.get("work_experience")
    if experience is not None:
        # Ignore entries that are not dicts or that carry no usable role.
        jobs = [
            str(w["role"])
            for w in experience
            if isinstance(w, dict) and w.get("role") is not None
        ]
        if jobs:
            summary.append("Experience: " + ", ".join(jobs))

    for key, label in (
        ("technical_skills", "Technical Skills"),
        ("soft_skills", "Soft Skills"),
    ):
        value = resume.get(key)
        if value is not None:
            summary.append(f"{label}: " + ", ".join(_as_items(value)))

    return " | ".join(summary)

def generate_skill_gap(resume_text, expected_tech, expected_soft):
    """Query OpenAI to get skill gaps and transferable skills.

    Args:
        resume_text: Resume summary text inserted into the prompt.
        expected_tech: Iterable of required technical skill names.
        expected_soft: Iterable of required soft skill names.

    Returns:
        A 3-tuple of string lists: (technical skill gaps, soft skill gaps,
        transferable skills). On any API or parsing error the error is
        printed and three empty lists are returned.
    """
    prompt = f"""
You are a career advisor helping students identify skill gaps.

Compare the student's resume with the required skills below. Do 3 things:

1. Identify missing **technical skills** from the list that are not in the resume.
2. Identify missing **soft skills** from the list.
3. Suggest **transferable skills** from the resume that could help the student learn the missing technical skills.

Return valid JSON with this structure:
{{
  "technical_skill_gaps": [...],
  "soft_skill_gaps": [...],
  "transferable_skills": [...]
}}

If a category is empty, return an empty list.
Do NOT explain or repeat anything. Just return clean JSON.

---
Resume:
{resume_text}

Required Technical Skills: {", ".join(expected_tech)}
Required Soft Skills: {", ".join(expected_soft)}
"""

    def _as_str_list(value):
        # Callers join these values with ", ", so hand back a list of strings
        # even when the model returns null, a bare string or non-string items.
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return [str(item) for item in value]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant for skill gap analysis."},
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
        result = response.choices[0].message.content.strip()

        # Remove a surrounding Markdown code fence, with or without a
        # language tag.
        result = re.sub(r"^```[a-zA-Z]*\s*", "", result)
        result = re.sub(r"\s*```$", "", result)

        parsed = json.loads(result)
        return (
            _as_str_list(parsed.get("technical_skill_gaps", [])),
            _as_str_list(parsed.get("soft_skill_gaps", [])),
            _as_str_list(parsed.get("transferable_skills", []))
        )
    except Exception as e:
        # Best-effort batch step: report and fall back to "no gaps known".
        print("Error:", e)
        return [], [], []

# Run Analysis

# One flat record per resume whose target role has a known skill list.
records = []

for idx, resume in enumerate(resumes):
    name = resume.get("name", f"Candidate_{idx}")
    role = resume.get("target_role", "").strip()

    if role and role in role_skills:
        resume_summary = summarize_resume(resume)
        expected = role_skills[role]
        tech_skills = expected["technical_skills"]
        soft_skills = expected["soft_skills"]

        print(f"[{name}] ‚Üí Analyzing role: {role}")
        tech_gap, soft_gap, transferable = generate_skill_gap(
            resume_summary, tech_skills, soft_skills
        )

        # The gap lists are flattened to comma-separated strings for the
        # tabular output.
        records.append(dict(
            name=name,
            target_role=role,
            resume_summary=resume_summary,
            technical_skill_gap=", ".join(tech_gap),
            soft_skill_gap=", ".join(soft_gap),
            transferable_skills=", ".join(transferable),
        ))
        time.sleep(1)
    else:
        print(f"[{name}] Skipped ‚Äî no target role or unknown role.")

# Write the collected records to CSV and JSON.
df = pd.DataFrame(records)
df.to_csv("skill_gap_analysis.csv", index=False)
df.to_json("skill_gap_analysis.json", orient="records", indent=2)
print(f"\n Done! Analyzed {len(records)} resumes and saved to CSV and JSON.")


[Candidate_012] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: AI Engineer
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_12345] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Business Analyst
[Candidate_001] ‚Üí Analyzing role: Busin

### Skill Gap Analysis Functionality - allows user to upload their resume

In [None]:
# Load the saved skill-gap records; they are used below as retrieval examples.
with open("skill_gap_analysis.json", "r") as f:
    examples = json.loads(f.read())

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding the OpenAI embeddings endpoint produces for `text`.

    Args:
        text: String to embed; sent as a single-element input list.
        model: Embedding model name passed to the API.

    Returns:
        The `embedding` attribute of the first item in the response data.
    """
    result = client.embeddings.create(model=model, input=[text])
    vectors = result.data
    return vectors[0].embedding

# One "Resume: ... | Role: ..." string per stored example, each embedded once
# up front with the default embedding model.
example_texts = [f"Resume: {ex['resume_summary']} | Role: {ex['target_role']}" for ex in examples]
example_embeddings = list(map(get_embedding, example_texts))

with open("role_skills.json", "r") as f:
    role_skills_list = json.load(f)

# Convert list -> dict: { role_name: {technical_skills: [...], soft_skills: [...]} }
# Entries without a "role" key are ignored; missing skill lists default to [].
role_skills = {}
for entry in role_skills_list:
    if "role" in entry:
        role_skills[entry["role"]] = {
            "technical_skills": entry.get("technical_skills", []),
            "soft_skills": entry.get("soft_skills", []),
        }

# Anonymize resume text
def anonymize(text):
    """Have the chat model rewrite `text` with personal identifiers removed.

    Args:
        text: Resume text to anonymize.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant that removes personal identifiers from resumes."},
        {"role": "user", "content": f"Anonymize this resume:\n\n{text}"},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
        temperature=0,
    )
    reply = completion.choices[0].message.content
    return reply.strip()

def extract_resume_text(pdf_path):
    """Return the text of every page of the PDF at `pdf_path`, newline-joined.

    Pages for which extraction yields nothing contribute an empty string; the
    combined text is stripped of leading and trailing whitespace.
    """
    page_texts = []
    for page in PdfReader(pdf_path).pages:
        extracted = page.extract_text()
        page_texts.append(extracted if extracted else "")
    combined = "\n".join(page_texts)
    return combined.strip()

def retrieve_examples(resume_text, target_role, k=3):
    """Return the stored examples most similar to a resume/role query.

    The query is embedded with `get_embedding` and compared to
    `example_embeddings` by cosine similarity.

    Args:
        resume_text: Resume text to match against the stored examples.
        target_role: Role name appended to the query text.
        k: Maximum number of examples to return.

    Returns:
        Up to `k` entries of `examples`, most similar first. An empty list
        when `k` is not positive or no example embeddings exist.
    """
    # Guard the edge cases before spending an API call. The old slice
    # `[-k:]` with k == 0 selected every example instead of none.
    if k <= 0 or len(example_embeddings) == 0:
        return []

    query = f"Resume: {resume_text} | Role: {target_role}"
    query_embedding = get_embedding(query)
    sims = cosine_similarity([query_embedding], example_embeddings)[0]
    # Ascending argsort, reversed, then truncated: best match first.
    top_k_indices = sims.argsort()[::-1][:k]
    return [examples[i] for i in top_k_indices]

def generate_skill_gap(resume_text, target_role, retrieved_examples, fallback_skills):
    """Run a few-shot skill gap analysis for one resume.

    Args:
        resume_text: Resume text to analyse.
        target_role: Role name shown to the model.
        retrieved_examples: Records with the keys "target_role",
            "resume_summary", "technical_skill_gap", "soft_skill_gap" and
            "transferable_skills", rendered as examples in the prompt.
        fallback_skills: Dict that may hold "technical_skills" and
            "soft_skills" lists; missing keys are treated as empty lists.

    Returns:
        The parsed JSON object from the model. The keys
        "technical_skill_gaps", "soft_skill_gaps" and "transferable_skills"
        are always present (defaulting to empty lists).

    Raises:
        ValueError: If the reply is not valid JSON (json.JSONDecodeError is a
            ValueError subclass) or is not a JSON object.
    """
    examples_prompt = "\n\n".join([
        f"Example for role {ex['target_role']}:\nResume: {ex['resume_summary']}\nSkill Gaps: tech={ex['technical_skill_gap']}, soft={ex['soft_skill_gap']}, transferable={ex['transferable_skills']}"
        for ex in retrieved_examples
    ])

    expected_tech = fallback_skills.get("technical_skills", [])
    expected_soft = fallback_skills.get("soft_skills", [])

    full_prompt = f"""
You are a highly experienced career advisor. Below are real examples of skill gap analysis between student resumes and job requirements.

## EXAMPLES
{examples_prompt}

## TASK:

Now, perform the same analysis on the following new resume.

1. Read the student‚Äôs resume.
2. Compare it to the required skills.
3. Return:
   - Skills the student is missing (from the required lists).
   - Transferable skills the student has that help bridge gaps.

## REQUIRED FORMAT ‚Äî Output valid JSON:

{{
  "technical_skill_gaps": [list of missing technical skills],
  "soft_skill_gaps": [list of missing soft skills],
  "transferable_skills": [skills from the resume that can help learn missing ones]
}}

‚ùå Do not explain anything. Do not repeat resume or skills. Just return the JSON.
‚ùå If no gaps, return empty lists.

---

üéØ Target Role: {target_role}
üìå Required Technical Skills: {", ".join(expected_tech)}
üìå Required Soft Skills: {", ".join(expected_soft)}

üìÑ Resume:
{resume_text}
"""


    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant for skill gap analysis."},
            {"role": "user", "content": full_prompt}
        ],
        temperature=0.0
    )
    raw = response.choices[0].message.content.strip()
    # Remove a surrounding Markdown code fence, with or without a language tag.
    raw = re.sub(r"^```[a-zA-Z]*\s*", "", raw)
    raw = re.sub(r"\s*```$", "", raw)

    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        # The caller unpacks the result with **, which needs a mapping.
        raise ValueError("skill gap reply is not a JSON object")
    for key in ("technical_skill_gaps", "soft_skill_gaps", "transferable_skills"):
        parsed.setdefault(key, [])
    return parsed

# === Full pipeline function ===
def analyze_uploaded_resume(pdf_path, target_role):
    """Run the full skill gap pipeline on an uploaded PDF resume.

    Steps: extract the PDF text, anonymize it, look up the role's required
    skills, retrieve similar examples and ask the model for the gaps.

    Args:
        pdf_path: Path of the PDF resume.
        target_role: Role name. Matched against `role_skills` exactly first,
            then ignoring case and surrounding whitespace.

    Returns:
        A dict with "resume_text" (the anonymized text), "target_role" and
        the keys of the model's skill gap result.
    """
    resume_text = extract_resume_text(pdf_path)
    anonymized_resume = anonymize(resume_text)

    fallback_skills = role_skills.get(target_role)
    if fallback_skills is None:
        # Typed input such as "data analyst" should still find "Data Analyst".
        wanted = str(target_role).strip().casefold()
        fallback_skills = next(
            (
                skills
                for known_role, skills in role_skills.items()
                if str(known_role).strip().casefold() == wanted
            ),
            None,
        )
    if fallback_skills is None:
        # Keep the original behaviour (analyse with no required-skill lists)
        # but make it visible instead of silent.
        known_roles = ", ".join(str(known_role) for known_role in role_skills)
        print(f"Warning: no skill list for role '{target_role}'. Known roles: {known_roles}")
        fallback_skills = {}

    retrieved = retrieve_examples(anonymized_resume, target_role)
    result = generate_skill_gap(anonymized_resume, target_role, retrieved, fallback_skills)
    return {
        "resume_text": anonymized_resume,
        "target_role": target_role,
        **result
    }


In [None]:
from google.colab import files

# Prompt for a file upload and take the first key of the returned mapping as
# the PDF path (presumably the uploaded file's name; verify against Colab docs).
uploaded = files.upload()
pdf_path = list(uploaded)[0]

target_role = input("Enter your target role (e.g., 'AI Engineer'): ").strip()

# Run the whole pipeline and show the result as indented JSON.
output = analyze_uploaded_resume(pdf_path, target_role)
print(json.dumps(output, indent=2))

Saving ADITI BHATIA DA.pdf to ADITI BHATIA DA.pdf
Enter your target role (e.g., 'AI Engineer'): Data Analyst
{
  "resume_text": "**ANONYMOUS RESUME**\n\n**PROFILE**  \nMSc Computer Science student with a Bachelor's in Computer Engineering and Honors in Data Science. Achieved a GPA of 9.72/10 (1:1). Skilled in Python, SQL, Tableau, and PowerBI with hands-on experience in data analysis, visualization, and machine learning. Passionate about using data to drive decision-making and solve real-world problems as an aspiring Data Analyst.\n\n**TECHNICAL SKILLS**  \n\u2022 Programming Languages: Python, SQL, Java  \n\u2022 Frameworks: Django, TensorFlow, React.js, Scikit-learn  \n\u2022 Tools: PowerBI, Excel, Figma, Tableau, Git/GitHub, MS Office  \n\u2022 Cloud Technologies: Google Cloud, Microsoft Azure, AWS  \n\n**EDUCATION**  \nMSc Computer Science (Negotiated Learning): University College Dublin, Dublin, Ireland (1:1)                          2024 - 2025  \nRelevant Courses: Data Science, 

### Action Plan Generation (Testing for 3 Candidates)

In [None]:
# Only the first three rows are used for this test run.
df = pd.read_csv("skill_gap_analysis.csv").iloc[:3]

def extract_json_from_response(raw):
    """Safely extract the first JSON object from an LLM response.

    Decoding starts at the first "{" and stops at the end of that object, so
    surrounding prose or Markdown fences are ignored, even when the trailing
    text itself contains braces.

    Args:
        raw: The response text.

    Returns:
        The decoded dict, or {} when `raw` is not a string, contains no "{",
        or the text at the first "{" is not a valid JSON object.
    """
    if not isinstance(raw, str):
        return {}

    start = raw.find("{")
    if start == -1:
        return {}

    try:
        # raw_decode parses exactly one JSON value and ignores what follows.
        parsed, _end = json.JSONDecoder().raw_decode(raw, start)
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}

def generate_action_plan(tech_skills, soft_skills, transferable_skills):
    """Ask the chat model for learning resources covering the given skills.

    Args:
        tech_skills: List of technical skill gap names.
        soft_skills: List of soft skill gap names.
        transferable_skills: List of transferable skill names.

    Returns:
        The dict produced by `extract_json_from_response` from the model's
        reply, or {} when the request raises (the error is printed).
    """
    prompt = f"""
You are a top-tier career advisor and learning coach. For each skill listed, recommend 2‚Äì3 diverse and high-quality, **REAL** learning resources with links.

The resources must be: genuinely helpful for mastering the skill

Resources may include:
- Online courses
- Popular Books with amazon links
- Practice Platforms (e.g. LeetCode, HackerRank)
- GitHub repos or open-source projects
- YouTube Tutorials
- Blogs, articles, or documentation etc-

Ensure resources are **practical**, well-reviewed, REAL and up-to-date.

Respond ONLY in valid JSON:

{{
  "technical_skill_resources": {{
    "Skill 1": ["Resource A", "Resource B"],
    ...
  }},
  "soft_skill_resources": {{
    "Skill 1": ["Resource A", "Resource B"],
    ...
  }},
  "transferable_skill_resources": {{
    "Skill 1": ["Resource A", "Resource B"]
  }}
}}

No explanations. No markdown. No extra text. Just the JSON object. DO NOT make up fake links. Use only real, high-quality resources.

---

Technical Skills Gap: {', '.join(tech_skills)}
Soft Skills Gap: {', '.join(soft_skills)}
Transferable Skills: {', '.join(transferable_skills)}
"""

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant for job upskilling."},
        {"role": "user", "content": prompt},
    ]

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            temperature=0.2,
        )
        reply_text = completion.choices[0].message.content.strip()
        return extract_json_from_response(reply_text)
    except Exception as e:
        print("API Error:", e)
        return {}

def _split_skills(cell):
    """Split a comma-separated CSV cell into skill names.

    Empty cells come back from read_csv as NaN; those (and None) yield an
    empty list instead of the bogus skill "nan".
    """
    if cell is None or (not isinstance(cell, str) and pd.isna(cell)):
        return []
    # NOTE(review): skills that contain commas themselves, e.g.
    # "Programming (Python, C++, Java)", are split into fragments here because
    # the CSV stores each list as a ", "-joined string.
    return [s.strip() for s in str(cell).split(",") if s.strip()]


results = []

for idx, row in df.iterrows():
    name = row.get("name", f"Candidate_{idx}")
    print(f"[{name}] ‚Üí Generating action plan...")

    tech = _split_skills(row.get("technical_skill_gap", ""))
    soft = _split_skills(row.get("soft_skill_gap", ""))
    trans = _split_skills(row.get("transferable_skills", ""))

    # With NaN handled above, this skip now fires for rows with no skills.
    if not tech and not soft and not trans:
        print("   ‚Üí No skill gaps found. Skipping.")
        continue

    plan = generate_action_plan(tech, soft, trans)

    results.append({
        "name": name,
        "target_role": row.get("target_role", ""),
        "technical_skill_gap": tech,
        "soft_skill_gap": soft,
        "transferable_skills": trans,
        "action_plan": plan
    })

    # Pause between consecutive API requests.
    time.sleep(1)

with open("test_action_plan_output.json", "w") as f:
    json.dump(results, f, indent=2)

print(f"\n Test complete! Results saved to 'test_action_plan_output.json'")

[Candidate_012] ‚Üí Generating action plan...
[Candidate_001] ‚Üí Generating action plan...
[Candidate_001] ‚Üí Generating action plan...

 Test complete! Results saved to 'test_action_plan_output.json'
