In [89]:
import ast

import requests
import pandas as pd

from bs4 import BeautifulSoup

from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
from azure.ai.inference.models import SystemMessage, UserMessage

from settings import BASE_DIR, GITHUB_TOKEN

# Step 1: Extracting all jobs

In [70]:
# Query-string fragment appended to every job-finder request URL in this notebook.
FILTER = '&filterSearch=jobType_INTERNSHIP'

In [71]:
# Fetch one page of the job-finder table; it is only used afterwards to read
# the total-results counter from the returned HTML.
response = requests.get(
    url = (
        'https://www.bmwgroup.jobs/'
        'en/_jcr_content/main/layoutcontainer_5337/jobfinder30.jobfinder_table.content.html?'
        'rowIndex=1'
        '&blockCount=10' +
        FILTER
    ),
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout = 30,
)
# Fail here on an HTTP error instead of parsing an error page further down.
response.raise_for_status()

In [72]:
# Decode the raw response body as UTF-8 text.
html = str(response.content, encoding='utf-8')

In [73]:
# Parse the page with Python's built-in HTML parser.
soup = BeautifulSoup(markup=html, features='html.parser')

In [74]:
# Read the `data-counter` attribute of the job-finder table <div>
# (the value is kept as a string; it is converted with int() where needed).
count = (
    soup
    .find('div', attrs={'class': 'grp-jobfinder__table'})
    .get('data-counter')
)

In [75]:
# Display the counter value read from the page (still a string at this point).
count

'866'

In [76]:
# Page through the job-finder table and collect one record per posting.
page_size = 10
job_records = []

# Step by row offset up to `count` so that a final partial page is fetched too
# (e.g. rows 860-865 when count is 866); truncating count / 10 skipped it.
# NOTE(review): the offset starts at 0 here while the counter request used
# rowIndex=1 - confirm which base the endpoint expects.
for row_index in range(0, int(count), page_size):
    response = requests.get(
        url = (
            'https://www.bmwgroup.jobs/'
            'en/_jcr_content/main/layoutcontainer_5337/jobfinder30.jobfinder_table.content.html?'
            f'rowIndex={row_index}'
            f'&blockCount={page_size}' +
            FILTER
        ),
        # Without a timeout, one stalled page would hang the whole crawl.
        timeout = 30,
    )
    # A failed page would otherwise silently contribute zero postings.
    response.raise_for_status()

    html = response.content.decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    for job in soup.find_all('div', class_='grp-jobfinder__wrapper'):
        # Look up the anchor and its reference cell once instead of per field.
        anchor = job.find('a')
        ref_cell = anchor.find('div', class_='grp-jobfinder-cell-refno')
        job_records.append(
            {
                "title": anchor.get('aria-label'),
                "link": 'https://www.bmwgroup.jobs' + anchor.get('href'),
                "field": ref_cell.get('data-job-field'),
                "entity": ref_cell.get('data-job-legal-entity'),
                "city": ref_cell.get('data-job-location'),
                "type": ref_cell.get('data-job-type')
            }
        )

# Build the frame once from the collected records.
jobs = pd.DataFrame(job_records)

# Step 2: Extracting all job descriptions

In [77]:
# Download the detail page of every posting and keep its HTML keyed by link.
description_records = []

# One Session reuses the underlying connection across the many requests to the
# same host; the timeout keeps a single stalled page from hanging the crawl.
with requests.Session() as session:
    for index in jobs.index:
        link = jobs.loc[index, 'link']
        description_records.append({
            "link": link,
            "html": session.get(link, timeout=30).content.decode('utf-8')
        })

descriptions = pd.DataFrame(description_records)

In [78]:
# Remove newlines, tabs, carriage returns and double spaces from the stored
# HTML (in that order), then trim leading/trailing whitespace.
cleaned_html = descriptions['html']
for token in ('\n', '\t', '\r', '  '):
    cleaned_html = cleaned_html.str.replace(token, '')

descriptions['html'] = cleaned_html.str.strip()

In [79]:
# Attach each posting's page HTML to its listing row; the left join keeps
# every row of `jobs` even when no HTML was stored for its link.
job_descriptions = pd.merge(jobs, descriptions, how='left', on='link')

# The two source frames are no longer needed once merged.
del jobs, descriptions

In [81]:
def _extract_description(page_html):
    """Return the text of the description container in `page_html`.

    Returns None when `page_html` is not a string (e.g. NaN left by the left
    merge) or when the page has no matching container, instead of raising
    on `.text` of a missing element.
    """
    if not isinstance(page_html, str):
        return None
    container = BeautifulSoup(page_html, 'html.parser').find(
        'div',
        class_='container-layout container no-top-spacing no-bottom-spacing'
    )
    return None if container is None else container.text


# Plain column assignment appends `description` as the last column on the
# first run and simply overwrites it on a re-run, whereas `insert` raises if
# the column already exists.
job_descriptions['description'] = job_descriptions['html'].map(_extract_description)

# Step 3: Filtering the job keywords

In [93]:
# Chat-completions client for the inference endpoint below, authenticated
# with a key credential built from GITHUB_TOKEN (imported from settings).
github_credential = AzureKeyCredential(GITHUB_TOKEN)
client = ChatCompletionsClient(
    api_version = "2024-12-01-preview",
    credential = github_credential,
    endpoint = "https://models.inference.ai.azure.com",
)

In [100]:
# Pick the description text of the row labelled 343 as the sample posting.
job_description = job_descriptions['description'].loc[343]

In [108]:
# Assemble the user prompt line by line; the job description is embedded
# between triple single quotes.
prompt = (
    "\n"
    "You're a Human Resources Manager that helps students land internships at renowned companies.\n"
    "\n"
    "I need your help to find the keywords of skills RELEVANT for the job description below:\n"
    f"'''{job_description}'''\n"
    "\n"
    "ANSWER IN A PYTHON LIST FORMAT\n"
)

In [109]:
# Ask the model for the skill keywords of the sample posting.
# The behavioural instruction is sent as a system message: an "assistant"
# message represents earlier model output, not an instruction to the model.
# NOTE(review): confirm the "o1" deployment accepts a system message under
# this api_version.
response = client.complete(
    messages=[
        SystemMessage("You are the output of a function"),
        UserMessage(prompt),
    ],
    model = "o1"
)

In [110]:
# Parse the model's answer into a Python list.
raw_answer = response["choices"][0]["message"]["content"]

# Models often wrap the list in Markdown fences or add a sentence around it,
# so parse only the span from the first '[' to the last ']'.
list_start = raw_answer.find('[')
list_end = raw_answer.rfind(']')
if list_start == -1 or list_end < list_start:
    raise ValueError(f"No Python list found in model answer: {raw_answer!r}")

# literal_eval only evaluates literals, so the model's text is never executed.
ast.literal_eval(raw_answer[list_start:list_end + 1])

['JavaScript',
 'SQL',
 'Python',
 'R',
 'Power BI',
 'Data Visualization',
 'Basic Web Development',
 'Agile Project Management',
 'Quality Management Systems (QMS)',
 'Continuous Improvement',
 'Problem-Solving',
 'Digitalisation',
 'Business Reporting',
 'Analytical Skills',
 'Communication Skills',
 'Teamwork']