# Extract Tasks

This notebook uses Amazon Bedrock batch inference to extract a list of tasks associated
with job roles as advertised on Civil Service Jobs.


In [1]:
import pandas as pd

# Read the jobs extract, turn the literal string "nan" in the department
# column into a real missing value, and discard rows without a department.
jobs_data = (
    pd.read_parquet("./data/jobs.pq")
    .assign(department=lambda frame: frame["department"].replace("nan", pd.NA))
    .dropna(subset=["department"])
)

# The ten departments with the most rows, most frequent first.
# NOTE: the misspelt name `large_deparments` is kept on purpose because
# later cells refer to it by this exact name.
print("Large Departments")
large_deparments = (
    jobs_data
    .value_counts("department")
    .head(10)
    .index
    .to_list()
)
print("\n".join(large_deparments))


Large Departments
Ministry of Defence
HM Revenue and Customs
HM Prison & Probation Service
Home Office
Foreign, Commonwealth & Development Office
Ministry of Justice
Cabinet Office
Department for Education
Scottish Government
Department for Work and Pensions


`jobs.pq` comprises a recent extract of the jobs posted on Civil Service Jobs. Each job
description contains tasks which can be extracted using an LLM. Each task can also be graded
according to its potential to be automated with technologies like generative AI. Focussing on
the larger departments, shown above, we can extract a comprehensive list of tasks.

In [None]:
from typing import Dict 
from extraction import create_job, skewer_department
import boto3  
from dotenv import load_dotenv
# Load settings from a local .env file before creating any AWS session
# (presumably AWS credentials/region -- confirm against the project's .env).
load_dotenv()
boto3.setup_default_session()
session = boto3.Session()

# One create_job call per large department. Results are stored under the
# department's slug as returned by skewer_department.
# NOTE(review): create_job looks like it submits a Bedrock batch job and
# returns its ARN -- confirm in extraction.py. Re-running this cell would
# then submit a fresh set of jobs.
job_arns: Dict[str, str] = {}
for department in large_deparments:
    kebab = skewer_department(department)
    # Work on a copy so the helper cannot modify rows of jobs_data.
    department_jobs = jobs_data[jobs_data["department"] == department].copy()
    job_arns[kebab] = create_job(department_jobs, kebab, "trial-1", session)





In [None]:
from pprint import pprint
# Show the ARNs currently held in job_arns (set by the submission cell).
pprint(job_arns)

# Pin job_arns to a fixed set of ARNs, keyed by department slug. From here
# on the notebook uses these values rather than whatever was printed above.
job_arns = {
    'cabinet-office':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/jmeegk5rpit7',
    'department-for-education':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/7jgo6har1v2k',
    'department-for-work-and-pensions':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/1d5s3tp7cuje',
    'foreign-commonwealth-development-office':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/e4s08jqd6jtt',
    'hm-prison-probation-service':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/0et0ecsnl2mr',
    'hm-revenue-and-customs':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/zmrn780b7af9',
    'home-office':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/qupta2en20yg',
    'ministry-of-defence':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/vxub08igmtcv',
    'ministry-of-justice':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/133yd2h4amyo',
    'scottish-government':
        'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/hy9y2ah1kzx1',
}

{'cabinet-office': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/jmeegk5rpit7',
 'department-for-education': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/7jgo6har1v2k',
 'department-for-work-and-pensions': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/1d5s3tp7cuje',
 'foreign-commonwealth-development-office': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/e4s08jqd6jtt',
 'hm-prison-probation-service': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/0et0ecsnl2mr',
 'hm-revenue-and-customs': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/zmrn780b7af9',
 'home-office': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/qupta2en20yg',
 'ministry-of-defence': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/vxub08igmtcv',
 'ministry-of-justice': 'arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/133yd2h4amyo',
 'scottish-government': 'arn:aws:bedrock:us-east-1:992382722318:

In [None]:
from llmbo import StructuredBatchInferer
from extraction import TaskOutput


# Re-attach to each job by ARN, fetch its results, and keep the resulting
# StructuredBatchInferer under the same key used in job_arns.
session = boto3.Session()
instances = {}
for department, job_arn in job_arns.items():
    sbi = StructuredBatchInferer.recover_structured_job(
        job_arn=job_arn,
        region="us-east-1",
        output_model=TaskOutput,
        session=session,
    )

    # Download first, then load. An inferer is only recorded in `instances`
    # once both steps have finished without raising.
    sbi.download_results()
    sbi.load_results()

    instances[department] = sbi



2025-02-18 13:09:18,531 - llmbo.llmbo.StructuredBatchInferer - INFO - Attempting to Recover BatchInferer from arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/vxub08igmtcv
2025-02-18 13:09:18,542 - botocore.credentials - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2025-02-18 13:09:31,853 - llmbo.llmbo.StructuredBatchInferer - INFO - Initialized StructuredBatchInferer with TaskOutput schema
2025-02-18 13:09:31,854 - llmbo.llmbo.StructuredBatchInferer - INFO - Intialising BatchInferer
2025-02-18 13:09:32,809 - llmbo.llmbo.StructuredBatchInferer - INFO - Role 'BatchInferenceRole' exists.
2025-02-18 13:09:32,825 - llmbo.llmbo.StructuredBatchInferer - INFO - Initialized BatchInferer
2025-02-18 13:09:32,826 - llmbo.llmbo.StructuredBatchInferer - INFO - Job arn:aws:bedrock:us-east-1:992382722318:model-invocation-job/vxub08igmtcv is already Completed
2025-02-18 13:09:32,826 - llmbo.llmbo.StructuredBatchInferer - INFO - Job:arn:aws:bedrock:us-east-1:992382

vacancy_id=24
vacancy_id=26
vacancy_id=27
vacancy_id=30
vacancy_id=37
vacancy_id=246
vacancy_id=358
vacancy_id=443
vacancy_id=653
vacancy_id=6121
vacancy_id=6129
vacancy_id=6139
vacancy_id=6151
vacancy_id=6158
vacancy_id=6161
vacancy_id=6164
vacancy_id=6175
vacancy_id=6178
vacancy_id=6180
vacancy_id=6182
vacancy_id=6187
vacancy_id=6190
vacancy_id=6191
vacancy_id=6195
vacancy_id=6196
vacancy_id=6199
vacancy_id=6202
vacancy_id=6203
vacancy_id=6207
vacancy_id=6211
vacancy_id=6224
vacancy_id=6229
vacancy_id=6232
vacancy_id=6247
vacancy_id=6250
vacancy_id=6261
vacancy_id=6265
vacancy_id=6274
vacancy_id=6276
vacancy_id=6286
vacancy_id=6288
vacancy_id=6291
vacancy_id=6308
vacancy_id=6323
vacancy_id=6324
vacancy_id=6325
vacancy_id=6326
vacancy_id=6328
vacancy_id=6329
vacancy_id=6330
vacancy_id=6331
vacancy_id=6359
vacancy_id=6364
vacancy_id=6369
vacancy_id=6405
vacancy_id=6410
vacancy_id=6411
vacancy_id=6419
vacancy_id=6434
vacancy_id=6440
vacancy_id=6441
vacancy_id=6451
vacancy_id=6454
vacanc