In [1]:
import pandas as pd
import seaborn as sns
import json
import os
import numpy as np

def set_custom_seaborn_style():
    custom_params = {
        "axes.facecolor": "white",
        "grid.color": "#e6e6e6",
        "grid.linestyle": "--",
        "axes.edgecolor": "#cccccc",
        "axes.labelsize": 14,
        "axes.titlesize": 16,
        "xtick.labelsize": 12,
        "ytick.labelsize": 12,
        "xtick.color": "#555555",
        "ytick.color": "#555555",
        "legend.frameon": False,
        "legend.fontsize": 12,
        "font.family": "sans-serif",
        "font.sans-serif": ["Arial", "Helvetica", "DejaVu Sans"],
    }
    sns.set_theme(style="whitegrid", rc=custom_params)

set_custom_seaborn_style()

In [2]:
labels_je = pd.read_csv('../../data/manual_automation_labels/business_and_financial_operations_occupations_JE.csv')
labels_mr  = pd.read_csv('../../data/manual_automation_labels/business_and_financial_operations_occupations_MR.csv')

# read in data
df = pd.read_csv('../../data/task_lists/business_and_financial_operations_occupations.csv')
try:
    df =df .drop(['label_JE','label_MR'],axis=1)
except:
    pass

In [3]:
df.shape

(1008, 10)

In [4]:
df_all = df.merge(labels_je[['task_id','label_JE']], on='task_id').\
    merge(labels_mr[['task_id','label_MR']], on='task_id')
df_all['label_MR'] = df_all['label_MR'].astype(int)

In [5]:
# filter only CORE tasks
df_all = df_all[df_all['task_type']=='Core']

In [6]:
np.sum(df_all['label_JE']==df_all['label_MR'])/df_all.shape[0]

np.float64(0.7354330708661417)

In [59]:
label_overlap_counts = df_all.rename(columns={'label_JE':'Labeler \#1', 'label_MR': 'Labeler \#2'}).groupby(['Labeler \#1','Labeler \#2']).size().unstack()
label_overlap = pd.concat([pd.Series(['','']),label_overlap_counts], axis=1)
label_overlap.index.name ='Labeler \#1'
label_overlap.columns = ['Labler \#2','0','1']

latex_table = label_overlap.to_latex(index=True, 
column_format="llcc", 
caption="Label Overlap", label="tab:label_overlap")

# Save to file
with open("../../results/tables/label_overlap.tex", "w") as f:
    f.write(latex_table)

#print(latex_table)  # Display LaTeX output

In [53]:
label_overlap_counts = df_all.rename(columns={'label_JE':'Labeler \#1', 'label_MR': 'Labeler \#2'}).groupby(['Labeler \#1','Labeler \#2']).size().unstack()
label_overlap_perc = np.round(label_overlap_counts/label_overlap_counts.sum().sum()*100,2).reset_index().drop('Labeler \#1',axis=1)

In [60]:
pd.concat([label_overlap, label_overlap_perc], axis=1)

Unnamed: 0,Labler \#2,0,1,0.1,1.1
0,,200,78,31.5,12.28
1,,90,267,14.17,42.05


In [35]:
label_overlap

Labeler \#2,0,0,1
Labeler \#1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,,200,78
1,,90,267


In [104]:
df_all[df_all['label_MR']==0]

Unnamed: 0.1,Unnamed: 0,soc_code,task_id,task,task_type,title,gpt4_exposure_alt_rubric,SOC-2digit,SOC Title,label_JE,label_MR
14,1284,13-1023.00,1153.0,Study sales records and inventory levels of cu...,Core,"Purchasing Agents, Except Wholesale, Retail, a...",E2,13,Business and Financial Operations Occupations,1,0
32,1320,13-1032.00,7249.0,Evaluate practicality of repair as opposed to ...,Core,"Insurance Appraisers, Auto Damage",E0,13,Business and Financial Operations Occupations,1,0
34,1324,13-1032.00,7246.0,"Estimate parts and labor to repair damage, usi...",Core,"Insurance Appraisers, Auto Damage",E0,13,Business and Financial Operations Occupations,1,0
35,1325,13-1032.00,7250.0,Determine salvage value on total-loss vehicle.,Core,"Insurance Appraisers, Auto Damage",E0,13,Business and Financial Operations Occupations,1,0
41,1343,13-1041.01,10829.0,Determine the nature of code violations and ac...,Core,Environmental Compliance Inspectors,E0,13,Business and Financial Operations Occupations,1,0
...,...,...,...,...,...,...,...,...,...,...,...
383,2205,13-2099.01,15985.0,Collaborate in the development or testing of n...,Core,Financial Quantitative Analysts,E2,13,Business and Financial Operations Occupations,1,0
384,2211,13-2099.04,16053.0,Document all investigative activities.,Core,"Fraud Examiners, Investigators and Analysts",E1,13,Business and Financial Operations Occupations,1,0
387,2216,13-2099.04,16045.0,Review reports of suspected fraud to determine...,Core,"Fraud Examiners, Investigators and Analysts",E2,13,Business and Financial Operations Occupations,1,0
388,2219,13-2099.04,16038.0,Prepare evidence for presentation in court.,Core,"Fraud Examiners, Investigators and Analysts",E2,13,Business and Financial Operations Occupations,1,0


In [105]:
df_all[df_all['label_JE']==0]

Unnamed: 0.1,Unnamed: 0,soc_code,task_id,task,task_type,title,gpt4_exposure_alt_rubric,SOC-2digit,SOC Title,label_JE,label_MR
0,1226,13-1011.00,12865.0,"Collect fees, commissions, or other payments, ...",Core,"Agents and Business Managers of Artists, Perfo...",E0,13,Business and Financial Operations Occupations,0,1
3,1244,13-1021.00,12883.0,Maintain records of business transactions and ...,Core,"Buyers and Purchasing Agents, Farm Products",E1,13,Business and Financial Operations Occupations,0,1
9,1275,13-1023.00,1142.0,Purchase the highest quality merchandise at th...,Core,"Purchasing Agents, Except Wholesale, Retail, a...",E2,13,Business and Financial Operations Occupations,0,1
16,1286,13-1023.00,1152.0,"Review catalogs, industry periodicals, directo...",Core,"Purchasing Agents, Except Wholesale, Retail, a...",E2,13,Business and Financial Operations Occupations,0,1
18,1289,13-1023.00,1155.0,Arrange the payment of duty and freight charges.,Core,"Purchasing Agents, Except Wholesale, Retail, a...",E2,13,Business and Financial Operations Occupations,0,1
21,1293,13-1031.00,21426.0,Pay and process claims within designated autho...,Core,"Claims Adjusters, Examiners, and Investigators",E0,13,Business and Financial Operations Occupations,0,1
22,1294,13-1031.00,21423.0,"Investigate, evaluate, and settle claims, appl...",Core,"Claims Adjusters, Examiners, and Investigators",E2,13,Business and Financial Operations Occupations,0,1
25,1300,13-1031.00,21429.0,"Enter claim payments, reserves and new claims ...",Core,"Claims Adjusters, Examiners, and Investigators",E1,13,Business and Financial Operations Occupations,0,1
26,1301,13-1031.00,21425.0,"Resolve complex, severe exposure claims, using...",Core,"Claims Adjusters, Examiners, and Investigators",E2,13,Business and Financial Operations Occupations,0,1
27,1302,13-1031.00,21424.0,Adjust reserves or provide reserve recommendat...,Core,"Claims Adjusters, Examiners, and Investigators",E2,13,Business and Financial Operations Occupations,0,1


In [102]:
df_all[(df_all['label_JE']==1) + (df_all['label_MR']==1)]

Unnamed: 0.1,Unnamed: 0,soc_code,task_id,task,task_type,title,gpt4_exposure_alt_rubric,SOC-2digit,SOC Title,label_JE,label_MR
0,1226,13-1011.00,12865.0,"Collect fees, commissions, or other payments, ...",Core,"Agents and Business Managers of Artists, Perfo...",E0,13,Business and Financial Operations Occupations,0,1
1,1227,13-1011.00,21163.0,Send samples of clients' work and other promot...,Core,"Agents and Business Managers of Artists, Perfo...",E2,13,Business and Financial Operations Occupations,1,1
2,1228,13-1011.00,12870.0,Keep informed of industry trends and deals.,Core,"Agents and Business Managers of Artists, Perfo...",E2,13,Business and Financial Operations Occupations,1,1
3,1244,13-1021.00,12883.0,Maintain records of business transactions and ...,Core,"Buyers and Purchasing Agents, Farm Products",E1,13,Business and Financial Operations Occupations,0,1
4,1245,13-1021.00,12882.0,Review orders to determine product types and q...,Core,"Buyers and Purchasing Agents, Farm Products",E2,13,Business and Financial Operations Occupations,1,1
...,...,...,...,...,...,...,...,...,...,...,...
389,2222,13-2099.04,16047.0,Recommend actions in fraud cases.,Core,"Fraud Examiners, Investigators and Analysts",E2,13,Business and Financial Operations Occupations,1,0
390,2223,13-2099.04,16052.0,Evaluate business operations to identify risk ...,Core,"Fraud Examiners, Investigators and Analysts",E2,13,Business and Financial Operations Occupations,1,1
391,2224,13-2099.04,16054.0,"Create and maintain logs, records, or database...",Core,"Fraud Examiners, Investigators and Analysts",E1,13,Business and Financial Operations Occupations,1,1
392,2225,13-2099.04,16035.0,Maintain knowledge of current events and trend...,Core,"Fraud Examiners, Investigators and Analysts",E2,13,Business and Financial Operations Occupations,1,1


In [87]:
df_all[(df_all['label_MR']==0)*(df_all['label_JE']==0)]

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,soc_code,task_id,task,task_type,title,gpt4_exposure_alt_rubric,SOC-2digit,SOC Title,label_JE,label_MR
3,1230,1230,13-1011.00,12869.0,"Negotiate with managers, promoters, union offi...",Core,"Agents and Business Managers of Artists, Perfo...",E0,13,Business and Financial Operations Occupations,0,0
4,1231,1231,13-1011.00,12866.0,Confer with clients to develop strategies for ...,Core,"Agents and Business Managers of Artists, Perfo...",E0,13,Business and Financial Operations Occupations,0,0
5,1232,1232,13-1011.00,12867.0,Develop contacts with individuals and organiza...,Core,"Agents and Business Managers of Artists, Perfo...",E0,13,Business and Financial Operations Occupations,0,0
6,1233,1233,13-1011.00,12868.0,Schedule promotional or performance engagement...,Core,"Agents and Business Managers of Artists, Perfo...",E2,13,Business and Financial Operations Occupations,0,0
7,1234,1234,13-1011.00,12873.0,Arrange meetings concerning issues involving t...,Core,"Agents and Business Managers of Artists, Perfo...",E0,13,Business and Financial Operations Occupations,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
922,2215,2215,13-2099.04,16049.0,Interview witnesses or suspects and take state...,Core,"Fraud Examiners, Investigators and Analysts",E0,13,Business and Financial Operations Occupations,0,0
925,2218,2218,13-2099.04,16048.0,"Lead, or participate in, fraud investigation t...",Core,"Fraud Examiners, Investigators and Analysts",E2,13,Business and Financial Operations Occupations,0,0
927,2220,2220,13-2099.04,16055.0,Coordinate investigative efforts with law enfo...,Core,"Fraud Examiners, Investigators and Analysts",E0,13,Business and Financial Operations Occupations,0,0
933,2227,2227,13-2099.04,16040.0,Negotiate with responsible parties to arrange ...,Core,"Fraud Examiners, Investigators and Analysts",E1,13,Business and Financial Operations Occupations,0,0


In [88]:
np.sum(df_all['label_JE']==0)

np.int64(425)

In [87]:
path_to_data = '../../data/exam_approach/material_lists/materials_T4_gpt3.csv'
file_name = '_'.join(path_to_data.split('/')[-1].split('_')[1:])


In [89]:
test = pd.read_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/task_lists/E1E2_OR_E1E2/business_and_financial_operations_occupations.csv')

In [91]:
test[['task_id','task','title','task_type']].to_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/task_lists/E1E2_OR_E1E2/business_and_financial_operations_occupations_for_labeling.csv')

In [69]:
path_to_data = '../../data/benchmarking_gpt_judgment/task_lists/E1E2_OR_E1E2/all_tasks.csv'
df = pd.read_csv(path_to_data)

In [71]:
df.columns

Index(['Unnamed: 0', 'O*NET-SOC Code', 'Task ID', 'Task', 'Task Type', 'Title',
       'human_exposure', 'gpt4_exposure', 'gpt4_alt_exposure',
       'gpt_3_relevant', 'gpt4_automation', 'alpha', 'beta', 'gamma',
       'automation', 'human_labels'],
      dtype='object')

In [72]:
df[df['Task']=='Translate or request translation of reference materials.']

Unnamed: 0.1,Unnamed: 0,O*NET-SOC Code,Task ID,Task,Task Type,Title,human_exposure,gpt4_exposure,gpt4_alt_exposure,gpt_3_relevant,gpt4_automation,alpha,beta,gamma,automation,human_labels
4553,5090,19-3093.00,3731.0,Translate or request translation of reference ...,Supplemental,Historians,E1,E1,E1,False,T4,1.0,1.0,1.0,1.0,E1


In [26]:
test_lower = test[50:].rename(columns={'row':'prompt','prompt':'task_id','task_id':'occupation','occupation':'task_description','task_description':'automation_exposure','automation_exposure':'reasoning','reasoning':'output','output':'output2'})
test_lower = test_lower.drop('output2',axis=1)

In [49]:
test_lower

Unnamed: 0.1,Unnamed: 0,prompt,task_id,occupation,task_description,automation_exposure,reasoning,output
50,9341,\n\n## Rubric\n\nPlease label the given task a...,13238.0,"Door-to-Door Sales Workers, News and Street Ve...",Develop prospect lists.,T3,Conversational AI agents can assist significan...,"```json\n{\n ""task_id"": ""13238.0"",\n ""occupa..."
51,970,\n\n## Rubric\n\nPlease label the given task a...,18039.0,Regulatory Affairs Managers,Train staff in regulatory policies or procedures.,T1,While conversational AI agents can assist in p...,"```json\n{\n ""task_id"": ""18039.0"",\n ""occupa..."
52,9628,\n\n## Rubric\n\nPlease label the given task a...,18565.0,Customer Service Representatives,Confer with customers by telephone or in perso...,T2,Conversational AI agents can handle a signific...,"```json\n{\n ""task_id"": ""18565.0"",\n ""occupa..."
53,8123,\n\n## Rubric\n\nPlease label the given task a...,17507.0,Genetic Counselors,Interpret laboratory results and communicate f...,T1,While conversational AI agents can assist in i...,"```json\n{\n ""task_id"": ""17507.0"",\n ""occupa..."
54,11435,\n\n## Rubric\n\nPlease label the given task a...,14545.0,Motorboat Operators,Organize and direct the activities of crew mem...,T0,This task requires real-time human interaction...,"```json\n{\n ""task_id"": ""14545.0"",\n ""occupa..."
...,...,...,...,...,...,...,...,...
295,4707,\n\n## Rubric\n\nPlease label the given task a...,22308.0,Nuclear Technicians,Follow nuclear equipment operational policies ...,T1,While conversational AI agents can assist by p...,"```json\n{\n ""task_id"": ""22308.0"",\n ""occupa..."
296,6379,\n\n## Rubric\n\nPlease label the given task a...,292.0,Art Directors,Review illustrative material to determine if i...,T2,Conversational AI agents can assist in reviewi...,"```json\n{\n ""task_id"": ""292.0"",\n ""occupati..."
297,11484,\n\n## Rubric\n\nPlease label the given task a...,23852.0,Transportation Inspectors,"Record details about freight conditions, handl...",T2,Conversational AI agents can assist in recordi...,"```json\n{\n ""task_id"": ""23852.0"",\n ""occupa..."
298,11183,\n\n## Rubric\n\nPlease label the given task a...,15121.0,"Potters, Manufacturing","Design clay forms and molds, and decorations f...",T1,While conversational AI agents can assist in g...,"```json\n{\n ""task_id"": ""15121.0"",\n ""occupa..."


In [None]:
test_new = pd.concat([test[0:50],test_lower],axis=0)
test_new.to_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/benchmarking_gpt_judgment/automation_labels/random100.csv')

In [51]:
test_new.groupby('automation_exposure').size()

automation_exposure
T0     28
T1    113
T2     94
T3     64
T4      1
dtype: int64

In [53]:
test_new[test_new['automation_exposure']=='T4']

Unnamed: 0.1,Unnamed: 0,row,prompt,task_id,occupation,task_description,automation_exposure,reasoning,output
187,9550,,\n\n## Rubric\n\nPlease label the given task a...,2575.0,Tellers,"Quote unit exchange rates, following daily int...",T4,Conversational AI agents can easily access and...,"```json\n{\n ""task_id"": ""2575.0"",\n ""occupat..."


In [47]:
import numpy as np
np.array(test_new['automation_exposure'])

array(['T2', 'T3', 'T2', 'T1', 'T1', 'T2', 'T1', 'T1', 'T2', 'T3', 'T1',
       'T2', 'T2', 'T2', 'T0', 'T2', 'T2', 'T2', 'T1', 'T1', 'T0', 'T1',
       'T1', 'T2', 'T3', 'T0', 'T2', 'T1', 'T1', 'T2', 'T2', 'T3', 'T1',
       'T0', 'T2', 'T1', 'T3', 'T0', 'T0', 'T2', 'T2', 'T2', 'T1', 'T2',
       'T3', 'T2', 'T2', 'T3', 'T2', 'T2',
       'Conversational AI agents can assist significantly in developing prospect lists by analyzing data, identifying potential leads, and organizing information. However, human oversight is necessary to ensure the quality and relevance of the prospects, as well as to incorporate any nuanced understanding of the market or personal connections that the AI might not be aware of. Therefore, while AI can handle a large portion of the task, human judgment is still required to finalize the list.',
       'While conversational AI agents can assist in providing information and resources about regulatory policies, the task of training staff involves significant huma

In [56]:
gpts_automation = pd.read_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/external/gpts-are-gpts/full_labelset.tsv', sep='\t')
llms_new = pd.read_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/benchmarking_gpt_judgment/T4_tasks_automatisation.csv')
gpt_new = pd.read_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/benchmarking_gpt_judgment/T4_tasks_automatisation_gpt.csv')

In [59]:
gpt_new[gpt_new['automation_exposure']=='T4']

Unnamed: 0.1,Unnamed: 0,row,prompt,task_id,occupation,task_description,automation_exposure,reasoning,output
3,3,3,\nLabel the occupation/task with one of the fo...,15709.0,Online Merchants,Deliver e-mail confirmation of completed trans...,T4,The task of delivering e-mail confirmations fo...,"```json\n{\n ""task_id"": ""15709.0"",\n ""occupa..."
4,4,4,\nLabel the occupation/task with one of the fo...,15730.0,Online Merchants,"Calculate purchase subtotals, taxes, and shipp...",T4,This task involves straightforward calculation...,"```json\n{\n ""task_id"": ""15730.0"",\n ""occupa..."
5,5,5,\nLabel the occupation/task with one of the fo...,15715.0,Online Merchants,"Transfer digital media, such as music, video, ...",T4,The task of transferring digital media to cust...,"```json\n{\n ""task_id"": ""15715.0"",\n ""occupa..."
9,9,9,\nLabel the occupation/task with one of the fo...,5298.0,"Tax Examiners and Collectors, and Revenue Agents",Enter tax return information into computers fo...,T4,The task of entering tax return information in...,"```json\n{\n ""task_id"": ""5298.0"",\n ""occupat..."
11,11,11,\nLabel the occupation/task with one of the fo...,14705.0,Web Developers,Renew domain name registrations.,T4,Renewing domain name registrations is a task t...,"```json\n{\n ""task_id"": ""14705.0"",\n ""occupa..."
15,15,15,\nLabel the occupation/task with one of the fo...,20981.0,Biological Technicians,Input data into databases.,T4,The task of inputting data into databases is p...,"```json\n{\n ""task_id"": ""20981.0"",\n ""occupa..."
55,55,55,\nLabel the occupation/task with one of the fo...,4627.0,Telemarketers,"Maintain records of contacts, accounts, and or...",T4,"Maintaining records of contacts, accounts, and...","```json\n{\n ""task_id"": ""4627.0"",\n ""occupat..."
65,65,65,\nLabel the occupation/task with one of the fo...,13257.0,Telephone Operators,Update directory information.,T4,Updating directory information is a task that ...,"```json\n{\n ""task_id"": ""13257.0"",\n ""occupa..."
66,66,66,\nLabel the occupation/task with one of the fo...,13258.0,Telephone Operators,"Keep records of calls placed and received, and...",T4,The task of keeping records of calls placed an...,"```json\n{\n ""task_id"": ""13258.0"",\n ""occupa..."
75,75,75,\nLabel the occupation/task with one of the fo...,2575.0,Tellers,"Quote unit exchange rates, following daily int...",T4,This task involves quoting unit exchange rates...,"```json\n{\n ""task_id"": ""2575.0"",\n ""occupat..."


In [60]:
gpt_new[gpt_new['automation_exposure']=='T4']

Unnamed: 0.1,Unnamed: 0,row,prompt,task_id,occupation,task_description,automation_exposure,reasoning,output
3,3,3,\nLabel the occupation/task with one of the fo...,15709.0,Online Merchants,Deliver e-mail confirmation of completed trans...,T4,The task of delivering e-mail confirmations fo...,"```json\n{\n ""task_id"": ""15709.0"",\n ""occupa..."
4,4,4,\nLabel the occupation/task with one of the fo...,15730.0,Online Merchants,"Calculate purchase subtotals, taxes, and shipp...",T4,This task involves straightforward calculation...,"```json\n{\n ""task_id"": ""15730.0"",\n ""occupa..."
5,5,5,\nLabel the occupation/task with one of the fo...,15715.0,Online Merchants,"Transfer digital media, such as music, video, ...",T4,The task of transferring digital media to cust...,"```json\n{\n ""task_id"": ""15715.0"",\n ""occupa..."
9,9,9,\nLabel the occupation/task with one of the fo...,5298.0,"Tax Examiners and Collectors, and Revenue Agents",Enter tax return information into computers fo...,T4,The task of entering tax return information in...,"```json\n{\n ""task_id"": ""5298.0"",\n ""occupat..."
11,11,11,\nLabel the occupation/task with one of the fo...,14705.0,Web Developers,Renew domain name registrations.,T4,Renewing domain name registrations is a task t...,"```json\n{\n ""task_id"": ""14705.0"",\n ""occupa..."
15,15,15,\nLabel the occupation/task with one of the fo...,20981.0,Biological Technicians,Input data into databases.,T4,The task of inputting data into databases is p...,"```json\n{\n ""task_id"": ""20981.0"",\n ""occupa..."
55,55,55,\nLabel the occupation/task with one of the fo...,4627.0,Telemarketers,"Maintain records of contacts, accounts, and or...",T4,"Maintaining records of contacts, accounts, and...","```json\n{\n ""task_id"": ""4627.0"",\n ""occupat..."
65,65,65,\nLabel the occupation/task with one of the fo...,13257.0,Telephone Operators,Update directory information.,T4,Updating directory information is a task that ...,"```json\n{\n ""task_id"": ""13257.0"",\n ""occupa..."
66,66,66,\nLabel the occupation/task with one of the fo...,13258.0,Telephone Operators,"Keep records of calls placed and received, and...",T4,The task of keeping records of calls placed an...,"```json\n{\n ""task_id"": ""13258.0"",\n ""occupa..."
75,75,75,\nLabel the occupation/task with one of the fo...,2575.0,Tellers,"Quote unit exchange rates, following daily int...",T4,This task involves quoting unit exchange rates...,"```json\n{\n ""task_id"": ""2575.0"",\n ""occupat..."


In [None]:
llms_new[llms_new['automation_exposure']=='T4']

(46, 9)

In [None]:
llms_new[llms_new['automation_exposure']!='T4']

(86, 9)

In [48]:
gpts_automation.merge(llms_new[['task_id','automation_exposure']], how='left', left_on ='Task ID', right_on='task_id')

Unnamed: 0.1,Unnamed: 0,O*NET-SOC Code,Task ID,Task,Task Type,Title,human_exposure_agg,gpt4_exposure,gpt4_exposure_alt_rubric,gpt_3_relevant,gpt4_automation,alpha,beta,gamma,automation,human_labels,task_id,automation_exposure
0,0,11-1011.00,8823.0,Direct or coordinate an organization's financi...,Core,Chief Executives,E0,E2,E2,False,T2,0.0,0.5,1.0,0.50,E0,,
1,1,11-1011.00,8831.0,Appoint department heads or managers and assig...,Core,Chief Executives,E0,E0,E0,False,T1,0.0,0.0,0.0,0.25,E0,,
2,2,11-1011.00,8825.0,Analyze operations to evaluate performance of ...,Core,Chief Executives,E2,E2,E2,False,T2,0.0,0.5,1.0,0.50,E2,,
3,3,11-1011.00,8826.0,"Direct, plan, or implement policies, objective...",Core,Chief Executives,E0,E2,E0,False,T1,0.0,0.5,1.0,0.25,E0,,
4,4,11-1011.00,8827.0,"Prepare budgets for approval, including those ...",Core,Chief Executives,E2,E2,E2,False,T2,0.0,0.5,1.0,0.50,E2,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19260,19260,53-7121.00,12807.0,Unload cars containing liquids by connecting h...,Supplemental,"Tank Car, Truck, and Ship Loaders",E0,E0,E0,False,T0,0.0,0.0,0.0,0.00,E0,,
19261,19261,53-7121.00,12804.0,"Clean interiors of tank cars or tank trucks, u...",Supplemental,"Tank Car, Truck, and Ship Loaders",E0,E0,E0,False,T0,0.0,0.0,0.0,0.00,E0,,
19262,19262,53-7121.00,12803.0,Lower gauge rods into tanks or read meters to ...,Supplemental,"Tank Car, Truck, and Ship Loaders",E0,E0,E0,False,T0,0.0,0.0,0.0,0.00,E0,,
19263,19263,53-7121.00,12805.0,Operate conveyors and equipment to transfer gr...,Supplemental,"Tank Car, Truck, and Ship Loaders",E0,E0,E0,False,T0,0.0,0.0,0.0,0.00,E0,,


In [31]:
org2= pd.read_csv('/Users/htr365/Downloads/full_labelset.tsv', sep='\t')

In [29]:
org.groupby('gpt4_automation').size()

gpt4_automation
T0    5746
T1    6284
T2    3224
T3    3879
T4     132
dtype: int64

In [33]:
org[org['gpt4_automation']=='T4'].to_csv("/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/task_lists/T4_tasks.csv")

In [35]:
test = pd.read_csv("/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/task_lists/T4_tasks.csv")

In [38]:
test

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,O*NET-SOC Code,Task ID,Task,Task Type,Title,human_exposure_agg,gpt4_exposure,gpt4_exposure_alt_rubric,gpt_3_relevant,gpt4_automation,alpha,beta,gamma,automation,human_labels
0,790,790,11-9051.00,1102.0,Take dining reservations.,Supplemental,Food Service Managers,E1,E2,E2,False,T4,0.0,0.5,1.0,1.0,E1
1,953,953,11-9131.00,5267.0,Collect rents for post office boxes.,Supplemental,Postmasters and Mail Superintendents,E0,E0,E0,False,T4,0.0,0.0,0.0,1.0,E0
2,1845,1845,13-1199.06,15731.0,"Receive and process payments from customers, u...",Core,Online Merchants,E0,E0,E0,False,T4,0.0,0.0,0.0,1.0,E0
3,1847,1847,13-1199.06,15709.0,Deliver e-mail confirmation of completed trans...,Core,Online Merchants,E1,E1,E1,True,T4,1.0,1.0,1.0,1.0,E1
4,1851,1851,13-1199.06,15730.0,"Calculate purchase subtotals, taxes, and shipp...",Core,Online Merchants,E2,E1,E0,True,T4,1.0,1.0,1.0,1.0,E2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,18648,18648,53-3054.00,23772.0,Turn the taximeter on when passengers enter th...,,Taxi Drivers,E0,E0,E0,False,T4,0.0,0.0,0.0,1.0,E0
128,18745,18745,53-4041.00,12771.0,"Make announcements to passengers, such as noti...",Core,Subway and Streetcar Operators,E2,E1,E1,False,T4,1.0,1.0,1.0,1.0,E2
129,19000,19000,53-6061.00,13181.0,Issue and collect passenger boarding passes an...,Supplemental,Passenger Attendants,E0,E0,E0,False,T4,0.0,0.0,0.0,1.0,E0
130,19022,19022,53-7011.00,10764.0,Press console buttons to deflect packages to p...,Supplemental,Conveyor Operators and Tenders,E0,E0,E0,False,T4,0.0,0.0,0.0,1.0,E0


In [3]:
occupation_group = "Business and Financial Operations Occupations"
df = pd.read_csv(f'../../data/real_life_examples/examples/{occupation_group.replace(" ", "_").lower()}.csv')

In [7]:
df.columns

Index(['Unnamed: 0', 'row', 'prompt', 'task_id', 'occupation',
       'task_description', 'e1_description', 'e2_description',
       'e3_description', 'e1_remote_possible', 'e2_remote_possible',
       'e3_remote_possible', 'e1_input_necessary', 'e2_input_necessary',
       'e3_input_necessary', 'e1_required_tools', 'e2_required_tools',
       'e3_required_tools', 'e1_required_materials', 'e2_required_materials',
       'e3_required_materials', 'e1_output', 'e2_output', 'e3_output',
       'output'],
      dtype='object')

In [14]:
df.shape[0]

330

In [13]:
df[(df['e1_input_necessary'] == df['e2_input_necessary'])*(df['e2_input_necessary'] == df['e3_input_necessary'])].shape[0]

262

In [19]:
df_no_input = df[(df['e1_input_necessary'] == False) | (df['e3_input_necessary'] == False ) | (df['e3_input_necessary'] == False)]

In [21]:
df_no_input.head()

Unnamed: 0.1,Unnamed: 0,row,prompt,task_id,occupation,task_description,e1_description,e2_description,e3_description,e1_remote_possible,...,e1_required_tools,e2_required_tools,e3_required_tools,e1_required_materials,e2_required_materials,e3_required_materials,e1_output,e2_output,e3_output,output
1,1,1,\n\n1. **Generate cases **\n\nDescribe three c...,21163.0,"Agents and Business Managers of Artists, Perfo...",Send samples of clients' work and other promot...,An agent is tasked with sending a portfolio of...,A business manager is responsible for sending ...,An agent is tasked with sending a highlight re...,True,...,"['Image editing software', 'Document editor', ...","['Audio editing software', 'Document editor', ...","['Video editing software', 'Document editor', ...","['Images', 'Text documents']","['Audio files', 'Text documents']","['Video files', 'Text documents']","['Sent emails', 'Text documents']","['Sent emails', 'Text documents']","['Sent emails', 'Text documents']","```json\n{\n ""task_id"": ""21163.0"",\n ""occupa..."
11,11,11,\n\n1. **Generate cases **\n\nDescribe three c...,19548.0,"Wholesale and Retail Buyers, Except Farm Products",Compare transportation options to determine th...,A retail buyer is tasked with selecting the mo...,A wholesale buyer needs to determine the most ...,A retail buyer is responsible for choosing the...,True,...,"['Spreadsheets', 'Data analysis software', 'Co...","['Spreadsheets', 'Data analysis software', 'Co...","['Spreadsheets', 'Data analysis software']","['Data files', 'Information']","['Data files', 'Information']","['Data files', 'Information']","['Spreadsheets', 'Text documents']","['Spreadsheets', 'Text documents']","['Spreadsheets', 'Text documents']","```json\n{\n ""task_id"": ""19548.0"",\n ""occupa..."
17,17,17,\n\n1. **Generate cases **\n\nDescribe three c...,21417.0,"Claims Adjusters, Examiners, and Investigators",Examine claims forms and other records to dete...,A homeowner files a claim for water damage cau...,An individual files a claim for a car accident...,A business files a claim for loss of income du...,True,...,"['Spreadsheets', 'Text Editor']","['Spreadsheets', 'Text Editor', 'Communication...","['Spreadsheets', 'Text Editor', 'Communication...","['Data Files', 'PDFs']","['Data Files', 'PDFs', 'Images']","['Data Files', 'PDFs', 'Information']","['Spreadsheets', 'Text Documents']","['Spreadsheets', 'Text Documents', 'Sent Emails']","['Spreadsheets', 'Text Documents', 'Sent Emails']","```json\n{\n ""task_id"": ""21417.0"",\n ""occupa..."
36,36,36,\n\n1. **Generate cases **\n\nDescribe three c...,10833.0,Environmental Compliance Inspectors,"Prepare written, oral, tabular, and graphic re...",An inspector is tasked with evaluating a manuf...,An inspector investigates a complaint about il...,An inspector conducts a routine inspection of ...,False,...,"['Spreadsheets', 'Text editor', 'Presentation ...","['Spreadsheets', 'Text editor', 'Image editor']","['Spreadsheets', 'Text editor', 'Presentation ...","['Data files', 'PDFs', 'Images']","['Data files', 'PDFs', 'Images']","['Data files', 'PDFs', 'Images']","['Text documents', 'Spreadsheets', 'Presentati...","['Text documents', 'Spreadsheets', 'Images']","['Text documents', 'Spreadsheets', 'Presentati...","```json\n{\n ""task_id"": ""10833.0"",\n ""occupa..."
37,37,37,\n\n1. **Generate cases **\n\nDescribe three c...,10844.0,Environmental Compliance Inspectors,Evaluate label information for accuracy and co...,Inspecting food product labels to ensure they ...,Reviewing chemical product labels to verify co...,Assessing pesticide labels to confirm they adh...,True,...,"['Spreadsheets', 'Text Editor']","['Spreadsheets', 'Text Editor']","['Spreadsheets', 'Text Editor']","['PDFs', 'Images']","['PDFs', 'Images']","['PDFs', 'Images']","['Text Documents', 'Spreadsheets']","['Text Documents', 'Spreadsheets']","['Text Documents', 'Spreadsheets']","```json\n{\n ""task_id"": ""10844.0"",\n ""occupa..."


In [None]:
print(" There are ", len(df['title'].unique()), " occupations in this group.")
print("There are ", df.shape[0], " task pertaining to ", occupation_group)
print("On average an occupation has ", df['title'].value_counts().mean(), ' tasks listed.')
print("The minimum number of tasks is ", df['title'].value_counts().min())
print("The maximum number of tasks is ", df['title'].value_counts().max())
print("Share of Core vs. Supplemental tasks ", df['task_type'].value_counts()/df.shape[0])

 There are  47  occupations in this group.
There are  330  task pertaining to  Business and Financial Operations Occupations


KeyError: 'title'

AttributeError: 'tuple' object has no attribute 'head'

In [None]:
pd.DataFrame(json.loads(  """{"Spreadsheets": "Spreadsheets",
    "spreadsheets": "Spreadsheets",
    "Contract documents": "Contract documents",
    "Emails": "Emails",
    "Sent emails": "Emails",
    "Text Documents": "Text Documents",
    "text documents": "Text Documents",
    "Text documents": "Text Documents",
    "Social media posts": "Social media posts",
    "Presentation slides": "Presentations",
    "Presentations": "Presentations",
    "Presentation documents": "Presentations",
    "Shared links": "Shared links",
    "Images": "Images",
    "Calendar entries": "Calendar entries"
}"""


Unnamed: 0,key,value
0,Spreadsheets,Spreadsheets
1,spreadsheets,Spreadsheets
2,Contract documents,Contract documents
3,Emails,Emails
4,Sent emails,Emails
5,Text Documents,Text Documents
6,text documents,Text Documents
7,Text documents,Text Documents
8,Social media posts,Social media posts
9,Presentation slides,Presentations


In [96]:
df = pd.read_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/real_life_examples/examples_mapped/business_finance_examples.csv', index_col=0)
df.drop('output', inplace=True,axis=1)

In [93]:
df['output']

0     ```json\n{\n  "task_id": "12865.0",\n  "occupa...
1     ```json\n{\n  "task_id": "21163.0",\n  "occupa...
2     ```json\n{\n  "task_id": "12866.0",\n  "occupa...
3     ```json\n{\n  "task_id": "12873.0",\n  "occupa...
4     ```json\n{\n  "task_id": "12871.0",\n  "occupa...
5     ```json\n{\n  "task_id": "12874.0",\n  "occupa...
6     ```json\n{\n  "task_id": "12875.0",\n  "occupa...
7     ```json\n{\n  "task_id": "12886.0",\n  "occupa...
8     ```json\n{\n  "task_id": "12888.0",\n  "occupa...
9     ```json\n{\n  "task_id": "77.0",\n  "occupatio...
10    ```json\n{\n  "task_id": "19547.0",\n  "occupa...
11    ```json\n{\n  "task_id": "19548.0",\n  "occupa...
12    ```json\n{\n  "task_id": "19549.0",\n  "occupa...
13    ```json\n{\n  "task_id": "1146.0",\n  "occupat...
Name: output, dtype: object

In [None]:
# List of id columns to retain
id_vars = ['row', 'prompt', 'task_id', 'occupation', 'task_description']

# Initialize empty DataFrame
df_long = pd.DataFrame()

for value in ['description', 'required_tools', 'required_materials', 'output', 
              'required_materials_mapped', 'required_tools_mapped', 'output_mapped']:
    
    # Dynamically find matching columns for the current 'value'
    value_vars = [col for col in df.columns if value in col.lower() and col not in id_vars]
    
    if value_vars:  # Only melt if there are matching columns
        melted_df = df.melt(id_vars=id_vars, value_vars=value_vars, value_name=value)

        # Merge the melted DataFrame into df_long
        if df_long.empty:
            df_long = melted_df
        else:
            df_long[value] = melted_df[value]

df_long['variable'] = df_long['variable'].str.split('_').str[0]
return df_long[['task_id','occupation','variable','task_description','description','required_tools_mapped','required_materials_mapped','output_mapped']]

Unnamed: 0,task_id,occupation,variable,task_description,description,required_tools_mapped,required_materials_mapped,output_mapped
0,12865.0,"Agents and Business Managers of Artists, Perfo...",e1,"Collect fees, commissions, or other payments, ...",Collecting performance fees from a concert pro...,"['Email client', 'Spreadsheet software', 'Cont...","['Data files', 'PDFs', 'Information']","['Sent emails', 'Spreadsheet software', 'Text ..."
1,21163.0,"Agents and Business Managers of Artists, Perfo...",e1,Send samples of clients' work and other promot...,Sending a digital portfolio of an artist to a ...,"['Email client', 'File sharing service']","['Images', 'PDFs']","['Sent emails', 'Shared links']"
2,12866.0,"Agents and Business Managers of Artists, Perfo...",e1,Confer with clients to develop strategies for ...,An agent meets with a musician to discuss a ne...,"['Video conferencing software', 'Spreadsheet s...","['Information', 'Data files', 'PDFs']","['Sent emails', 'Spreadsheet software', 'Prese..."
3,12873.0,"Agents and Business Managers of Artists, Perfo...",e1,Arrange meetings concerning issues involving t...,Arrange a meeting with a potential sponsor to ...,"['Email client', 'Calendar application', 'Vide...","['Information', 'PDFs']","['Sent emails', 'Calendar entries']"
4,12871.0,"Agents and Business Managers of Artists, Perfo...",e1,Manage business and financial affairs for clie...,Arranging travel and lodging for a client atte...,"['Spreadsheet software', 'Email client', 'Trav...","['Data files', 'Information']","['Sent emails', 'Spreadsheet software']"
5,12874.0,"Agents and Business Managers of Artists, Perfo...",e1,Prepare periodic accounting statements for cli...,Preparing quarterly financial statements for a...,"['Spreadsheet software', 'accounting software']","['data files', 'information']","['Spreadsheet software', 'text documents']"
6,12875.0,"Agents and Business Managers of Artists, Perfo...",e1,"Advise clients on financial and legal matters,...",Advising a client on the best investment oppor...,"['Spreadsheet software', 'Video conferencing s...","['Data files', 'Information']","['Spreadsheet software', 'Sent emails']"
7,12886.0,"Buyers and Purchasing Agents, Farm Products",e1,Calculate applicable government grain quotas.,A purchasing agent needs to calculate the grai...,"['Spreadsheet software', 'Data analysis softwa...","['Data files', 'Information']","['Spreadsheet software', 'Text documents']"
8,12888.0,"Buyers and Purchasing Agents, Farm Products",e1,"Estimate land production possibilities, survey...",Analyzing historical crop rotation data to det...,"['Spreadsheet software', 'Data analysis softwa...","['Data files', 'Information']","['Spreadsheet software', 'Text documents']"
9,77.0,"Wholesale and Retail Buyers, Except Farm Products",e1,Provide clerks with information to print on pr...,Updating clerks with new seasonal pricing info...,"['Spreadsheet software', 'Email client']","['Data files', 'Information']","['Sent emails', 'Spreadsheet software']"


In [82]:
df

Unnamed: 0.1,Unnamed: 0,row,prompt,task_id,occupation,task_description,e1_description,e2_description,e3_description,e1_remote_possible,...,e1_required_tools_mapped,e2_required_tools_mapped,e3_required_tools_mapped,e1_required_materials_mapped,e2_required_materials_mapped,e3_required_materials_mapped,e1_output_mapped,e2_output_mapped,e3_output_mapped,description
0,0.0,0.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12865.0,"Agents and Business Managers of Artists, Perfo...","Collect fees, commissions, or other payments, ...",Collecting performance fees from a concert pro...,Collecting endorsement payments from a brand f...,Collecting royalties from a streaming service ...,True,...,"['Email client', 'Spreadsheet software', 'Cont...","['Email client', 'Spreadsheet software', 'Cont...","['Email client', 'Spreadsheet software', 'Cont...","['Data files', 'PDFs', 'Information']","['Data files', 'PDFs', 'Information']","['Data files', 'PDFs', 'Information']","['Sent emails', 'Spreadsheet software', 'Text ...","['Sent emails', 'Spreadsheet software', 'Text ...","['Sent emails', 'Spreadsheet software', 'Text ...",Collecting performance fees from a concert pro...
1,1.0,1.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,21163.0,"Agents and Business Managers of Artists, Perfo...",Send samples of clients' work and other promot...,Sending a digital portfolio of an artist to a ...,Submitting a highlight reel of an athlete to a...,Sending a demo tape of a musician to a record ...,True,...,"['Email client', 'File sharing service']","['Email client', 'Video editing software']","['Email client', 'Audio editing software']","['Images', 'PDFs']","['Video files', 'Information']","['Audio files', 'Information']","['Sent emails', 'Shared links']","['Sent emails', 'Shared links']","['Sent emails', 'Shared links']",Sending a digital portfolio of an artist to a ...
2,2.0,2.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12866.0,"Agents and Business Managers of Artists, Perfo...",Confer with clients to develop strategies for ...,An agent meets with a musician to discuss a ne...,A business manager consults with an athlete to...,An agent collaborates with an actor to decide ...,True,...,"['Video conferencing software', 'Spreadsheet s...","['Video conferencing software', 'Spreadsheet s...","['Video conferencing software', 'Text editor',...","['Information', 'Data files', 'PDFs']","['Information', 'Data files', 'PDFs']","['Information', 'Data files', 'PDFs']","['Sent emails', 'Spreadsheet software', 'Prese...","['Sent emails', 'Spreadsheet software', 'Text ...","['Sent emails', 'Text documents', 'Contract do...",An agent meets with a musician to discuss a ne...
3,3.0,3.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12873.0,"Agents and Business Managers of Artists, Perfo...",Arrange meetings concerning issues involving t...,Arrange a meeting with a potential sponsor to ...,Organize a meeting with a record label to nego...,Set up a meeting with a film producer to discu...,True,...,"['Email client', 'Calendar application', 'Vide...","['Email client', 'Calendar application', 'Vide...","['Email client', 'Calendar application', 'Vide...","['Information', 'PDFs']","['Information', 'PDFs']","['Information', 'PDFs']","['Sent emails', 'Calendar entries']","['Sent emails', 'Calendar entries']","['Sent emails', 'Calendar entries']",Arrange a meeting with a potential sponsor to ...
4,4.0,4.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12871.0,"Agents and Business Managers of Artists, Perfo...",Manage business and financial affairs for clie...,Arranging travel and lodging for a client atte...,Selling tickets for a client's upcoming concer...,Directing marketing and advertising activities...,True,...,"['Spreadsheet software', 'Email client', 'Trav...","['Ticketing platform', 'Spreadsheet software',...","['Social media management software', 'Image ed...","['Data files', 'Information']","['Data files', 'Information']","['Images', 'Information']","['Sent emails', 'Spreadsheet software']","['Spreadsheet software', 'Sent emails']","['Images', 'Text documents']",Arranging travel and lodging for a client atte...
5,5.0,5.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12874.0,"Agents and Business Managers of Artists, Perfo...",Prepare periodic accounting statements for cli...,Preparing quarterly financial statements for a...,Creating monthly accounting reports for a musi...,Compiling annual financial summaries for an ac...,True,...,"['Spreadsheet software', 'accounting software']","['Spreadsheet software', 'accounting software']","['Spreadsheet software', 'accounting software']","['data files', 'information']","['data files', 'information']","['data files', 'information']","['Spreadsheet software', 'text documents']","['Spreadsheet software', 'text documents']","['Spreadsheet software', 'text documents']",Preparing quarterly financial statements for a...
6,6.0,6.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12875.0,"Agents and Business Managers of Artists, Perfo...","Advise clients on financial and legal matters,...",Advising a client on the best investment oppor...,Helping a client understand and manage their t...,Reviewing and negotiating a contract to ensure...,True,...,"['Spreadsheet software', 'Video conferencing s...","['Spreadsheet software', 'Document editing sof...","['Document editing software', 'Video conferenc...","['Data files', 'Information']","['Information', 'PDFs']","['Information', 'PDFs']","['Spreadsheet software', 'Sent emails']","['Text documents', 'Sent emails']","['Text documents', 'Sent emails']",Advising a client on the best investment oppor...
7,7.0,7.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12886.0,"Buyers and Purchasing Agents, Farm Products",Calculate applicable government grain quotas.,A purchasing agent needs to calculate the grai...,A buyer is tasked with determining the grain q...,An agent calculates the grain quota for a soyb...,True,...,"['Spreadsheet software', 'Data analysis softwa...","['Spreadsheet software', 'Data analysis softwa...","['Spreadsheet software', 'Data analysis softwa...","['Data files', 'Information']","['Data files', 'Information']","['Data files', 'Information']","['Spreadsheet software', 'Text documents']","['Spreadsheet software', 'Text documents']","['Spreadsheet software', 'Text documents']",A purchasing agent needs to calculate the grai...
8,8.0,8.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12888.0,"Buyers and Purchasing Agents, Farm Products","Estimate land production possibilities, survey...",Analyzing historical crop rotation data to det...,Evaluating soil fertility reports to assess th...,Reviewing irrigation facility plans to ensure ...,True,...,"['Spreadsheet software', 'Data analysis softwa...","['Spreadsheet software', 'Data visualization s...","['Presentation software', 'Data analysis softw...","['Data files', 'Information']","['Data files', 'Information']","['Data files', 'Information']","['Spreadsheet software', 'Text documents']","['Spreadsheet software', 'Text documents']","['Presentation slides', 'Text documents']",Analyzing historical crop rotation data to det...
9,9.0,9.0,\n\n\n\n\n1. **Generate examples **\n\nList th...,77.0,"Wholesale and Retail Buyers, Except Farm Products",Provide clerks with information to print on pr...,Updating clerks with new seasonal pricing info...,Communicating a temporary markdown for a promo...,Providing clerks with updated manufacturer num...,True,...,"['Spreadsheet software', 'Email client']","['Spreadsheet software', 'Email client']","['Spreadsheet software', 'Email client']","['Data files', 'Information']","['Data files', 'Information']","['Data files', 'Information']","['Sent emails', 'Spreadsheet software']","['Sent emails', 'Spreadsheet software']","['Sent emails', 'Spreadsheet software']",Updating clerks with new seasonal pricing info...


In [None]:
df.melt(id_vars =, value_vars= ['e1_description', 'e2_description','e3_description'])

Unnamed: 0,row,prompt,task_id,occupation,task_description,variable,value
0,0,\n\n\n\n\n1. **Generate examples **\n\nList th...,12865.0,"Agents and Business Managers of Artists, Perfo...","Collect fees, commissions, or other payments, ...",e1_description,Collecting performance fees from a concert pro...
1,1,\n\n\n\n\n1. **Generate examples **\n\nList th...,21163.0,"Agents and Business Managers of Artists, Perfo...",Send samples of clients' work and other promot...,e1_description,Sending a digital portfolio of an artist to a ...
2,2,\n\n\n\n\n1. **Generate examples **\n\nList th...,12866.0,"Agents and Business Managers of Artists, Perfo...",Confer with clients to develop strategies for ...,e1_description,An agent meets with a musician to discuss a ne...
3,3,\n\n\n\n\n1. **Generate examples **\n\nList th...,12873.0,"Agents and Business Managers of Artists, Perfo...",Arrange meetings concerning issues involving t...,e1_description,Arrange a meeting with a potential sponsor to ...
4,4,\n\n\n\n\n1. **Generate examples **\n\nList th...,12871.0,"Agents and Business Managers of Artists, Perfo...",Manage business and financial affairs for clie...,e1_description,Arranging travel and lodging for a client atte...
5,5,\n\n\n\n\n1. **Generate examples **\n\nList th...,12874.0,"Agents and Business Managers of Artists, Perfo...",Prepare periodic accounting statements for cli...,e1_description,Preparing quarterly financial statements for a...
6,6,\n\n\n\n\n1. **Generate examples **\n\nList th...,12875.0,"Agents and Business Managers of Artists, Perfo...","Advise clients on financial and legal matters,...",e1_description,Advising a client on the best investment oppor...
7,7,\n\n\n\n\n1. **Generate examples **\n\nList th...,12886.0,"Buyers and Purchasing Agents, Farm Products",Calculate applicable government grain quotas.,e1_description,A purchasing agent needs to calculate the grai...
8,8,\n\n\n\n\n1. **Generate examples **\n\nList th...,12888.0,"Buyers and Purchasing Agents, Farm Products","Estimate land production possibilities, survey...",e1_description,Analyzing historical crop rotation data to det...
9,9,\n\n\n\n\n1. **Generate examples **\n\nList th...,77.0,"Wholesale and Retail Buyers, Except Farm Products",Provide clerks with information to print on pr...,e1_description,Updating clerks with new seasonal pricing info...


In [None]:
df = pd.read_csv('../data/external/gpts-are-gpts/full_labelset.tsv', delimiter="\t")


In [None]:
print("Odf.shape

(1008, 19)

In [10]:
materials_list = pd.read_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/material_lists/materials_business_and_financial_operations_occupations.csv')

In [18]:
materials_list[['task_description','can_be_performed_remotely','chain_of_thought','required_tools','required_materials','required_submission']].to_csv('/Users/htr365/Documents/PhD/21_automatisation/gpt_eval/data/material_lists/sub_qual_eval.csv')

In [22]:
json.loads("""{
  "task_id": "12865.0",
  "occupation": "Agents and Business Managers of Artists, Performers, and Athletes",
  "task_description": "Collect fees, commissions, or other payments, according to contract terms.",
  "e1_description": "Collecting performance fees from a concert promoter for a musician's live performance.",
  "e2_description": "Collecting endorsement payments from a brand for an athlete's promotional activities.",
  "e3_description": "Collecting royalties from a streaming service for an artist's music plays.",
  "e1_remote_possible": true,
  "e2_remote_possible": true,
  "e3_remote_possible": true,
  "e1_required_tools": {"email client", "spreadsheet software", "accounting software"},
  "e2_required_tools": {"email client", "contract management software", "accounting software"},
  "e3_required_tools": {"email client", "royalty tracking software", "spreadsheet software"},
  "e1_required_materials": {"contract documents", "performance schedule", "invoice template"},
  "e2_required_materials": {"endorsement contract", "payment schedule", "invoice template"},
  "e3_required_materials": {"royalty agreement", "streaming data reports", "invoice template"},
  "e1_output": {"sent invoice", "received payment confirmation", "updated financial records"},
  "e2_output": {"sent invoice", "received payment confirmation", "updated financial records"},
  "e3_output": {"sent invoice", "received payment confirmation", "updated financial records"}
}""")

JSONDecodeError: Expecting ':' delimiter: line 11 column 39 (char 656)

In [19]:
materials_list

Unnamed: 0.1,Unnamed: 0,row,prompt,task_id,occupation,task_description,can_be_performed_remotely,chain_of_thought,tools.coding/Python,tools.Excel,...,materials.Virtual labs or sandbox environments,materials.Other.name,materials.Other.classification,submission_requirements.exact_answer_questions,submission_requirements.md,submission_requirements.csv,submission_requirements.py,required_tools,required_materials,required_submission
0,0,0,\nYou are an excellent examiner of Agents and ...,12865.0,"Agents and Business Managers of Artists, Perfo...","Collect fees, commissions, or other payments, ...",True,The task of collecting fees and commissions ca...,Not Required,Required,...,Not Required,,,Required,Not Required,Required,Not Required,"['Excel', 'PDF viewer', 'Web Browser']","['Text Instructions', 'Text PDF reports, books...","['exact_answer_questions', 'csv']"
1,1,1,\nYou are an excellent examiner of Agents and ...,21163.0,"Agents and Business Managers of Artists, Perfo...",Send samples of clients' work and other promot...,True,The task of sending samples and promotional ma...,Not Required,Not Required,...,Not Required,,,Required,Required,Not Required,Not Required,"['Word', 'PDF viewer', 'PowerPoint', 'Web Brow...","['Text Instructions', 'Images, PNG/JPG, etc', ...","['exact_answer_questions', 'md']"
2,2,2,\nYou are an excellent examiner of Agents and ...,12866.0,"Agents and Business Managers of Artists, Perfo...",Confer with clients to develop strategies for ...,True,The task of conferring with clients to develop...,Not Required,Required,...,Not Required,,,Required,Required,Not Required,Not Required,"['Excel', 'Word', 'PDF viewer', 'PowerPoint', ...","['Text Instructions', 'Text PDF reports, books...","['exact_answer_questions', 'md']"
3,3,3,\nYou are an excellent examiner of Agents and ...,12873.0,"Agents and Business Managers of Artists, Perfo...",Arrange meetings concerning issues involving t...,True,Arranging meetings can be done remotely using ...,Not Required,Not Required,...,Not Required,,,Required,Not Required,Not Required,Not Required,"['Word', 'Web Browser']",['Text Instructions'],['exact_answer_questions']
4,4,4,\nYou are an excellent examiner of Agents and ...,12871.0,"Agents and Business Managers of Artists, Perfo...",Manage business and financial affairs for clie...,True,The task of managing business and financial af...,Not Required,Required,...,Not Required,,,Required,Required,Required,Not Required,"['Excel', 'Word', 'PDF viewer', 'Web Browser']","['Text Instructions', 'Text PDF reports, books...","['exact_answer_questions', 'md', 'csv']"
5,5,5,\nYou are an excellent examiner of Agents and ...,12874.0,"Agents and Business Managers of Artists, Perfo...",Prepare periodic accounting statements for cli...,True,Preparing periodic accounting statements can b...,Not Required,Required,...,Not Required,,,Not Required,Not Required,Required,Not Required,"['Excel', 'PDF viewer', 'Web Browser']","['Text Instructions', 'Data, CSV']",['csv']
6,6,6,\nYou are an excellent examiner of Agents and ...,12875.0,"Agents and Business Managers of Artists, Perfo...","Advise clients on financial and legal matters,...",True,Advising clients on financial and legal matter...,Not Required,Required,...,Not Required,,,Required,Required,Required,Not Required,"['Excel', 'Word', 'PDF viewer', 'Web Browser']","['Text Instructions', 'Text PDF reports, books...","['exact_answer_questions', 'md', 'csv']"
7,7,7,\nYou are an excellent examiner of Buyers and ...,12886.0,"Buyers and Purchasing Agents, Farm Products",Calculate applicable government grain quotas.,True,Calculating government grain quotas can be don...,Not Required,Required,...,Not Required,,,Required,Not Required,Required,Not Required,"['Excel', 'PDF viewer', 'Web Browser']","['Text Instructions', 'Text PDF reports, books...","['exact_answer_questions', 'csv']"
8,8,8,\nYou are an excellent examiner of Buyers and ...,12888.0,"Buyers and Purchasing Agents, Farm Products","Estimate land production possibilities, survey...",True,The task can be performed remotely as it invol...,Not Required,Required,...,Not Required,,,Required,Not Required,Required,Not Required,"['Excel', 'PDF viewer', 'Web Browser', 'Other....","['Text Instructions', 'Text PDF reports, books...","['exact_answer_questions', 'csv']"
9,9,9,\nYou are an excellent examiner of Wholesale a...,77.0,"Wholesale and Retail Buyers, Except Farm Products",Provide clerks with information to print on pr...,True,The task of providing clerks with information ...,Not Required,Required,...,Not Required,,,Required,Not Required,Required,Not Required,"['Excel', 'Web Browser']","['Text Instructions', 'Data, CSV']","['exact_answer_questions', 'csv']"
