In [67]:
# Read the shared setup script that lives next to this notebook.
# NOTE(review): presumably this is where pd, getpass, dedent, Scenario and m4
# come from -- they are used in later cells but not defined in them; confirm.
with open('libraries.py') as setup_file:
    code = setup_file.read()

# Run the script in the notebook's namespace so that every name it defines
# becomes available to the cells below.
exec(code)

In [68]:
# Determine the current user and derive the project folders from it.
import os

user = getpass.getuser()
if user == 'peymansh':
    main_folder_path = '/Users/peymansh/Dropbox (MIT)/Research/AI and Occupations/ai-exposure'
else:
    # Any other user: take the project root from the AI_EXPOSURE_PATH
    # environment variable, or fall back to the current working directory.
    # Without this branch `main_folder_path` / `data_path` stayed undefined and
    # the later cells failed with a NameError far away from the real cause.
    main_folder_path = os.environ.get('AI_EXPOSURE_PATH', os.getcwd())

# All inputs and outputs used by the cells below live under <project root>/output.
data_path = f'{main_folder_path}/output'

### GPT questions for assessing task stats

In [69]:
from edsl.questions import QuestionNumerical

def get_tasks_stas(occupation, tasks):
    """Ask the language model for time and difficulty estimates of each task.

    For every distinct task, three numerical questions are put to the model
    object ``m4`` (defined outside this function): the minutes a person needs,
    the minutes an AI-automated machine needs, and how difficult the task is
    for a general-purpose AI.

    Parameters
    ----------
    occupation : str
        Occupation name substituted into ``{{ occupation }}`` in each prompt.
    tasks : iterable of str
        Task descriptions substituted into ``{{ task }}``. Repeated entries
        are asked only once (first occurrence wins, order is preserved).

    Returns
    -------
    pandas.DataFrame
        Columns ``task``, ``human_cost``, ``machine_cost`` and ``difficulty``.
        Empty (but with those columns) when ``tasks`` is empty.
    """
    stat_names = ['human_cost', 'machine_cost', 'difficulty']

    # The three result frames are inner-merged on the task text below; a task
    # that appears k times would come back as k**3 rows, so drop repeats first.
    unique_tasks = list(dict.fromkeys(tasks))

    # Nothing to ask: return an empty, correctly shaped frame without calling the model.
    if not unique_tasks:
        return pd.DataFrame(columns=['task'] + stat_names)

    scenarios = [Scenario({"occupation": occupation, "task": task}) for task in unique_tasks]

    q_human_cost = QuestionNumerical(
        question_name = "human_cost",
        question_text = dedent("""
                           Consider {{ occupation }} as an occupation. 
                           And consider this task {{ task }}.
                           How long (in minutes) does it take a person to complete this task? 
                           (5 = 5 minutes or less, 480 = a full day, or 8 hours)
                           """),
        min_value = 5, # 5 minutes or less
        max_value = 480, # a full day, or 8 hours
    )

    q_machine_cost = QuestionNumerical(
        question_name = "machine_cost",
        question_text = dedent("""
                           Consider {{ occupation }} as an occupation. 
                           And consider this task {{ task }}.
                           If the task is to be automated using artificial intelligence (AI),
                           how long (in minutes) does it take a machine to complete this task? 
                           (5 = 5 minutes or less, 480 = a full day, or 8 hours)
                           """),
        min_value = 5, # 5 minutes or less
        max_value = 480, # a full day, or 8 hours
    )

    q_difficulty = QuestionNumerical(
        question_name = "difficulty",
        question_text = dedent("""
                           Consider {{ occupation }} as an occupation. 
                           And consider this task {{ task }}.
                           Suppose this task is automated using artificial intelligence (AI).
                           Given that AI is general purpose and is not specilized in doing tasks of {{ occupation }},
                           how difficult is it for a machine to do this task?
                           (0 = not difficult at all, 15 = so difficult AI cannot do it at all)
                           """),
        min_value = 0, # not difficult at all
        max_value = 15, # so difficult AI cannot do it at all
    )

    questions = {
        'human_cost': q_human_cost,
        'machine_cost': q_machine_cost,
        'difficulty': q_difficulty,
    }

    # Run each question over all scenarios, keep only the task text and the
    # answer column, and join the three answer sets on the task text.
    tasks_stats = None
    for stat_name, question in questions.items():
        answers = question.by(m4).by(scenarios).run().to_pandas()
        answers = answers[['scenario.task', f'answer.{stat_name}']]
        if tasks_stats is None:
            tasks_stats = answers
        else:
            tasks_stats = pd.merge(tasks_stats, answers, on='scenario.task', how='inner')

    # Strip the 'scenario.' / 'answer.' prefixes from the column names.
    column_names = {'scenario.task': 'task'}
    column_names.update({f'answer.{stat_name}': stat_name for stat_name in stat_names})
    tasks_stats = tasks_stats.rename(columns=column_names)

    return tasks_stats

### Main Code

In [70]:
# Pick occupation
def pick_occupation(occupation):
    """Return the settings associated with an occupation key.

    Parameters
    ----------
    occupation : str
        Occupation key, either 'travelAgents' or 'insuranceUnderwriters'.

    Returns
    -------
    tuple
        ``(GPT_input_occupation, plot_title_occupation, occupation_code,
        occupation_folder)``: the lower-case name used in prompts, the
        title-case name for plot titles, the occupation code, and the
        occupation's folder under the module-level ``data_path``.

    Raises
    ------
    ValueError
        If ``occupation`` is not one of the known keys.
    """
    # key -> (name used in prompts, name used in plot titles, occupation code)
    occupation_details = {
        'travelAgents': ('travel agents', 'Travel Agents', '41-3041'),
        'insuranceUnderwriters': ('insurance underwriters', 'Insurance Underwriters', '13-2053'),
    }

    # Fail with a clear message; an unknown key used to fall through the
    # if/elif chain and surface as an UnboundLocalError at the return.
    if occupation not in occupation_details:
        raise ValueError(
            f'Unknown occupation {occupation!r}; expected one of {sorted(occupation_details)}'
        )

    GPT_input_occupation, plot_title_occupation, occupation_code = occupation_details[occupation]
    occupation_folder = f'{data_path}/daily_tasks_occupations_analysis/{occupation}'
    return GPT_input_occupation, plot_title_occupation, occupation_code, occupation_folder


In [71]:
import os

occupation_list = ['travelAgents', 'insuranceUnderwriters']

# Load ONET data once (it is the same for every occupation) and keep only the 2023 rows
onet = pd.read_csv(f'{data_path}/data/onet_occupations_yearly.csv')
onet = onet.sort_values(by=['year', 'occ_code', 'occ_title', 'task_id'])
onet = onet[onet['year'] == 2023].reset_index(drop=True)

for occupation in occupation_list:
    print(f'Occupation: {occupation}')

    # Initialize variables
    GPT_input_occupation, plot_title_occupation, occupation_code, occupation_folder = pick_occupation(occupation)

    # Make sure the output folder exists before spending time on the GPT
    # calls, so the save at the end cannot fail on a missing directory
    os.makedirs(occupation_folder, exist_ok=True)

    # Get list of tasks in occupation (onet is already restricted to 2023)
    my_df = onet[onet['occ_code'] == occupation_code]
    tasks = my_df['task'].unique().tolist()

    # Ask GPT to assess tasks' stats
    tasks_stats = get_tasks_stas(GPT_input_occupation, tasks)

    # Save output
    tasks_stats.to_csv(f'{occupation_folder}/{occupation}_taskStats.csv', index=False)


Occupation: travelAgents
Occupation: insuranceUnderwriters
