In [1]:
import pandas as pd
import numpy as np

In [2]:
# Relative path of the Excel workbook that is loaded with pd.read_excel
file_path = './data/OccupationProjections.xls'

In [65]:
# Load the workbook; skiprows=2 skips the first two rows of the sheet so the
# next row is used as the header.
df = pd.read_excel(file_path, skiprows=2)

# Overwrite the sheet's header labels with code-friendly column names.
df.columns = [
    'OccupationLevel',
    'SkillLevel',
    'OccupationCode',
    'Occupation',
    'EmploymentLevel_May2018',
    'EmploymentLevel_May2023',
    'EmploymenGrowth_2018_to_2023',
    'EmploymenGrowth_2018_to_2023_percentage',
]

# The last three rows are not useful, so they are removed.
df = df.drop(index=df.index[-3:])

In [60]:
# Preview the first rows of the loaded table
df.head()

Unnamed: 0,OccupationLevel,SkillLevel,OccupationCode,Occupation,EmploymentLevel_May2018,EmploymentLevel_May2023,EmploymenGrowth_2018_to_2023,EmploymenGrowth_2018_to_2023_percentage
0,1,,1,MANAGERS,1561.630105,1671.502836,109.872731,7.035772
1,2,,11,"Chief Executives, General Managers and Legisla...",109.718797,114.566984,4.848187,4.418739
2,3,,111,"Chief Executives, General Managers and Legisla...",109.718797,114.566984,4.848187,4.418739
3,4,1.0,1111,Chief Executives and Managing Directors,62.82488,68.618668,5.793789,9.222124
4,4,1.0,1112,General Managers,42.125263,41.095188,-1.030075,-2.445267


# Things to make

### A lookup that returns every child of a given occupation code

### A lookup that returns the skill level of a given level-4 (unit group) occupation code

# Occupation code to skill level

In [61]:
# Map OccupationCode -> SkillLevel using only the rows whose OccupationLevel is 4.
# Keys are the OccupationCode values as stored in df (no str() conversion is applied here).
# Note that some entries have multiple values. These are preserved as strings, e.g. `1,2,3,4,5`
anzsco_to_skill_level = (
    df.loc[df['OccupationLevel'] == 4]
    .set_index('OccupationCode')['SkillLevel']
    .to_dict()
)

# Map between occupation codes

### Map from code to names

In [126]:
# Build two views of the occupation hierarchy in one pass over df:
#   * occupations_level0 - nested dicts keyed by the code (as a string) at each level
#   * data               - one flat list per 4-character code, holding its
#                          1-, 2- and 3-character parent codes, itself and its name
# The level of a row is decided by the length of its code string, and every
# row is filed under the most recently seen higher-level codes.
occupations_level0 = {}
data = []

# Most recently seen code at each level while walking the rows.
current_level1 = None
current_level2 = None
current_level3 = None
current_level4 = None

for _, row in df.iterrows():
    code = str(row['OccupationCode'])
    name = row['Occupation']
    depth = len(code)

    if depth == 1:
        # New Level1
        current_level1 = code
        occupations_level0[code] = {'Major Group': name,
                                    'Sub-Major Groups': {}}
    elif depth == 2:
        # New Level2
        current_level2 = code
        sub_majors = occupations_level0[current_level1]['Sub-Major Groups']
        sub_majors[code] = {'Sub-Major Group': name,
                            'Minor Groups': {}}
    elif depth == 3:
        # New Level3
        current_level3 = code
        sub_majors = occupations_level0[current_level1]['Sub-Major Groups']
        minors = sub_majors[current_level2]['Minor Groups']
        minors[code] = {'Minor Group': name,
                        'Unit Groups': {}}
    elif depth == 4:
        # New Level4 - stored as a plain name, and also recorded in the flat table
        current_level4 = code
        sub_majors = occupations_level0[current_level1]['Sub-Major Groups']
        minors = sub_majors[current_level2]['Minor Groups']
        units = minors[current_level3]['Unit Groups']
        units[code] = name

        data.append([current_level1, current_level2, current_level3, code, name])
    # Codes of any other length are ignored.
    


In [127]:
def resolve_anzsco(code, tree=None):
    """Return the hierarchy entry stored for an ANZSCO code of 1 to 4 characters.

    Parameters
    ----------
    code : str or int
        Occupation code. It is converted with ``str`` first, so an integer
        code such as ``1211`` is accepted as well as ``'1211'``.
    tree : dict, optional
        Nested mapping with the same layout as ``occupations_level0``
        (``'Sub-Major Groups'`` -> ``'Minor Groups'`` -> ``'Unit Groups'``).
        When omitted, the notebook-level ``occupations_level0`` is used.

    Returns
    -------
    dict or object
        For 1-, 2- and 3-character codes, the dict stored for that group.
        For 4-character codes, whatever is stored under ``'Unit Groups'``
        for that code.

    Raises
    ------
    ValueError
        If the code is not 1 to 4 characters long. Previously such codes
        fell through every branch and silently returned ``None``.
    KeyError
        If the code, or one of its prefixes, is missing from the tree.
    """
    code = str(code)
    if not 1 <= len(code) <= 4:
        raise ValueError(
            'ANZSCO code must be 1 to 4 characters long, got {!r}'.format(code)
        )

    if tree is None:
        tree = occupations_level0

    # Walk down one level at a time; each parent key is a prefix of the code.
    node = tree[code[0]]
    if len(code) == 1:
        return node

    node = node['Sub-Major Groups'][code[:2]]
    if len(code) == 2:
        return node

    node = node['Minor Groups'][code[:3]]
    if len(code) == 3:
        return node

    return node['Unit Groups'][code]

In [128]:
# Flat table built from `data`: the first four columns hold the code at each
# level of the hierarchy and the last holds the name stored with that row.
df_anzsco = pd.DataFrame(
    data,
    columns=[
        'Major Group',
        'Sub-major Group',
        'Minor Group',
        'Unit Group',
        'Name',
    ],
)

In [129]:
# Example lookup of a 3-character code
resolve_anzsco('121')

{'Minor Group': 'Farmers and Farm Managers',
 'Unit Groups': {'1211': 'Aquaculture Farmers',
  '1212': 'Crop Farmers',
  '1213': 'Livestock Farmers',
  '1214': 'Mixed Crop and Livestock Farmers',
  '1210': 'Farmers and Farm Managers nfd'}}

In [130]:
# Inspect everything nested under the 1-character code '1'
occupations_level0['1']

{'Major Group': 'MANAGERS',
 'Sub-Major Groups': {'11': {'Sub-Major Group': 'Chief Executives, General Managers and Legislators',
   'Minor Groups': {'111': {'Minor Group': 'Chief Executives, General Managers and Legislators',
     'Unit Groups': {'1111': 'Chief Executives and Managing Directors',
      '1112': 'General Managers',
      '1113': 'Legislators',
      '1110': 'Chief Executives, General Managers and Legislators nfd'}}}},
  '12': {'Sub-Major Group': 'Farmers and Farm Managers',
   'Minor Groups': {'121': {'Minor Group': 'Farmers and Farm Managers',
     'Unit Groups': {'1211': 'Aquaculture Farmers',
      '1212': 'Crop Farmers',
      '1213': 'Livestock Farmers',
      '1214': 'Mixed Crop and Livestock Farmers',
      '1210': 'Farmers and Farm Managers nfd'}}}},
  '13': {'Sub-Major Group': 'Specialist Managers',
   'Minor Groups': {'131': {'Minor Group': 'Advertising, Public Relations and Sales Managers',
     'Unit Groups': {'1311': 'Advertising, Public Relations and Sales

In [131]:
# Display the flat code table
df_anzsco

Unnamed: 0,Major Group,Sub-major Group,Minor Group,Unit Group,Name
0,1,11,111,1111,Chief Executives and Managing Directors
1,1,11,111,1112,General Managers
2,1,11,111,1113,Legislators
3,1,11,111,1110,"Chief Executives, General Managers and Legisla..."
4,1,12,121,1211,Aquaculture Farmers
5,1,12,121,1212,Crop Farmers
6,1,12,121,1213,Livestock Farmers
7,1,12,121,1214,Mixed Crop and Livestock Farmers
8,1,12,121,1210,Farmers and Farm Managers nfd
9,1,13,131,1311,"Advertising, Public Relations and Sales Managers"
