In [180]:
import pandas as pd
import numpy as np

In [181]:
file_path = './data/OccupationProjections.xls'

In [190]:
df = pd.read_excel(file_path, skiprows=2)
df.columns=['OccupationLevel',
            'SkillLevel',
            'OccupationCode',
            'Occupation',
            'Number of Jobs (May 2018)',
            'Predicted Number of Jobs (May 2023)',
            'Predicted New Jobs Over Next 5 Years',
            'Predicted 5-year Employment Growth (%)']
df = df.fillna(0)
df['Number of Jobs (May 2018)'] = (df['Number of Jobs (May 2018)']*1000).astype(int)
df['Predicted Number of Jobs (May 2023)'] = (df[ 'Predicted Number of Jobs (May 2023)']*1000).astype(int)
df['Predicted New Jobs Over Next 5 Years'] = (df['Predicted New Jobs Over Next 5 Years']*1000).astype(int)
df['OccupationCode'] = df['OccupationCode'].astype(str)
df.drop(df.tail(3).index,inplace=True) # Last few rows are not useful

In [191]:
df.head()

Unnamed: 0,OccupationLevel,SkillLevel,OccupationCode,Occupation,Number of Jobs (May 2018),Predicted Number of Jobs (May 2023),Predicted New Jobs Over Next 5 Years,Predicted 5-year Employment Growth (%)
0,1,,1,MANAGERS,1561630,1671502,109872,7.035772
1,2,,11,"Chief Executives, General Managers and Legisla...",109718,114566,4848,4.418739
2,3,,111,"Chief Executives, General Managers and Legisla...",109718,114566,4848,4.418739
3,4,1.0,1111,Chief Executives and Managing Directors,62824,68618,5793,9.222124
4,4,1.0,1112,General Managers,42125,41095,-1030,-2.445267


# Things to make

### A thing that gives you every child of a given occupation code

### A thing that gives you the skill level of a given 4th level occupation code

# Occupation code to skill level

In [173]:
# Note that some entries have multiple values. These are preserved as strings, e.g. `1,2,3,4,5`
anzsco_to_skill_level = df[df['OccupationLevel']==4][['SkillLevel','OccupationCode']]
anzsco_to_skill_level['SkillLevel'] = anzsco_to_skill_level['SkillLevel'].astype(str).str[:1].astype(int)
anzsco_to_skill_level['OccupationCode'] = anzsco_to_skill_level['OccupationCode'].astype(str)
anzsco_to_skill_level = anzsco_to_skill_level.set_index('OccupationCode')['SkillLevel'].to_dict();

# Map between occupation codes

### Map from code to names

In [175]:
occupations_level0 = {}
current_level1 = None
current_level2 = None
current_level3 = None
current_level4 = None

data = []

for index, row in df.iterrows():
    if len(str(row['OccupationCode']))==1:
        current_level1 = str(row['OccupationCode'])
        
        # New Level1
        occupations_level0[current_level1] = {'Major Group': row['Occupation'],
                                              'Sub-Major Groups':{}}
        continue
    if len(str(row['OccupationCode']))==2:
        current_level2 = str(row['OccupationCode'])
        
        # New Level2
        occupations_level0[current_level1]['Sub-Major Groups'][current_level2] = {'Sub-Major Group': row['Occupation'],
                                                                          'Minor Groups': {}}
        continue
    if len(str(row['OccupationCode']))==3:
        current_level3 = str(row['OccupationCode'])
        
        # New Level3
        occupations_level0[current_level1]['Sub-Major Groups'][current_level2]['Minor Groups'][current_level3] = {'Minor Group': row['Occupation'],
                                                                                                      'Unit Groups': {}}
        continue
    if len(str(row['OccupationCode']))==4:
        current_level4 = str(row['OccupationCode'])
        
        # New Level4
        occupations_level0[current_level1]['Sub-Major Groups'][current_level2]['Minor Groups'][current_level3]['Unit Groups'][current_level4] = row['Occupation']
        
        data.append([current_level1, current_level2, current_level3, current_level4, anzsco_to_skill_level[current_level4], row['Occupation']])
        
        
        continue
    


In [127]:
def resolve_anzsco(code):
    # Provide ANZSCO as string
    if len(code)==1:
        return occupations_level0[code]
    if len(code)==2:
        return occupations_level0[code[0]]['Sub-Major Groups'][code]
    if len(code)==3:
        return occupations_level0[code[0]]['Sub-Major Groups'][code[0:2]]['Minor Groups'][code]
    if len(code)==4:
        return occupations_level0[code[0]]['Sub-Major Groups'][code[0:2]]['Minor Groups'][code[0:3]]['Unit Groups'][code]

In [178]:
df_anzsco = pd.DataFrame(data = data, columns=['Major Group', 'Sub-major Group', 'Minor Group','Unit Group', 'Skill Level', 'Name']).set_index('Unit Group')

In [194]:
df_anzsco_out = df_anzsco.join(df.set_index('OccupationCode')[['Number of Jobs (May 2018)',
            'Predicted Number of Jobs (May 2023)',
            'Predicted New Jobs Over Next 5 Years',
            'Predicted 5-year Employment Growth (%)']]
              )
df_anzsco_out.to_pickle("./data/pickles/anzsco_lmip_proj.pkl")

In [195]:
df_anzsco_out

Unnamed: 0_level_0,Major Group,Sub-major Group,Minor Group,Skill Level,Name,Number of Jobs (May 2018),Predicted Number of Jobs (May 2023),Predicted New Jobs Over Next 5 Years,Predicted 5-year Employment Growth (%)
Unit Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1111,1,11,111,1,Chief Executives and Managing Directors,62824,68618,5793,9.222124
1112,1,11,111,1,General Managers,42125,41095,-1030,-2.445267
1113,1,11,111,1,Legislators,2626,2711,84,3.216018
1110,1,11,111,1,"Chief Executives, General Managers and Legisla...",185,185,0,0.000000
1211,1,12,121,1,Aquaculture Farmers,2652,2597,-54,-2.066223
1212,1,12,121,1,Crop Farmers,41145,41378,232,0.564764
1213,1,12,121,1,Livestock Farmers,77256,75203,-2053,-2.658273
1214,1,12,121,1,Mixed Crop and Livestock Farmers,30204,29190,-1014,-3.357210
1210,1,12,121,1,Farmers and Farm Managers nfd,10245,8607,-1638,-15.994166
1311,1,13,131,1,"Advertising, Public Relations and Sales Managers",145614,159872,14258,9.791914


In [130]:
occupations_level0['1']

{'Major Group': 'MANAGERS',
 'Sub-Major Groups': {'11': {'Sub-Major Group': 'Chief Executives, General Managers and Legislators',
   'Minor Groups': {'111': {'Minor Group': 'Chief Executives, General Managers and Legislators',
     'Unit Groups': {'1111': 'Chief Executives and Managing Directors',
      '1112': 'General Managers',
      '1113': 'Legislators',
      '1110': 'Chief Executives, General Managers and Legislators nfd'}}}},
  '12': {'Sub-Major Group': 'Farmers and Farm Managers',
   'Minor Groups': {'121': {'Minor Group': 'Farmers and Farm Managers',
     'Unit Groups': {'1211': 'Aquaculture Farmers',
      '1212': 'Crop Farmers',
      '1213': 'Livestock Farmers',
      '1214': 'Mixed Crop and Livestock Farmers',
      '1210': 'Farmers and Farm Managers nfd'}}}},
  '13': {'Sub-Major Group': 'Specialist Managers',
   'Minor Groups': {'131': {'Minor Group': 'Advertising, Public Relations and Sales Managers',
     'Unit Groups': {'1311': 'Advertising, Public Relations and Sales

In [137]:
df_anzsco['Name'] = df_anzsco['Name'].str.replace(' nfd', '', regex=True)
df_anzsco

Unnamed: 0,Major Group,Sub-major Group,Minor Group,Unit Group,Name
0,1,11,111,1111,Chief Executives and Managing Directors
1,1,11,111,1112,General Managers
2,1,11,111,1113,Legislators
3,1,11,111,1110,"Chief Executives, General Managers and Legisla..."
4,1,12,121,1211,Aquaculture Farmers
5,1,12,121,1212,Crop Farmers
6,1,12,121,1213,Livestock Farmers
7,1,12,121,1214,Mixed Crop and Livestock Farmers
8,1,12,121,1210,Farmers and Farm Managers
9,1,13,131,1311,"Advertising, Public Relations and Sales Managers"
