In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from collections import Counter, OrderedDict
from nltk.corpus import stopwords
import re

from automation_mapping import *
from automating_tech_helpers import *
from machines_helpers import *
from ai_tech_helpers import *

%matplotlib inline

# LOAD DATA

In [25]:
unspsc = pd.read_table('../data/onet_tools_technology/UNSPSC Reference.txt') # occupation code database
tt6 = pd.read_csv('../data/db06/Tools_and_Technology.csv') # tools & tech 2006
tt9 = pd.read_csv('../data/db09/Tools and Technology.csv') # tools & tech 2009
tt15 = pd.read_table('../data/db15/Tools and Technology.txt') # tools & tech 2015
occ_ref = pd.read_csv('../data/db15/helpers/occ_codes/occupation_reference.csv')
class_codes = pd.read_csv('../data/db15/helpers/class_codes.csv')
family_codes = pd.read_csv('../data/db15/helpers/family_codes.csv')
segment_codes = pd.read_csv('../data/db15/helpers/segment_codes.csv')

tt15 = pd.merge(tt15, occ_ref, how = 'left', on = ['O*NET-SOC Code']) ## add occupation titles to tt15
tt9 = pd.merge(tt9, occ_ref, how = 'left', on = ['O*NET-SOC Code']) ## add occupation titles to tt15
tt6 = pd.merge(tt6, occ_ref, how = 'left', on = ['O*NET-SOC Code']) ## add occupation titles to tt15

# CLEAN DATA

In [26]:
def remove_punc_stopwords_lower(s):
    stop = stopwords.words('english')
    regex = r"\W+"
    return " ".join([i for i in re.split(regex, s.lower()) if i not in stop])

def fix_titles(df):
    new_df = df.copy()
    new_df['T2 Example'] = new_df['T2 Example'].apply(lambda x: remove_punc_stopwords_lower(x))
    new_df['Commodity Title'] = new_df['Commodity Title'].apply(lambda x: remove_punc_stopwords_lower(x))
    return new_df

def add_tech_groupings(df):
    new_df = df.copy()
    new_df['segment_code'] = create_codes(new_df, group = 'Segment')
    new_df['class_code'] = create_codes(new_df, group = 'Class')
    new_df['family_code'] = create_codes(new_df, group = 'Family')
    return new_df

def add_group_titles(df, class_codes, family_codes, segment_codes):
    new_df = df.copy()
    add_class = new_df.merge(class_codes, how = 'left', on = 'class_code')
    add_family = add_class.merge(family_codes, how = 'left', on = 'family_code')
    final = add_family.merge(segment_codes, how = 'left', on = 'family_code')
    return final

tt6 = add_tech_groupings(fix_titles(tt6))
tt9 = add_tech_groupings(fix_titles(tt9))
tt15 = add_tech_groupings(fix_titles(tt15))

tt15 = add_group_titles(tt15, class_codes, family_codes, segment_codes)

# EMERGING TECHNOLOGIES

In [49]:
tech6 = set(tt6['Commodity Code'])
tech9 = set(tt9['Commodity Code'])
tech15 = set(tt15['Commodity Code'])

In [71]:
emerged = tech15 - tech9
adoptions = tt15[tt15['Commodity Code'].isin(emerged)]
len(adoptions['O*NET-SOC Code'].unique())/float(len(tt15['O*NET-SOC Code'].unique()))

0.74435318275154

74.4% of occupations adopted some emerged technology

In [78]:
adopter_info = adoptions[['O*NET-SOC Code', 'Title', 'broad_title', 'minor_title', 'major_title']].drop_duplicates()
adopter_info['minor_title'].value_counts()[:10]

Postsecondary Teachers                                        36
Other Production Occupations                                  34
Construction Trades Workers                                   31
Health Diagnosing and Treating Practitioners                  28
Other Installation, Maintenance, and Repair Occupations       26
Metal Workers and Plastic Workers                             26
Drafters, Engineering Technicians, and Mapping Technicians    25
Business Operations Specialists                               24
Other Management Occupations                                  22
Information and Record Clerks                                 18
dtype: int64

# CHANGE IN AUTOMATION

In [91]:
a15 = pd.read_table('../data/db15/Work Context.txt')
# a15 = a15[a15['Element Name'] == 'Degree of Automation'][['O*NET-SOC Code','Data Value']]

In [106]:
a15[(a15['Element Name'] == 'Degree of Automation') & (a15['Scale ID'] == 'CX')]

Unnamed: 0,O*NET-SOC Code,Element ID,Element Name,Scale ID,Category,Data Value,N,Standard Error,Lower CI Bound,Upper CI Bound,Recommend Suppress,Not Relevant,Date,Domain Source
288,11-1011.00,4.C.3.b.2,Degree of Automation,CX,,2.68,24,0.26,2.13,3.22,N,,07/2014,Incumbent
626,11-1011.03,4.C.3.b.2,Degree of Automation,CX,,1.72,25,,,,,,07/2013,Occupational Expert
964,11-1021.00,4.C.3.b.2,Degree of Automation,CX,,2.27,50,0.22,1.82,2.72,N,,07/2015,Incumbent
1302,11-2011.00,4.C.3.b.2,Degree of Automation,CX,,2.56,18,0.19,2.17,2.96,N,,06/2010,Incumbent
1640,11-2021.00,4.C.3.b.2,Degree of Automation,CX,,2.10,40,0.18,1.74,2.46,N,,07/2015,Incumbent
1978,11-2022.00,4.C.3.b.2,Degree of Automation,CX,,2.62,21,,,,,,06/2008,Occupational Expert
2316,11-2031.00,4.C.3.b.2,Degree of Automation,CX,,2.07,30,0.36,1.32,2.81,N,,06/2009,Incumbent
2654,11-3011.00,4.C.3.b.2,Degree of Automation,CX,,2.53,44,0.20,2.12,2.93,N,,06/2009,Incumbent
2992,11-3021.00,4.C.3.b.2,Degree of Automation,CX,,2.16,40,0.24,1.68,2.65,N,,06/2008,Incumbent
3330,11-3031.01,4.C.3.b.2,Degree of Automation,CX,,3.00,30,,,,,,07/2012,Occupational Expert


# map commodity code to family codes

In [141]:
x = tt15.merge(class_codes, how = 'left', on = 'class_code')

In [143]:
x.merge(segment_codes, how = 'left', on = 'segment_code').merge(family_codes, how = 'left', on = 'family_code')

Unnamed: 0,O*NET-SOC Code,T2 Type,T2 Example,Commodity Code,Commodity Title,Title,broad_group,minor_group,major_group,broad_title,minor_title,major_title,segment_code,class_code,family_code,class_title,segment_title,family_title
0,11-1011.00,Tools,10 key calculators,44101809,desktop calculator,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,44000000,44101800,44100000,Calculating machines and accessories,Office Equipment and Accessories and Supplies,Office machines and their supplies and accesso...
1,11-1011.00,Tools,desktop computers,43211507,desktop computers,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43211500,43210000,Computers,Information Technology Broadcasting and Teleco...,Computer Equipment and Accessories
2,11-1011.00,Tools,laptop computers,43211503,notebook computers,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43211500,43210000,Computers,Information Technology Broadcasting and Teleco...,Computer Equipment and Accessories
3,11-1011.00,Tools,personal computers,43211508,personal computers,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43211500,43210000,Computers,Information Technology Broadcasting and Teleco...,Computer Equipment and Accessories
4,11-1011.00,Tools,personal digital assistants pda,43211504,personal digital assistant pdas organizers,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43211500,43210000,Computers,Information Technology Broadcasting and Teleco...,Computer Equipment and Accessories
5,11-1011.00,Tools,smartphones,43191501,mobile phones,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43191500,43190000,Personal communication devices,Information Technology Broadcasting and Teleco...,Communications Devices and Accessories
6,11-1011.00,Tools,universal serial bus usb flash drives,43201813,high capacity removable media drives,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43201800,43200000,Media storage devices,Information Technology Broadcasting and Teleco...,Components for information technology or broad...
7,11-1011.00,Technology,adsense tracker,43232306,data base user interface query software,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43232300,43230000,Data management and query software,Information Technology Broadcasting and Teleco...,Software
8,11-1011.00,Technology,blackbaud raiser edge,43232303,customer relationship management crm software,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43232300,43230000,Data management and query software,Information Technology Broadcasting and Teleco...,Software
9,11-1011.00,Technology,computerease construction accounting,43231601,accounting software,Chief Executives,11-1010,11-1000,11-0000,Chief Executives,Top Executives,Management Occupations,43000000,43231600,43230000,Finance accounting and enterprise resource pla...,Information Technology Broadcasting and Teleco...,Software
