In [1]:
import os
from skills_ml.storage import FSStore
from skills_ml.datasets.onet_cache import OnetSiteCache
from skills_ml.ontologies.onet import Onet



In [2]:
# Resolve the on-disk O*NET directory to an absolute path, wrap it in a
# filesystem store, and build the ontology on top of the cache that uses it.
onet_dir = os.path.realpath('datasets/onet')
file_store = FSStore(path=onet_dir)
onet_cache = OnetSiteCache(storage=file_store)
myonet = Onet(onet_cache=onet_cache)

# Print the ontology's summary statistics.
myonet.print_summary_stats()

Ontology summary statistics for onet
Num competencies: 47519
Num occupations: 1133
Num competency-occupation edges: 172438
Median occupations per competency: 1.0
Median competencies per occupation: 158
Mean occupations per competency: 3.628898522665095
Mean competencies per occupation: 152.19593998234774


In [3]:
# Keep only the edges whose occupation name contains the target title
# (case-sensitive substring match), then display the resulting competencies.
target_title = 'Computer and Information Research Scientists'
computer_scientists = myonet.filter_by(lambda edge: target_title in edge.occupation.name)
computer_scientists.competencies

{Competency(identifier=1.A.1.a.1, name=Oral Comprehension, categories=['Abilities'], {'competencyText': 'The ability to listen to and understand information and ideas presented through spoken words and sentences.'}),
 Competency(identifier=1.A.1.a.2, name=Written Comprehension, categories=['Abilities'], {'competencyText': 'The ability to read and understand information and ideas presented in writing.'}),
 Competency(identifier=1.A.1.a.3, name=Oral Expression, categories=['Abilities'], {'competencyText': 'The ability to communicate information and ideas in speaking so others will understand.'}),
 Competency(identifier=1.A.1.a.4, name=Written Expression, categories=['Abilities'], {'competencyText': 'The ability to communicate information and ideas in writing so others will understand.'}),
 Competency(identifier=1.A.1.b.1, name=Fluency of Ideas, categories=['Abilities'], {'competencyText': 'The ability to come up with a number of ideas about a topic (the number of ideas is important, not 

In [4]:
# (name, competencyText) pairs for competencies whose first category is 'Abilities'.
[
    (ability.name, ability.other_attributes['competencyText'])
    for ability in computer_scientists.competencies
    if ability.categories[0] == 'Abilities'
]

[('Visualization',
  'The ability to imagine how something will look after it is moved around or when its parts are moved or rearranged.'),
 ('Mathematical Reasoning',
  'The ability to choose the right mathematical methods or formulas to solve a problem.'),
 ('Speech Recognition',
  'The ability to identify and understand the speech of another person.'),
 ('Perceptual Speed',
  'The ability to quickly and accurately compare similarities and differences among sets of letters, numbers, objects, pictures, or patterns. The things to be compared may be presented at the same time or one after the other. This ability also includes comparing a presented object with a remembered object.'),
 ('Written Comprehension',
  'The ability to read and understand information and ideas presented in writing.'),
 ('Oral Comprehension',
  'The ability to listen to and understand information and ideas presented through spoken words and sentences.'),
 ('Speech Clarity',
  'The ability to speak clearly so othe

In [5]:
# (competencyText, name) pairs for 'Technology Skills'. Note the order is the
# reverse of the other category cells: in the displayed output competencyText
# carries the broader software class and name the specific product.
[
    (tech.other_attributes['competencyText'], tech.name)
    for tech in computer_scientists.competencies
    if tech.categories[0] == 'Technology Skills'
]

[('Configuration management software',
  'Perforce Software Configuration Management System'),
 ('Data base management system software',
  'Relational database management system software'),
 ('Office suite software', 'Microsoft Office'),
 ('Computer aided design CAD software', 'PTC Creo Parametric'),
 ('Clustering software', 'Clustermatic'),
 ('Development environment software', 'Software libraries'),
 ('Operating system software', 'VME PowerPC VxWorks'),
 ('Communications server software', 'IBM Domino'),
 ('Development environment software', 'Ruby'),
 ('Analytical or scientific software', 'Augmint'),
 ('Development environment software',
  'Formula translation/translator FORTRAN'),
 ('Analytical or scientific software', 'The MathWorks MATLAB'),
 ('Development environment software', 'Source code management SCM software'),
 ('Web platform development software', 'Django'),
 ('Business intelligence and data analysis software', 'Qlik Tech QlikView'),
 ('Data base user interface and query s

In [6]:
# (competencyText, name) pairs for 'Tools Used'; same reversed ordering as the
# 'Technology Skills' cell.
[
    (tool.other_attributes['competencyText'], tool.name)
    for tool in computer_scientists.competencies
    if tool.categories[0] == 'Tools Used'
]

[('High capacity removable media drives',
  'Universal serial bus USB flash drives'),
 ('Hard disk drives', 'Hard disk drives'),
 ('High end computer servers', 'Cluster systems'),
 ('Hard disk arrays', 'Network storage arrays'),
 ('Scanners', 'Computer scanners'),
 ('High end computer servers', 'Massively parallel processors MPP'),
 ('Loudspeakers', 'Free-field speakers'),
 ('Laser printers', 'Computer laser printers'),
 ('Pick or place robots', 'Articulated robots'),
 ('Multimedia projectors', 'Video projectors'),
 ('High end computer servers', 'High end computer servers'),
 ('Scanners', 'Laser scanners'),
 ('Stage or projection or studio lighting system', 'Lighting grids'),
 ('Camera based vision systems for automated data collection',
  'Real time motion capture systems'),
 ('Digital cameras', 'Digital cameras'),
 ('Cinematographic cameras', 'Pan-tilt-zoom cameras'),
 ('Compact disks CDs', 'Magneto optical discs'),
 ('Personal computers', 'Personal computers'),
 ('High end computer 

In [8]:
# (name, competencyText) pairs for competencies whose first category is 'Work Activities'.
[
    (activity.name, activity.other_attributes['competencyText'])
    for activity in computer_scientists.competencies
    if activity.categories[0] == 'Work Activities'
]

[('Monitor Processes, Materials, or Surroundings',
  'Monitoring and reviewing information from materials, events, or the environment, to detect or assess problems.'),
 ('Training and Teaching Others',
  'Identifying the educational needs of others, developing formal educational or training programs or classes, and teaching or instructing others.'),
 ('Judging the Qualities of Things, Services, or People',
  'Assessing the value, importance, or quality of things or people.'),
 ('Making Decisions and Solving Problems',
  'Analyzing information and evaluating results to choose the best solution and solve problems.'),
 ('Updating and Using Relevant Knowledge',
  'Keeping up-to-date technically and applying new knowledge to your job.'),
 ('Provide Consultation and Advice to Others',
  'Providing guidance and expert advice to management or other groups on technical, systems-, or process-related topics.'),
 ('Establishing and Maintaining Interpersonal Relationships',
  'Developing constructi

In [9]:
# (name, competencyText) pairs for competencies whose first category is 'Work Styles'.
[
    (work_style.name, work_style.other_attributes['competencyText'])
    for work_style in computer_scientists.competencies
    if work_style.categories[0] == 'Work Styles'
]

[('Self Control',
  'Job requires maintaining composure, keeping emotions in check, controlling anger, and avoiding aggressive behavior, even in very difficult situations.'),
 ('Adaptability/Flexibility',
  'Job requires being open to change (positive or negative) and to considerable variety in the workplace.'),
 ('Independence',
  "Job requires developing one's own ways of doing things, guiding oneself with little or no supervision, and depending on oneself to get things done."),
 ('Persistence', 'Job requires persistence in the face of obstacles.'),
 ('Integrity', 'Job requires being honest and ethical.'),
 ('Dependability',
  'Job requires being reliable, responsible, and dependable, and fulfilling obligations.'),
 ('Achievement/Effort',
  'Job requires establishing and maintaining personally challenging achievement goals and exerting effort toward mastering tasks.'),
 ('Analytical Thinking',
  'Job requires analyzing information and using logic to address work-related issues and pr

In [14]:
# (name, competencyText) pairs for competencies whose first category is 'Work Values'.
[
    (work_value.name, work_value.other_attributes['competencyText'])
    for work_value in computer_scientists.competencies
    if work_value.categories[0] == 'Work Values'
]

[('Relationships',
  'Occupations that satisfy this work value allow employees to provide service to others and work with co-workers in a friendly non-competitive environment. Corresponding needs are Co-workers, Moral Values and Social Service.'),
 ('Independence',
  'Occupations that satisfy this work value allow employees to work on their own and make decisions. Corresponding needs are Creativity, Responsibility and Autonomy.'),
 ('Achievement',
  'Occupations that satisfy this work value are results oriented and allow employees to use their strongest abilities, giving them a feeling of accomplishment. Corresponding needs are Ability Utilization and Achievement.'),
 ('Support',
  'Occupations that satisfy this work value offer supportive management that stands behind employees. Corresponding needs are Company Policies, Supervision: Human Relations and Supervision: Technical.'),
 ('Working Conditions',
  'Occupations that satisfy this work value offer job security and good working con

In [10]:
# (name, competencyType) pairs for 'Task Statements'. This cell reads the
# 'competencyType' attribute rather than 'competencyText'; the displayed output
# shows values such as 'Core' and 'Supplemental'.
[
    (task.name, task.other_attributes['competencyType'])
    for task in computer_scientists.competencies
    if task.categories[0] == 'Task Statements'
]

[('Evaluate project plans and proposals to assess feasibility issues.',
  'Core'),
 ('Participate in staffing decisions and direct training of subordinates.',
  'Supplemental'),
 ('Consult with users, management, vendors, and technicians to determine computing needs and system requirements.',
  'Core'),
 ('Conduct logical analyses of business, scientific, engineering, and other technical problems, formulating mathematical models of problems for solution by computers.',
  'Core'),
 ('Direct daily operations of departments, coordinating project activities with other departments.',
  'Supplemental'),
 ('Develop and interpret organizational goals, policies, and procedures.',
  'Core'),
 ('Approve, prepare, monitor, and adjust operational budgets.',
  'Supplemental'),
 ('Develop performance standards, and evaluate work in light of established standards.',
  'Core'),
 ('Analyze problems to develop solutions involving computer hardware and software.',
  'Core'),
 ('Design computers and the so

In [7]:
# Occupations left in the filtered ontology; the displayed output shows a
# single match (15-1111.00), confirming the name filter selected one occupation.
computer_scientists.occupations

{Occupation(identifier=15-1111.00, name=Computer and Information Research Scientists, {'description': 'Conduct research into fundamental computer and information science as theorists, designers, or inventors. Develop solutions to problems in the field of computer hardware and software.', 'categories': ['O*NET-SOC Occupation']})}

In [5]:
import pandas as pd



In [7]:
# Build a long-format table with one row per (occupation title, competency) pair.
columns = ['occupation', 'competency', 'category', 'description']

# Occupation titles of interest. Each title appears exactly once: the list used
# to repeat 'Database Administrators' and 'Clinical Data Managers', which wrote
# every competency row for those two occupations into the table twice.
occupations = ['Computer and Information Research Scientists',
               'Social Science Research Assistants',
               'Remote Sensing Scientists and Technologists',
               'Bioinformatics Scientists',
               'Geospatial Information Scientists and Technologists',
               'Survey Researchers',
               'Statisticians',
               'Computer Systems Analysts',
               'Mathematicians',
               'Software Developers, Systems Software',
               'Database Administrators',
               'Database Architects',
               'Data Warehousing Specialists',
               'Computer Systems Engineers/Architects',
               'Business Intelligence Analysts',
               'Financial Quantitative Analysts',
               'Clinical Data Managers',
               'Information Security Analysts'
              ]
# Fail fast if a repeated title is reintroduced, since it would silently
# duplicate rows in the exported data.
assert len(occupations) == len(set(occupations)), 'duplicate occupation titles would duplicate rows'

data = []
for occupation in occupations:
    # Case-sensitive substring match on the occupation name; the filtered
    # ontology is consumed within this iteration, so the lambda always sees the
    # current title.
    oc = myonet.filter_by(lambda edge: occupation in edge.occupation.name)
    for competency in oc.competencies:
        # 'category' is the competency's first listed category; 'description'
        # is its 'competencyText' attribute.
        row = [occupation, competency.name, competency.categories[0], competency.other_attributes['competencyText']]
        data.append(row)

# Build the frame once from the collected rows.
df = pd.DataFrame(data, columns=columns)

In [8]:
# Preview the first ten rows of the competencies table.
df.head(n=10)

Unnamed: 0,occupation,competency,category,description
0,Computer and Information Research Scientists,Source code management SCM software,Technology Skills,Development environment software
1,Computer and Information Research Scientists,Microsoft Azure,Technology Skills,Development environment software
2,Computer and Information Research Scientists,Visualization,Abilities,The ability to imagine how something will look...
3,Computer and Information Research Scientists,Free-field speakers,Tools Used,Loudspeakers
4,Computer and Information Research Scientists,Data visualization software,Technology Skills,Analytical or scientific software
5,Computer and Information Research Scientists,Judgment and Decision Making,Skills,Considering the relative costs and benefits of...
6,Computer and Information Research Scientists,Linux,Technology Skills,Operating system software
7,Computer and Information Research Scientists,IBM Rational Apex,Technology Skills,Configuration management software
8,Computer and Information Research Scientists,Minitab,Technology Skills,Analytical or scientific software
9,Computer and Information Research Scientists,Evaluate project plans and proposals to assess...,Task Statements,Core


In [9]:
# Persist the competencies table as CSV. pandas' default settings are kept, so
# the row index is written as the first (unnamed) column.
output_file = 'datasets/competencies.csv'
df.to_csv(path_or_buf=output_file)

In [11]:
# Build one row per matching occupation: identifier, name, description and a
# comma-joined string of its alternate titles.
oc_columns = ['identifier', 'name', 'description', 'titles']
oc_data = []
# `occupations` may list a title more than once; dict.fromkeys drops repeats
# while keeping first-seen order, so each occupation is exported only once.
for occupation_title in dict.fromkeys(occupations):
    # Case-sensitive substring match on the occupation name; the result is
    # consumed within this iteration, so the lambda sees the current title.
    oc = myonet.filter_by(lambda edge: occupation_title in edge.occupation.name)
    for occupation in oc.occupations:
        row = [
            occupation.identifier,
            occupation.name,
            occupation.other_attributes['description'],
            ','.join(occupation.other_attributes['alternate_titles']),
        ]
        oc_data.append(row)
oc_df = pd.DataFrame(oc_data, columns=oc_columns)

In [12]:
# Preview the first ten rows of the occupations table.
oc_df.head(n=10)

Unnamed: 0,identifier,name,description,titles
0,15-1111.00,Computer and Information Research Scientists,Conduct research into fundamental computer and...,Artificial Intelligence Specialist (AI Special...
1,19-4061.00,Social Science Research Assistants,"Assist social scientists in laboratory, survey...","Bilingual Research Interviewer,Clinical Resear..."
2,19-2099.01,Remote Sensing Scientists and Technologists,Apply remote sensing principles and methods to...,"All Source Intelligence Analyst,Data Analytics..."
3,19-1029.01,Bioinformatics Scientists,Conduct research using bioinformatics theory a...,"Assistant Scientist,Bioinformatician,Bioinform..."
4,15-1199.04,Geospatial Information Scientists and Technolo...,Research or develop geospatial technologies. M...,"Geographic Information Scientist,Geographic In..."
5,19-3022.00,Survey Researchers,"Plan, develop, or conduct surveys. May analyze...","Data Analyst,Data Collection Specialist,Field ..."
6,15-2041.00,Statisticians,Develop or apply mathematical or statistical t...,"Analytical Statistician,Applied Scientist,Appl..."
7,15-1121.00,Computer Systems Analysts,"Analyze science, engineering, business, and ot...","Applications Analyst,Applications Systems Anal..."
8,15-2021.00,Mathematicians,Conduct research in fundamental mathematics or...,"Agent-Based Modeler,Algebraist,Applied Mathema..."
9,15-1133.00,"Software Developers, Systems Software","Research, design, develop, and test operating ...","Applications Analyst,Automation Engineer,Beta ..."


In [13]:
# Persist the occupations table as CSV. pandas' default settings are kept, so
# the row index is written as the first (unnamed) column.
output_file = 'datasets/occupations.csv'
oc_df.to_csv(path_or_buf=output_file)