# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import SMOTE
from collections import Counter
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import pickle

# Load Dictionaries to Compare Differences

In [5]:
# Load the two pickled lookup dictionaries used for the skill-gap comparison.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# .pkl files that come from a trusted source.

# `skills`: judging by the cell output below, keyed by a numeric id, each value
# being a {skill name: level} dict -- TODO confirm against the code that wrote it.
with open('skills.pkl', 'rb') as pickle_file:
    skills = pickle.load(pickle_file)

# `skills_required`: judging by the cell output below, keyed by an upper-case
# category name (e.g. 'AVIATION'), each value being a {skill name: required level} dict.
with open('skills_required.pkl', 'rb') as pickle_file:
    skills_required = pickle.load(pickle_file)

In [6]:
# Peek at a single entry of `skills`; the displayed value maps skill name -> integer level.
skills[31605080]

{'management': 5,
 'inventory management': 5,
 'installation': 5,
 'troubleshooting': 5,
 'logistics': 5,
 'customer service': 5,
 'marshalling': 5,
 'software installation': 5,
 'active directory': 5,
 'technical support': 5,
 'aviation': 5,
 'information technology': 5,
 'research': 5,
 'computer repair': 1,
 'software maintenance': 1,
 'android': 1,
 'malware detection': 1}

In [7]:
# Peek at the 'AVIATION' entry of `skills_required`; the displayed value maps
# skill name -> required integer level.
skills_required['AVIATION']

{'aircraft maintenance': 3,
 'aviation': 5,
 'b': 3,
 'business administration': 3,
 'c': 3,
 'construction': 3,
 'critical thinking': 3,
 'customer service': 4,
 'drawing': 3,
 'filing': 3,
 'inquiry': 3,
 'installation': 3,
 'inventory control': 3,
 'inventory management': 3,
 'investigation': 3,
 'leadership': 4,
 'logistics': 3,
 'm': 3,
 'management': 5,
 'marketing': 3,
 'microsoft office': 3,
 'microsoft word': 3,
 'negotiation': 3,
 'planning': 4,
 'process improvement': 3,
 'procurement': 3,
 'project management': 3,
 'purchasing': 3,
 'quality assurance': 3,
 'quality control': 3,
 'r': 3,
 'reduction': 3,
 'requisition': 3,
 'research': 4,
 'safety training': 3,
 'scheduling': 3,
 'security clearance': 3,
 'source': 3,
 'supervision': 3,
 'test equipment': 3,
 'time management': 3,
 'track': 4,
 'tracking': 3,
 'troubleshooting': 3}

# Identify Skill Gap

Skill gap identification compares the applicant's skill set with the skill set of the most ideal peers.  

There are 5 levels of recommendation, based on the skill level the applicant currently has:
 - Good to have
 - Recommended (level 3 -> level 4)
 - Strongly Recommended (level 4 -> level 5)
 - Critical (level 3 -> level 5)
 - Fulfilled

In [11]:
def skill_gap_identification_peers(skills, skills_required):
    """Bucket every required skill by how far the applicant is from the required level.

    Parameters
    ----------
    skills : dict
        The applicant's skills, mapping skill name -> numeric level.
    skills_required : dict
        The target skills, mapping skill name -> required numeric level.

    Returns
    -------
    dict of str -> set of str
        Always contains the five keys below; each required skill lands in
        exactly one of them:

        - 'good to have': the applicant does not list the skill at all.
        - 'fulfilled': the applicant's level is at or above the required level.
        - 'critical': the applicant is two or more levels below the requirement.
        - 'strongly recommended': the applicant is less than two levels short
          of a requirement of 5.
        - 'recommended': any other shortfall of less than two levels.

    Skills the applicant has that are not in ``skills_required`` are ignored.
    """
    diff = {'good to have': set(), 'recommended': set(), 'strongly recommended': set(), 'critical': set(), 'fulfilled': set()}
    for key, required in skills_required.items():
        if key not in skills:
            diff['good to have'].add(key)
            continue

        gap = required - skills[key]
        if gap <= 0:
            diff['fulfilled'].add(key)
        elif gap >= 2:
            # Any shortfall of two or more levels is critical. Testing for
            # exactly 2 would rank larger gaps (e.g. level 1 vs. required 5)
            # as less severe than a gap of 2.
            diff['critical'].add(key)
        elif required == 5:
            diff['strongly recommended'].add(key)
        else:
            diff['recommended'].add(key)
    return diff

In [12]:
# Example run: classify the skills of entry 31605080 against the 'AVIATION' requirements.
skill_gap_identification_peers(skills[31605080], skills_required['AVIATION'])

{'good to have': {'aircraft maintenance',
  'b',
  'business administration',
  'c',
  'construction',
  'critical thinking',
  'drawing',
  'filing',
  'inquiry',
  'inventory control',
  'investigation',
  'leadership',
  'm',
  'marketing',
  'microsoft office',
  'microsoft word',
  'negotiation',
  'planning',
  'process improvement',
  'procurement',
  'project management',
  'purchasing',
  'quality assurance',
  'quality control',
  'r',
  'reduction',
  'requisition',
  'safety training',
  'scheduling',
  'security clearance',
  'source',
  'supervision',
  'test equipment',
  'time management',
  'track',
  'tracking'},
 'recommended': set(),
 'strongly recommended': set(),
 'critical': set(),
 'fulfilled': {'aviation',
  'customer service',
  'installation',
  'inventory management',
  'logistics',
  'management',
  'research',
  'troubleshooting'}}