# Gartner Wanted Analytics API Tutorial

In this tutorial, you will have an example of how to use the python wrapper with the `WantedQuery` class

In [1]:
from talentml.gartner.core import WantedQuery
from talentml.onet.core import OnetDB
from talentml.utils import viz

import os
import pandas as pd
import matplotlib.pyplot as plt
import igraph as ig
import chart_studio.plotly as py

Parameters (See Wanted Analytics documentation for more details)

In [2]:
API_KEY = os.getenv('gartner_API_key') # This is a 32 characters-long key stored as an environment variable
date='2016-01-01-2020-02-07'
query = '"data scientist"|"scientifique des données"|"Artificial intelligence"|"intelligence artificielle"|"big data"'
function = '10' # Information technology

Creating query object

In [3]:
wq = WantedQuery(
    passkey=API_KEY, 
    function=function, 
    query=query,
    date=date
)
wq

<talentml.gartner.core.WantedQuery at 0x22857b2a5b0>

Download data

In [4]:
data = wq.get_data()

Exploring data

In [5]:
# Columns
print(data.columns)
print(data.shape)

Index(['id', 'hash', 'refnumber', 'isstaffing', 'isanonymous', 'isthirdparty',
       'isinappropriate', 'isbulk', 'isaggregator', 'isfree',
       'isclassifiedoccupation', 'isclassifiedindustry', 'iscurrent',
       'dates_firstseen', 'dates_posted', 'dates_refreshed', 'title_value',
       'title_titleid', 'title_cleantitleid', 'title_semicleantitleid',
       'description_value', 'occupation_occupation_code',
       'occupation_occupation_label', 'occupation_occupation_revision',
       'industry_code', 'industry_label', 'function_id', 'function_label',
       'employer_id', 'employer_name', 'employer_superaliasid',
       'employer_superalias', 'education_id', 'education_label',
       'locations_location_0_city_code', 'locations_location_0_city_label',
       'locations_location_0_state_code', 'locations_location_0_state_label',
       'locations_location_0_county_code', 'locations_location_0_county_label',
       'locations_location_0_wib_id', 'locations_location_0_wib_code',
  

In [6]:
viz.hist_plot(
    series = pd.to_datetime(data['dates_firstseen']),
    title = "Évolution du nombre d'emploi (2016-2020)"
)    

In [121]:
from sklearn.preprocessing import MinMaxScaler

# Number of postings per city, with mean locations as centroid

city_labels = 'locations_location_0_city_label'
latitude_labels = 'locations_location_0_position_latitude'
longitude_labels = 'locations_location_0_position_longitude'


scaler = MinMaxScaler(feature_range=(0.1, 0.9))

city_count = pd.DataFrame(
    index = data[city_labels].value_counts().index,
    data = data[city_labels].value_counts().values,
    columns = ['count'],
)
    
city_count['count_scaled'] = scaler.fit_transform(city_count.values.reshape(-1,1))
city_count['mean_x'] = 0
city_count['mean_y'] = 0


unique_cities = list(data[city_labels].unique())

for city in unique_cities:
    city_count.loc[city, 'mean_x'] = data[data[city_labels]==city][longitude_labels].astype(float).mean()
    city_count.loc[city, 'mean_y'] = data[data[city_labels]==city][latitude_labels].astype(float).mean()

city_count = city_count.drop('Unavailable')
city_count

Unnamed: 0,count,count_scaled,mean_x,mean_y
Montréal,218,0.9,-73.651703,45.527901
Québec,10,0.13318,-71.330299,46.8517
Saint-Laurent,7,0.12212,-73.732903,45.522598
Sherbrooke,3,0.107373,-71.965797,45.401798
Gatineau,2,0.103687,-75.659302,45.491001
Dorval,2,0.103687,-73.753304,45.450901
Mirabel,2,0.103687,-74.076401,45.645599
Victoriaville,2,0.103687,-71.970802,46.061901
Brossard,2,0.103687,-73.4562,45.446602
Anjou,2,0.103687,-73.588997,45.619999


In [157]:
from importlib import reload 
reload(viz)

viz.map_city_count(city_value_counts_df=city_count, geojson_path='../data/geojson/region_admin_poly.shp')

Uncomment to export as xlsx

In [58]:
'Montréal' in df['MRS_NM_MRC']

False

In [None]:
onet = OnetDB()
onet.directory

In [None]:
link = onet.directory['technology_skills_competencies']
tech_skills = list(onet.call(link).unique())
tech_skills[100:120] # Random

In [15]:
def compare(description_values, tech_skills):
    df = pd.DataFrame(columns=tech_skills,index=range(len(description_values)))
    for desc in range(len(description_values)):
        for skill in tech_skills:
            df.loc[desc,skill] = description_values[desc].count(str(skill))
    return(df)


In [16]:
tech_count = compare(data['description_value'], tech_skills)

_df = tech_count.loc[
    list(map(lambda row: tech_count.iloc[row].sum()>0, range(len(tech_count)))),
    (tech_count.sum()>0).values
]

_df = _df.reset_index(drop=True)

#skills_id=range(list(_df.columns))

TypeError: 'list' object cannot be interpreted as an integer

In [17]:
idx = [s for s, v in enumerate(_df.columns)]
skills = [v for s, v in enumerate(_df.columns)]
idx

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92]

In [18]:
#L=len(data['links'])

#Edges=[(data['links'][k]['source'], data['links'][k]['target']) for k in range(L)]

#G=ig.Graph(Edges, directed=False)

links = []

import itertools

for x in range(len(_df)):
    combinations = list(itertools.combinations(idx, 2))
    for combo in combinations:
        if _df.iloc[x, combo[0]] == 0 or _df.iloc[x, combo[1]] == 0:
            pass
        else:
            links.append([combo[0], combo[1]])

In [19]:
links

[[0, 23],
 [0, 38],
 [0, 39],
 [0, 46],
 [0, 52],
 [0, 66],
 [0, 67],
 [0, 75],
 [23, 38],
 [23, 39],
 [23, 46],
 [23, 52],
 [23, 66],
 [23, 67],
 [23, 75],
 [38, 39],
 [38, 46],
 [38, 52],
 [38, 66],
 [38, 67],
 [38, 75],
 [39, 46],
 [39, 52],
 [39, 66],
 [39, 67],
 [39, 75],
 [46, 52],
 [46, 66],
 [46, 67],
 [46, 75],
 [52, 66],
 [52, 67],
 [52, 75],
 [66, 67],
 [66, 75],
 [67, 75],
 [39, 40],
 [39, 51],
 [39, 52],
 [40, 51],
 [40, 52],
 [51, 52],
 [29, 39],
 [29, 51],
 [29, 52],
 [39, 51],
 [39, 52],
 [51, 52],
 [39, 40],
 [39, 52],
 [40, 52],
 [22, 39],
 [22, 46],
 [22, 52],
 [22, 53],
 [39, 46],
 [39, 52],
 [39, 53],
 [46, 52],
 [46, 53],
 [52, 53],
 [39, 40],
 [39, 52],
 [39, 59],
 [39, 60],
 [40, 52],
 [40, 59],
 [40, 60],
 [52, 59],
 [52, 60],
 [59, 60],
 [39, 52],
 [39, 51],
 [39, 52],
 [51, 52],
 [0, 16],
 [0, 39],
 [0, 40],
 [0, 51],
 [0, 52],
 [0, 55],
 [0, 57],
 [0, 75],
 [0, 84],
 [16, 39],
 [16, 40],
 [16, 51],
 [16, 52],
 [16, 55],
 [16, 57],
 [16, 75],
 [16, 84],
 [39,

In [36]:
G=ig.Graph(links, directed=False)
layt=G.layout('kk', dim=3)
layt[92]

[-1.6899906964966604, -6.029840274824559, 6.3735930503096725]

In [43]:
from sklearn.cluster import KMeans
import numpy as np

X = np.array(links)
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)

kmeans.labels_


#kmeans.cluster_centers_array

array([1, 1, 1, ..., 0, 2, 2])

In [37]:
Xn=[layt[k][0] for k in range(92)]# x-coordinates of nodes
Yn=[layt[k][1] for k in range(92)]# y-coordinates
Zn=[layt[k][2] for k in range(92)]# z-coordinates
Xe=[]
Ye=[]
Ze=[]
for e in links:
    Xe+=[layt[e[0]][0],layt[e[1]][0], None]# x-coordinates of edge ends
    Ye+=[layt[e[0]][1],layt[e[1]][1], None]
    Ze+=[layt[e[0]][2],layt[e[1]][2], None]

In [51]:
print(Xn)


trace1=go.Scatter3d(x=Xe,
               y=Ye,
               z=Ze,
               mode='lines',
               line=dict(color='rgb(125,125,125)', width=0.1)
               )

trace2=go.Scatter3d(x=Xn,
               y=Yn,
               z=Zn,
               mode='markers',
               name='actors',
               marker=dict(symbol='circle',
                             size=np.random.randint(8,20,92),
                             line=dict(color='rgb(50,50,50)', width=0.5)
                             )
               )

axis=dict(showbackground=False,
          showline=False,
          zeroline=False,
          showgrid=False,
          showticklabels=False,
          title=''
          )

layout = go.Layout(
         title="Network of coappearances of characters in Victor Hugo's novel<br> Les Miserables (3D visualization)",
         width=1000,
         height=1000,
         showlegend=False,
         scene=dict(
             xaxis=dict(axis),
             yaxis=dict(axis),
             zaxis=dict(axis),
        ),
     margin=dict(
        t=100
    ),
    hovermode='closest',
    annotations=[
           dict(
           showarrow=False,
            text="Data source: <a href='http://bost.ocks.org/mike/miserables/miserables.json'>[1] miserables.json</a>",
            xref='paper',
            yref='paper',
            x=0,
            y=0.1,
            xanchor='left',
            yanchor='bottom',
            font=dict(
            size=14
            )
            )
        ],    )

[-0.14283486022019182, -0.29651973693728867, 3.9512793567612916, 3.0891346533981427, -2.6061613070442693, -1.8706842186521286, -3.7009418657436712, -1.191452315450057, 1.2812954081014407, -4.087659503035206, 6.897505721682005, 1.3729095214057012, 4.226003512155625, -3.8368771360413, 2.2414923078264057, 6.106085394145131, 1.5683174674898783, 3.349716648395007, 1.2647785016562212, -2.3071879020281036, -1.0886744421995038, 6.4542967162043094, -0.86106088895735, 2.3118205488899677, 4.257523051325946, 0.9105783369902071, -0.8052361972992307, 5.015551677052296, 1.4589141448067493, 0.9560964921723953, 0.34070398484240066, 1.7756158315318546, 0.1676175162994189, -2.0028948155578137, -2.843346533081098, 0.598102994920803, 4.6075281694645245, 7.194747719850974, 3.7919791127844973, 2.126643053077535, 1.6669163931729027, 5.013244686976771, 1.201678947180494, -2.6979697387568002, 2.033452916036112, 2.2340962562357207, -0.13257859846331443, -0.14986082342904358, -0.5551319620981918, 2.72832382152048

In [52]:
data=[trace1, trace2]
fig=go.Figure(data=data, layout=layout)

plotly.offline.iplot(fig, filename='Les-Miserables')

In [90]:
onet_skills = pd.read_excel('https://www.onetcenter.org/dl_files/database/db_24_2_excel/Technology%20Skills.xlsx')
onet_skills['count'] = 0
skills = pd.DataFrame(onet_skills['Example'].unique(), index=range(len(onet_skills['Example'].unique())), columns=['name'])
skills['count'] = 0


onet_knowledge = pd.read_excel('https://www.onetcenter.org/dl_files/database/db_24_2_excel/Knowledge.xlsx')
onet_knowledge['count'] = 0
knowledge = pd.DataFrame(onet_knowledge['Element Name'].unique(), index=range(len(onet_knowledge['Element Name'].unique())), columns=['name'])
knowledge['count'] = 0


In [91]:
for job in range(len(data)):
    for x in range(len(skills)):
        if skills.loc[x, 'name'] in data.loc[job,'description_value']:
            skills.loc[x, 'count']+=data.loc[job,'description_value'].count(skills.loc[x, 'name'])
            

for job in range(len(data)):
    for y in range(len(knowledge)):
        if knowledge.loc[y, 'name'] in data.loc[job,'description_value']:
            knowledge.loc[y, 'count']+=data.loc[job,'description_value'].count(knowledge.loc[y, 'name'])

In [83]:
data.loc[0,'description_value']

'DÉVELOPPEUR BACK-END WEB – PRODUIT QUELQUES MOTS SUR NOUS Moment Factory est un studio multimédia, réunissant un large éventail d’expertises sous un même toit. Notre équipe combine des spécialités dans la vidéo, l’éclairage, l’architecture, le son et les effets spéciaux afin de créer des expériences mémorables. Basé à Montréal, le studio possède également des bureaux à Los Angeles, Londres, Paris, New York et Tokyo. Depuis ses débuts en 2001, Moment Factory a créé plus de 400 productions et destinations uniques dans le monde, pour des clients tels que l’aéroport de Los Angeles, Nine Inch Nails, Microsoft, la NFL, Sony, Toyota, la Sagrada Familia de Barcelone, Madonna et la Royal Caribbean. VOTRE ÉQUIPE Conçu par Moment Factory, le logiciel X-Agora simplifie la gestion des expériences immersives et permet l’opération de nos spectacles. Pour soutenir son évolution constante, l’équipe X-Agora met à profit ses esprits logiques et créatifs pour offrir un produit adapté aux projets. Rejoind

In [92]:
#onet_skills.columns
knowledge.sort_values(by='count',ascending=False)

Unnamed: 0,name,count
10,Design,65
13,Mathematics,9
32,Transportation,3
14,Physics,2
18,Sociology and Anthropology,0
20,Medicine and Dentistry,0
21,Therapy and Counseling,0
22,Education and Training,0
23,English Language,0
24,Foreign Language,0


In [47]:
data.loc[0,'description_value'].count('HMI')

3

In [9]:
%config InlineBackend.rc

{'figure.figsize': (6.0, 4.0),
 'figure.facecolor': (1, 1, 1, 0),
 'figure.edgecolor': (1, 1, 1, 0),
 'font.size': 10,
 'figure.dpi': 72,
 'figure.subplot.bottom': 0.125}