In [1]:
## import necessary libraries

import pandas as pd
import nltk
import string
import gensim
import itertools
import numpy as np
from scipy import spatial
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sent2vec.vectorizer import Vectorizer
from sent2vec.splitter import Splitter
from gensim.models import Word2Vec
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv('./potential-talents - Aspiring human resources - seeking human resources.csv')
df.head()

Unnamed: 0,id,job_title,location,connection,fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,


## Word2Vec

In [3]:
job_desc_list = []

for num in range(len(df)):
    desc = df.iloc[num]['job_title']
    ## remove punctuation from job description    
    for symbol in list(string.punctuation):
        desc = desc.replace(symbol, ' ')
    
    ## write out acronyms for human resource positions
    desc = nltk.word_tokenize(desc)
    for word in range(len(desc)):
        if desc[word] == 'HR':
            desc[word] = 'Human Resources'
        elif desc[word] == 'CHRO':
            desc[word] = 'Chief Human Resources Officer'
        elif desc[word] == 'GPHR':
            desc[word] = 'General Professional Human Resources'
        elif desc[word] == 'SPHR':
            desc[word] = 'Senior Professional Human Resources'
            
        ## remove words that are not in pretrained vectors
        ## most of these words are the names of colleges
        elif desc[word] in ['EPIK', 'Celal', 'Bayar', 'Humber', 'ENGIE', 'Buckhead', 'Luxottica', 'Beneteau', 'ScottMadden', 
                            'Nortia', 'Schwan', 'Endemol', 'JTI', 'Styczynski', 'Westfield', 'Kokomo', 'Delphi', 'Loparex']:
            desc[word] = ''
        desc[word] = desc[word].lower()

    ## remove stopwords
    desc = [token for token in desc if token not in stopwords.words('english')]
    desc = [token for token in desc if not token.isdigit()]
    
    desc = ' '.join(desc)
    desc = nltk.word_tokenize(desc)
    
    job_desc_list.append(desc)

In [4]:
job_desc_list.append(['seeking', 'human', 'resources'])

In [5]:
job_desc_list

[['c',
  'bauer',
  'college',
  'business',
  'graduate',
  'magna',
  'cum',
  'laude',
  'aspiring',
  'human',
  'resources',
  'professional'],
 ['native', 'english', 'teacher', 'english', 'program', 'korea'],
 ['aspiring', 'human', 'resources', 'professional'],
 ['people', 'development', 'coordinator', 'ryan'],
 ['advisory', 'board', 'member', 'university'],
 ['aspiring', 'human', 'resources', 'specialist'],
 ['student', 'college', 'aspiring', 'human', 'resources', 'generalist'],
 ['human', 'resources', 'senior', 'specialist'],
 ['student', 'college', 'aspiring', 'human', 'resources', 'generalist'],
 ['seeking', 'human', 'resources', 'hris', 'generalist', 'positions'],
 ['student', 'chapman', 'university'],
 ['svp',
  'chief',
  'human',
  'resources',
  'officer',
  'marketing',
  'communications',
  'csr',
  'officer',
  'houston',
  'woodlands',
  'energy',
  'general',
  'professional',
  'human',
  'resources',
  'senior',
  'professional',
  'human',
  'resources'],
 ['huma

In [6]:
vectorizer = Vectorizer()
vectorizer.word2vec(job_desc_list, pretrained_vectors_path= './GoogleNews-vectors-negative300.bin')
word2vec_vectors = vectorizer.vectors

In [7]:
dist_1 = spatial.distance.cosine(word2vec_vectors[-1], word2vec_vectors[0])
dist_2 = spatial.distance.cosine(word2vec_vectors[-1], word2vec_vectors[2])
print('dist_1: {0}, dist-2: {1}'.format(dist_1, dist_2))

dist_1: 0.611827164888382, dist-2: 0.29940420389175415


In [8]:
vector_df = df
vector_df.head()

Unnamed: 0,id,job_title,location,connection,fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,


In [9]:
scores = []
for num in range(len(job_desc_list) - 1):
    scores.append(1 - spatial.distance.cosine(word2vec_vectors[num], word2vec_vectors[-1]))
    
vector_df['word2vec_fit'] = scores
vector_df

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,,0.388173
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,,0.104410
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,,0.700596
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,,0.291818
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,,0.229988
...,...,...,...,...,...,...
99,100,Aspiring Human Resources Manager | Graduating ...,"Cape Girardeau, Missouri",103,,0.729863
100,101,Human Resources Generalist at Loparex,"Raleigh-Durham, North Carolina Area",500+,,0.750962
101,102,Business Intelligence and Analytics at Travelers,Greater New York City Area,49,,0.282647
102,103,Always set them up for Success,Greater Los Angeles Area,500+,,0.133268


In [10]:
vector_df.sort_values(by=['word2vec_fit'], ascending=False).head(15)

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit
98,99,Seeking Human Resources Position,"Las Vegas, Nevada Area",48,,0.920869
27,28,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864
29,30,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864
72,73,"Aspiring Human Resources Manager, seeking inte...","Houston, Texas Area",7,,0.83922
9,10,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101
39,40,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101
52,53,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101
61,62,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101
73,74,Human Resources Professional,Greater Boston Area,16,,0.793325
67,68,Human Resources Specialist at Luxottica,Greater New York City Area,500+,,0.767376


## BERT

In [11]:
bert_list = []

for num in range(len(df)):
    desc = df.iloc[num]['job_title']
    ## remove punctuation from job description    
    for symbol in list(string.punctuation):
        desc = desc.replace(symbol, ' ')
        
    ## write out acronyms for human resource positions
    desc = nltk.word_tokenize(desc)
    for word in range(len(desc)):
        if desc[word] == 'HR':
            desc[word] = 'Human Resources'
        elif desc[word] == 'CHRO':
            desc[word] = 'Chief Human Resources Officer'
        elif desc[word] == 'GPHR':
            desc[word] = 'General Professional in Human Resources'
        elif desc[word] == 'SPHR':
            desc[word] = 'Senior Professional in Human Resources'
    
    desc = ' '.join(desc)
    bert_list.append(desc)

In [12]:
bert_list.append('seeking human resources')

In [13]:
bert_list

['2019 C T Bauer College of Business Graduate Magna Cum Laude and aspiring Human Resources professional',
 'Native English Teacher at EPIK English Program in Korea',
 'Aspiring Human Resources Professional',
 'People Development Coordinator at Ryan',
 'Advisory Board Member at Celal Bayar University',
 'Aspiring Human Resources Specialist',
 'Student at Humber College and Aspiring Human Resources Generalist',
 'Human Resources Senior Specialist',
 'Student at Humber College and Aspiring Human Resources Generalist',
 'Seeking Human Resources HRIS and Generalist Positions',
 'Student at Chapman University',
 'SVP Chief Human Resources Officer Marketing Communications CSR Officer ENGIE Houston The Woodlands Energy General Professional in Human Resources Senior Professional in Human Resources',
 'Human Resources Coordinator at InterContinental Buckhead Atlanta',
 '2019 C T Bauer College of Business Graduate Magna Cum Laude and aspiring Human Resources professional',
 '2019 C T Bauer Colleg

In [14]:
vectorizer = Vectorizer()
vectorizer.bert(bert_list)
bert_vectors = vectorizer.vectors

Vectorization done on cpu device


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
dist_1 = spatial.distance.cosine(bert_vectors[-1], bert_vectors[0])
dist_2 = spatial.distance.cosine(bert_vectors[-1], bert_vectors[2])
print('dist_1: {0}, dist-2: {1}'.format(dist_1, dist_2))

dist_1: 0.07078111171722412, dist-2: 0.01500558853149414


In [16]:
scores = []
for num in range(len(bert_list) - 1):
    scores.append(1 - spatial.distance.cosine(bert_vectors[num], bert_vectors[-1]))
    
vector_df['bert_fit'] = scores
vector_df

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit,bert_fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,,0.388173,0.929219
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,,0.104410,0.948172
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,,0.700596,0.984994
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,,0.291818,0.973138
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,,0.229988,0.935457
...,...,...,...,...,...,...,...
99,100,Aspiring Human Resources Manager | Graduating ...,"Cape Girardeau, Missouri",103,,0.729863,0.914459
100,101,Human Resources Generalist at Loparex,"Raleigh-Durham, North Carolina Area",500+,,0.750962,0.982065
101,102,Business Intelligence and Analytics at Travelers,Greater New York City Area,49,,0.282647,0.976543
102,103,Always set them up for Success,Greater Los Angeles Area,500+,,0.133268,0.983066


In [17]:
vector_df.sort_values(by=['bert_fit'], ascending=False).head(15)

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit,bert_fit
29,30,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864,0.996201
27,28,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864,0.996201
98,99,Seeking Human Resources Position,"Las Vegas, Nevada Area",48,,0.920869,0.994894
73,74,Human Resources Professional,Greater Boston Area,16,,0.793325,0.991686
87,88,Human Resources Management Major,"Milpitas, California",18,,0.74754,0.990193
5,6,Aspiring Human Resources Specialist,Greater New York City Area,1,,0.743151,0.987985
59,60,Aspiring Human Resources Specialist,Greater New York City Area,1,,0.743151,0.987985
35,36,Aspiring Human Resources Specialist,Greater New York City Area,1,,0.743151,0.987985
48,49,Aspiring Human Resources Specialist,Greater New York City Area,1,,0.743151,0.987985
23,24,Aspiring Human Resources Specialist,Greater New York City Area,1,,0.743151,0.987985


## TF-IDF

In [18]:
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(bert_list)

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [19]:
vector_df['tfidf_fit'] = cosine_sim[-1][:-1]

In [20]:
vector_df.sort_values(by=['tfidf_fit'], ascending=False).head(15)

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit,bert_fit,tfidf_fit
27,28,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864,0.996201,0.646155
29,30,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864,0.996201,0.646155
98,99,Seeking Human Resources Position,"Las Vegas, Nevada Area",48,,0.920869,0.994894,0.625865
72,73,"Aspiring Human Resources Manager, seeking inte...","Houston, Texas Area",7,,0.83922,0.975402,0.552318
52,53,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527
9,10,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527
61,62,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527
39,40,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527
26,27,Aspiring Human Resources Management student se...,"Houston, Texas Area",500+,,0.696512,0.965308,0.406702
28,29,Aspiring Human Resources Management student se...,"Houston, Texas Area",500+,,0.696512,0.965308,0.406702


## GloVe

In [21]:
glove_list = []

for num in range(len(df)):
    desc = df.iloc[num]['job_title']
    ## remove punctuation from job description    
    for symbol in list(string.punctuation):
        desc = desc.replace(symbol, ' ')
        
    ## write out acronyms for human resource positions
    desc = nltk.word_tokenize(desc)
    for word in range(len(desc)):
        if desc[word] == 'HR':
            desc[word] = 'Human Resources'
        elif desc[word] == 'CHRO':
            desc[word] = 'Chief Human Resources Officer'
        elif desc[word] == 'GPHR':
            desc[word] = 'General Professional in Human Resources'
        elif desc[word] == 'SPHR':
            desc[word] = 'Senior Professional in Human Resources'
        elif desc[word] in ['HRIS', 'ENGIE', 'ScottMadden', 'Styczynski', 'Nortia', 'Loparex']:
            desc[word] = ''
        desc[word] = desc[word].lower()
    
    desc = [token for token in desc if token not in stopwords.words('english')]
    desc = [token for token in desc if not token.isdigit()]
    
    desc = ' '.join(desc)
    glove_list.append(desc)

glove_list.append('seeking human resources')
glove_list

['c bauer college business graduate magna cum laude aspiring human resources professional',
 'native english teacher epik english program korea',
 'aspiring human resources professional',
 'people development coordinator ryan',
 'advisory board member celal bayar university',
 'aspiring human resources specialist',
 'student humber college aspiring human resources generalist',
 'human resources senior specialist',
 'student humber college aspiring human resources generalist',
 'seeking human resources  generalist positions',
 'student chapman university',
 'svp chief human resources officer marketing communications csr officer  houston woodlands energy general professional in human resources senior professional in human resources',
 'human resources coordinator intercontinental buckhead atlanta',
 'c bauer college business graduate magna cum laude aspiring human resources professional',
 'c bauer college business graduate magna cum laude aspiring human resources professional',
 'native

In [22]:
import gensim.downloader as api

model = api.load("glove-wiki-gigaword-300")

In [23]:
def preprocess(s):
    return [i.lower() for i in s.split()]

def get_vector(s):
    return np.sum(np.array([model[i] for i in preprocess(s)]), axis=0)

In [24]:
glove_vectors = []

for num in range(len(glove_list)):
    glove_vectors.append(get_vector(glove_list[num]))

In [25]:
scores = []
for num in range(len(glove_list) - 1):
    scores.append(1 - spatial.distance.cosine(glove_vectors[num], glove_vectors[-1]))
    
vector_df['glove_fit'] = scores
vector_df

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit,bert_fit,tfidf_fit,glove_fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,,0.388173,0.929219,0.097827,0.466585
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,,0.104410,0.948172,0.000000,0.329688
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,,0.700596,0.984994,0.292820,0.799220
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,,0.291818,0.973138,0.000000,0.529448
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,,0.229988,0.935457,0.000000,0.295936
...,...,...,...,...,...,...,...,...,...
99,100,Aspiring Human Resources Manager | Graduating ...,"Cape Girardeau, Missouri",103,,0.729863,0.914459,0.283608,0.809268
100,101,Human Resources Generalist at Loparex,"Raleigh-Durham, North Carolina Area",500+,,0.750962,0.982065,0.172445,0.810235
101,102,Business Intelligence and Analytics at Travelers,Greater New York City Area,49,,0.282647,0.976543,0.000000,0.412289
102,103,Always set them up for Success,Greater Los Angeles Area,500+,,0.133268,0.983066,0.000000,0.459518


In [26]:
vector_df.sort_values(by=['glove_fit'], ascending=False).head(15)

Unnamed: 0,id,job_title,location,connection,fit,word2vec_fit,bert_fit,tfidf_fit,glove_fit
29,30,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864,0.996201,0.646155,0.948254
27,28,Seeking Human Resources Opportunities,"Chicago, Illinois",390,,0.903864,0.996201,0.646155,0.948254
98,99,Seeking Human Resources Position,"Las Vegas, Nevada Area",48,,0.920869,0.994894,0.625865,0.945992
72,73,"Aspiring Human Resources Manager, seeking inte...","Houston, Texas Area",7,,0.83922,0.975402,0.552318,0.914078
52,53,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527,0.873041
9,10,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527,0.873041
39,40,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527,0.873041
61,62,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,,0.808101,0.974998,0.447527,0.873041
73,74,Human Resources Professional,Greater Boston Area,16,,0.793325,0.991686,0.351856,0.864729
87,88,Human Resources Management Major,"Milpitas, California",18,,0.74754,0.990193,0.18337,0.849824


In [27]:
vector_df.drop('fit', axis=1, inplace=True)
vector_df

Unnamed: 0,id,job_title,location,connection,word2vec_fit,bert_fit,tfidf_fit,glove_fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,0.388173,0.929219,0.097827,0.466585
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,0.104410,0.948172,0.000000,0.329688
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,0.700596,0.984994,0.292820,0.799220
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,0.291818,0.973138,0.000000,0.529448
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,0.229988,0.935457,0.000000,0.295936
...,...,...,...,...,...,...,...,...
99,100,Aspiring Human Resources Manager | Graduating ...,"Cape Girardeau, Missouri",103,0.729863,0.914459,0.283608,0.809268
100,101,Human Resources Generalist at Loparex,"Raleigh-Durham, North Carolina Area",500+,0.750962,0.982065,0.172445,0.810235
101,102,Business Intelligence and Analytics at Travelers,Greater New York City Area,49,0.282647,0.976543,0.000000,0.412289
102,103,Always set them up for Success,Greater Los Angeles Area,500+,0.133268,0.983066,0.000000,0.459518


## RankNet

In [56]:
X = pd.DataFrame(word2vec_vectors)
y = np.array(vector_df['word2vec_fit'])

In [57]:
X.drop([104], inplace=True)
X['qid'] = 1
qid = np.array(X['qid'])
X.drop('qid', axis=1, inplace=True)
X = X.to_numpy()

In [58]:
X

array([[-0.06260172,  0.0594991 ,  0.01339213, ..., -0.0257899 ,
        -0.01607577,  0.10813395],
       [ 0.0164388 ,  0.0230306 ,  0.06384277, ...,  0.03059896,
         0.00439453,  0.02514648],
       [-0.09277344,  0.03723145,  0.08280945, ..., -0.04348755,
        -0.08966064,  0.03756714],
       ...,
       [ 0.18185425, -0.05010986, -0.08358765, ...,  0.0144043 ,
         0.13219261,  0.0296936 ],
       [ 0.01285807,  0.10728963,  0.02231852, ...,  0.00384521,
         0.00382487, -0.1295573 ],
       [-0.0721283 , -0.02363586,  0.03528214, ...,  0.01784515,
         0.1071167 ,  0.14154053]], dtype=float32)

In [70]:
from LambdaRankNN  import RankNetNN

ranker = RankNetNN(input_size=X.shape[1], hidden_layer_sizes=(16, 8,), activation=('relu', 'relu',))
ranker.fit(X, y, qid, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
ndcg: 0.999961575863638


In [71]:
ranker.predict(X)

array([-3.1113829e+01, -8.3598633e+01,  1.6173635e+01, -4.2282860e+01,
       -5.7604885e+01,  2.2553463e+01, -1.2438192e+01,  1.2200714e+01,
       -1.2438192e+01,  3.1213749e+01, -6.5911209e+01,  6.3278074e+00,
       -7.7946215e+00, -3.1113829e+01, -3.1113829e+01, -8.3598633e+01,
        1.6173635e+01, -4.2282860e+01, -3.1113829e+01, -8.3598633e+01,
        1.6173635e+01, -4.2282860e+01, -5.7604885e+01,  2.2553463e+01,
       -1.2438192e+01,  1.2200714e+01,  9.5932178e+00,  4.1130314e+01,
        9.5932178e+00,  4.1130314e+01, -3.1113829e+01, -8.3598633e+01,
        1.6173635e+01, -4.2282860e+01, -5.7604885e+01,  2.2553463e+01,
       -1.2438192e+01,  1.2200714e+01, -1.2438192e+01,  3.1213749e+01,
       -6.5911209e+01,  6.3278074e+00, -7.7946215e+00, -3.1113829e+01,
       -8.3598633e+01,  1.6173635e+01, -4.2282860e+01, -5.7604885e+01,
        2.2553463e+01, -1.2438192e+01,  1.2200714e+01, -1.2438192e+01,
        3.1213749e+01, -6.5911209e+01,  6.3278074e+00, -7.7946215e+00,
      

In [72]:
ranker.evaluate(X, y, qid, eval_at=10)

ndcg@10: 0.9998997466715455


In [93]:
test = pd.DataFrame(enumerate(ranker.predict(X)))
test

Unnamed: 0,0,1
0,0,-31.113829
1,1,-83.598633
2,2,16.173635
3,3,-42.282860
4,4,-57.604885
...,...,...
99,99,18.993565
100,100,25.505970
101,101,-45.512695
102,102,-70.045853


In [95]:
vector_df.sort_values('word2vec_fit', ascending=False).head()

Unnamed: 0,id,job_title,location,connection,word2vec_fit,bert_fit,tfidf_fit,glove_fit
98,99,Seeking Human Resources Position,"Las Vegas, Nevada Area",48,0.920869,0.994894,0.625865,0.945992
27,28,Seeking Human Resources Opportunities,"Chicago, Illinois",390,0.903864,0.996201,0.646155,0.948254
29,30,Seeking Human Resources Opportunities,"Chicago, Illinois",390,0.903864,0.996201,0.646155,0.948254
72,73,"Aspiring Human Resources Manager, seeking inte...","Houston, Texas Area",7,0.839220,0.975402,0.552318,0.914078
9,10,Seeking Human Resources HRIS and Generalist Po...,Greater Philadelphia Area,500+,0.808101,0.974998,0.447527,0.873041
...,...,...,...,...,...,...,...,...
15,16,Native English Teacher at EPIK (English Progra...,Kanada,500+,0.104410,0.948172,0.000000,0.329688
19,20,Native English Teacher at EPIK (English Progra...,Kanada,500+,0.104410,0.948172,0.000000,0.329688
44,45,Native English Teacher at EPIK (English Progra...,Kanada,500+,0.104410,0.948172,0.000000,0.329688
31,32,Native English Teacher at EPIK (English Progra...,Kanada,500+,0.104410,0.948172,0.000000,0.329688
