In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Pickling works, maybe use a json? https://datatofish.com/export-pandas-dataframe-json/
# NOTE: unpickling can execute arbitrary code, so only load a w2v_train.pkl
# that was produced by a trusted source.
train = pd.read_pickle('w2v_train.pkl')

# Fail fast if the pickle does not carry the columns the rest of the notebook
# reads (titles for plot labels, member vectors as inputs, encoded job ids as targets).
required_columns = {'posTitle', 'memberUrn', 'posEncoded'}
missing_columns = required_columns - set(train.columns)
assert not missing_columns, f"w2v_train.pkl is missing columns: {sorted(missing_columns)}"

train.head(3)

Unnamed: 0,posTitle,memberUrn,posEncoded
0,Badminton Head Coach,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",3035
1,CRO specialist,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1583
2,Consultant for Hays Office Support,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",20355


In [12]:
w2v_inner_dim = 200

# Size both ends of the network from the data itself.
# .iloc[0] is positional, so it works even when the frame's index has no label 0
# (e.g. after filtering or shuffling the rows).
n_members = len(train['memberUrn'].iloc[0])
# posEncoded is the sparse class label, so every label must lie in [0, n_jobs).
# Deriving n_jobs from the largest label guarantees that regardless of row count.
n_jobs = int(train['posEncoded'].max()) + 1

# Simple 2 layer model to create the word2vec matrix
# Predicting word (job) based off context (members who had that job) currently
# people --> job
model = keras.models.Sequential([
    # Linear projection layer (no activation): member vector -> w2v_inner_dim
    keras.layers.Dense(w2v_inner_dim, input_dim=n_members),
    # One softmax unit per job; this layer's kernel is the word2vec matrix
    keras.layers.Dense(n_jobs, activation='softmax')
])

# Hyper parameters
model.compile(
    optimizer='adam',
    # "sparse" loss: targets are integer class ids, not one-hot vectors
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 200)               1370800   
_________________________________________________________________
dense_7 (Dense)              (None, 23907)             4805307   
Total params: 6,176,107
Trainable params: 6,176,107
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Inputs: one fixed-length member vector per row, stacked into a 2-D float32 matrix.
member_vectors = np.array(train['memberUrn'].tolist(), dtype=np.float32)
# Targets: class indices for sparse_categorical_crossentropy. Keep them integral;
# a float32 cast is semantically wrong for ids and inexact above 2**24.
job_ids = np.asarray(train['posEncoded'], dtype=np.int32)

model.fit(
    member_vectors,
    job_ids,
    epochs=5
)

In [None]:
# get_weights returns weights & biases -> we want the 2nd matrix of weights (w2v_inner_dim by # of jobs)
w2v_matrix = model.get_weights()[2]

from sklearn.manifold import TSNE

# Collapse matrix into Nx2
tsne = TSNE(n_components=2, random_state=0, verbose=2)
w2v_visual = tsne.fit_transform(w2v_matrix)

In [None]:
import plotly.graph_objects as go

# Display data
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=w2v_matrix[:,0], y=w2v_matrix[:,1],
    text=train['posTitle'], # ! LABELS ARE WRONG
    mode='markers',
    marker_color='rgba(255, 182, 193, .8)'
))
fig.update_layout(title='Word2Vec 2D Career Map')