In [1]:
import datamapplot

In [2]:
from collections import Counter
import pickle
import numpy as np
import pandas as pd
import os

## Titles Plot

In [63]:
# Read the pickled titles payload from the working directory.
# NOTE: pickle.load can execute arbitrary code while deserialising, so this
# file must come from a trusted source.
with open("titles_data.pickle", mode="rb") as pickle_file:
    titles_data = pickle.load(pickle_file)
# Last expression of the cell: list the fields available in the payload.
titles_data.keys()

dict_keys(['layers', 'tsne', 'soft_clusters', 'soft_probs', 'sentences', 'sentences_cleaned', 'company_name', 'url'])

In [107]:
# Prepare everything the titles plot needs: label layers, marker sizes,
# per-point hover data, and the custom HTML/CSS overlay.

# Points whose soft-cluster probability is below this value are relabelled
# "Unlabelled" in the reduced layers.
MIN_TITLE_CLUSTER_PROB = 0.005

# Title-case every label in every layer.
titles_data["layers"] = [[title.title() for title in layer] for layer in titles_data["layers"]]

# Work on copies so the full labels in "layers" stay available for the hover data.
titles_data["layers_reduced"] = [layer.copy() for layer in titles_data["layers"]]
# Only the layers before the last two are thresholded; the last two are kept as-is.
for i in range(len(titles_data["layers_reduced"]) - 2):
    # Labels are already title-cased above, so .title() is not re-applied here.
    # NOTE(review): "Unlabelled" is presumably the label datamapplot treats as
    # noise (drawn with noise_color) -- confirm against the library default.
    titles_data["layers_reduced"][i] = [
        title if titles_data["soft_probs"][j] >= MIN_TITLE_CLUSTER_PROB else "Unlabelled"
        for j, title in enumerate(titles_data["layers_reduced"][i])
    ]

# Marker size of a point = how many times its exact sentence occurs in the data.
sentences_counter = Counter(titles_data["sentences"])
marker_size_array = [sentences_counter[key] for key in titles_data["sentences"]]

# Per-point columns; the names are referenced as {placeholders} in the hover
# template below and in the plot's on_click handler ({job_url}).
extra_point_data = pd.DataFrame({"title": titles_data["sentences"],
                                 "functions": titles_data["layers"][5],
                                 "fields": titles_data["layers"][0],
                                 "main_field": titles_data["layers"][4],
                                 "cluster": titles_data["soft_clusters"],
                                 "company_name": titles_data["company_name"],
                                 "job_url": titles_data["url"]})

hover_text_template = """
<div>
    <p>TITLE: {title}</p>
    <p>JOB FUNCTIONS: {functions}</p>
    <p>FIELDS: {fields}</p>
    <p>MAIN FIELD: {main_field}</p>
    <p>COMPANY: {company_name}</p>
    <p>CLUSTER NUMBER: {cluster}</p>
</div>
"""

# NOTE(review): `align-items : left` is not a valid CSS value, so browsers drop
# that declaration. Left unchanged here to keep the rendered layout identical;
# replace with e.g. `flex-start` if a specific alignment is wanted.
custom_css="""
.row {
    display : flex;
    align-items : left;
}
#tutorial {
    position: fixed;
    bottom: 0;
    left: 0;
    margin: 16px;
    padding: 12px;
    border-radius: 16px;
    z-index: 2;
    background: #ffffffcc;
    font-family: Cinzel;
    font-size: 12pt;
    box-shadow: 2px 3px 10px #aaaaaa44;
}
#title-container {
    max-width: 75%;
}
"""

# User-facing help text shown in the bottom-left overlay (typos fixed).
instructions = """Scroll to zoom in/out. Hover on points to see additional info. Click on points to open job pages. Use the search box on the top-left to search job positions.\nThe light-gray points are considered unclustered, but may still be similar to the nearby points. Size of points represents the number of identical job titles."""

# HTML collapses a bare newline into a space, so turn it into an explicit <br>.
instructions_html = instructions.replace("\n", "<br>")

# The help text now lives inside the left-aligned box; previously the box was
# closed empty and the text followed it. The element id typo is fixed as well.
custom_html = f"""
<div id="tutorial">
    <div class="row"><div id="instructions" class="box" style="text-align:left">{instructions_html}</div></div>
</div>
"""

In [114]:
# Build the interactive map of job titles.
#
# Alternatives that were tried and left disabled: the raw soft-cluster ids or
# the unreduced layers 1-3 as label layers, use_medoids=True (noted as very
# slow), search_field="main_layer", and font_family="Cinzel".
plot = datamapplot.create_interactive_plot(
    # Point coordinates from the "tsne" entry, cast to float32.
    np.array(titles_data["tsne"], dtype=np.float32),
    # Label layers, in the order they are handed to the library.
    titles_data["layers_reduced"][1],
    titles_data["layers_reduced"][4],
    # --- Heading and branding ---
    title="LinkedIn Job Positions",
    sub_title="Collected From Over 1000 Recently Hiring Companies (04.2024)",
    logo="https://upload.wikimedia.org/wikipedia/commons/thumb/b/b7/Technion_logo.svg/255px-Technion_logo.svg.png",
    logo_width=90,
    # --- Per-point data and interaction ---
    hover_text=titles_data["sentences"],
    extra_point_data=extra_point_data,
    hover_text_html_template=hover_text_template,
    marker_size_array=marker_size_array,
    on_click="window.open(`{job_url}`)",
    enable_search=True,
    # --- Appearance ---
    darkmode=False,
    point_radius_min_pixels=4,
    color_label_text=True,
    noise_color="#EBEBEB",
    # --- Page-level overlay ---
    custom_html=custom_html,
    custom_css=custom_css,
)
# Last expression of the cell: display the figure inline.
plot

In [115]:
# Export the titles map: write the text produced by str(plot) to an HTML file.
with open("demo_titles.html", mode="w", encoding="utf-8") as html_file:
    html_file.write(str(plot))

## Skills Plot

In [91]:
# Read the pickled skills payload from the working directory.
# NOTE: pickle.load can execute arbitrary code while deserialising, so this
# file must come from a trusted source.
with open("skills_data.pickle", mode="rb") as pickle_file:
    skills_data = pickle.load(pickle_file)
# Last expression of the cell: list the fields available in the payload.
skills_data.keys()

dict_keys(['layers', 'tsne', 'soft_clusters', 'soft_probs', 'sentences', 'sentences_cleaned', 'company_name', 'url'])

In [111]:
# Prepare everything the skills plot needs: label layers, marker sizes,
# per-point hover data, and the custom HTML/CSS overlay.

# Points whose soft-cluster probability is below this value are relabelled
# "Unlabelled" in the reduced layers.
MIN_SKILL_CLUSTER_PROB = 0.0055

# Title-case every label in every layer.
skills_data["layers"] = [[title.title() for title in layer] for layer in skills_data["layers"]]

# Work on copies so the full labels in "layers" stay available for the hover data.
skills_data["layers_reduced"] = [layer.copy() for layer in skills_data["layers"]]
# Only the layers before the last two are thresholded; the last two are kept as-is.
for i in range(len(skills_data["layers_reduced"]) - 2):
    # Labels are already title-cased above, so .title() is not re-applied here.
    # NOTE(review): "Unlabelled" is presumably the label datamapplot treats as
    # noise (drawn with noise_color) -- confirm against the library default.
    skills_data["layers_reduced"][i] = [
        skill if skills_data["soft_probs"][j] >= MIN_SKILL_CLUSTER_PROB else "Unlabelled"
        for j, skill in enumerate(skills_data["layers_reduced"][i])
    ]

# Marker size of a point = how many times its exact sentence occurs in the data.
sentences_counter_skills = Counter(skills_data["sentences"])
marker_size_array_skills = [sentences_counter_skills[key] for key in skills_data["sentences"]]

# Per-point columns; the names are referenced as {placeholders} in the hover
# template below and in the plot's on_click handler ({job_url}).
extra_point_data_skills = pd.DataFrame({"skills": skills_data["sentences"],
                                        "functions": skills_data["layers"][5],
                                        "fields": skills_data["layers"][0],
                                        "main_field": skills_data["layers"][4],
                                        "cluster": skills_data["soft_clusters"],
                                        "company_name": skills_data["company_name"],
                                        "job_url": skills_data["url"]})

hover_text_template_skills = """
<div>
    <p>SKILLS: {skills}</p>
    <p>JOB FUNCTIONS: {functions}</p>
    <p>FIELDS: {fields}</p>
    <p>MAIN FIELD: {main_field}</p>
    <p>COMPANY: {company_name}</p>
    <p>CLUSTER NUMBER: {cluster}</p>
</div>
"""

# NOTE(review): `align-items : left` is not a valid CSS value, so browsers drop
# that declaration. Left unchanged here to keep the rendered layout identical;
# replace with e.g. `flex-start` if a specific alignment is wanted.
custom_css_skills="""
.row {
    display : flex;
    align-items : left;
}
#tutorial {
    position: fixed;
    bottom: 0;
    left: 0;
    margin: 16px;
    padding: 12px;
    border-radius: 16px;
    z-index: 2;
    background: #ffffffcc;
    font-family: Cinzel;
    font-size: 12pt;
    box-shadow: 2px 3px 10px #aaaaaa44;
}
#title-container {
    max-width: 75%;
}
"""

# User-facing help text shown in the bottom-left overlay (typos fixed).
instructions_skills = """Scroll to zoom in/out. Hover on points to see additional info. Click on points to open job pages. Use the search box on the top-left to search skills.\nThe light-gray points are considered unclustered, but may still be similar to the nearby points. Size of points represents the number of identical skill names."""

# HTML collapses a bare newline into a space, so turn it into an explicit <br>.
instructions_skills_html = instructions_skills.replace("\n", "<br>")

# The help text now lives inside the left-aligned box; previously the box was
# closed empty and the text followed it. The element id typo is fixed as well.
custom_html_skills = f"""
<div id="tutorial">
    <div class="row"><div id="instructions" class="box" style="text-align:left">{instructions_skills_html}</div></div>
</div>
"""

In [113]:
# Build the interactive map of required skills.
#
# Alternatives that were tried and left disabled: the raw soft-cluster ids or
# the unreduced layers 1-3 as label layers, use_medoids=True,
# search_field="main_layer", and font_family="Cinzel".
skills_plot = datamapplot.create_interactive_plot(
    # Point coordinates from the "tsne" entry, cast to float32.
    np.array(skills_data["tsne"], dtype=np.float32),
    # Label layers, in the order they are handed to the library.
    skills_data["layers_reduced"][2],
    skills_data["layers_reduced"][4],
    # --- Heading and branding ---
    title="LinkedIn Job Required Skills",
    sub_title="Collected From Over 1000 Recently Hiring Companies (04.2024)",
    logo="https://upload.wikimedia.org/wikipedia/commons/thumb/b/b7/Technion_logo.svg/255px-Technion_logo.svg.png",
    logo_width=90,
    # --- Per-point data and interaction ---
    hover_text=skills_data["sentences"],
    extra_point_data=extra_point_data_skills,
    hover_text_html_template=hover_text_template_skills,
    marker_size_array=marker_size_array_skills,
    on_click="window.open(`{job_url}`)",
    enable_search=True,
    # --- Appearance ---
    darkmode=False,
    point_radius_min_pixels=4,
    color_label_text=True,
    noise_color="#EBEBEB",
    # --- Page-level overlay ---
    custom_html=custom_html_skills,
    custom_css=custom_css_skills,
)
# Last expression of the cell: display the figure inline.
skills_plot

In [116]:
# Export the skills map: write the text produced by str(skills_plot) to an HTML file.
with open("demo_skills.html", mode="w", encoding="utf-8") as html_file:
    html_file.write(str(skills_plot))