In [1]:
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm
import time
import itertools
import copy
import random
warnings.filterwarnings("ignore")
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
pd.set_option('display.max_columns', None)
pd.set_option('max_colwidth',100)

In [2]:
random.seed(2023)
np.random.seed(2023)

In [3]:
dataset_name = "Dataset/job"

# 1.Read Data

In [44]:
rating_df = pd.read_csv(dataset_name + '/job-recommendation/apps.tsv',delimiter='\t',parse_dates=['ApplicationDate'],infer_datetime_format=True)
rating_df = rating_df.rename(columns={"UserID":'userid','JobID':'itemid'})
rating_df.drop_duplicates(subset=['userid','itemid'], inplace=True)
rating_df = rating_df.reset_index(drop=True)
rating_df 
(rating_df.isna().sum()/rating_df.shape[0]).sort_values(ascending=False)
rating_df["userid"].max()
rating_df["itemid"].max()

Unnamed: 0,userid,WindowID,Split,ApplicationDate,itemid
0,47,1,Train,2012-04-04 15:56:23.537,169528
1,47,1,Train,2012-04-06 01:03:00.003,284009
2,47,1,Train,2012-04-05 02:40:27.753,2121
3,47,1,Train,2012-04-05 02:37:02.673,848187
4,47,1,Train,2012-04-05 22:44:06.653,733748
...,...,...,...,...,...
1603106,1472089,7,Train,2012-06-23 16:06:03.087,573732
1603107,1472089,7,Train,2012-06-25 23:20:35.603,39401
1603108,1472089,7,Train,2012-04-30 14:01:42.520,175198
1603109,1472089,7,Train,2012-04-30 13:52:45.823,1073263


userid             0.0
WindowID           0.0
Split              0.0
ApplicationDate    0.0
itemid             0.0
dtype: float64

1472095

1116313

In [10]:
rating_df = rating_df.sort_values(by=['userid','ApplicationDate']).reset_index(drop=True)
rating_df.drop(['WindowID','Split','ApplicationDate'],axis=1,inplace=True)
rating_df

Unnamed: 0,userid,itemid
0,7,309823
1,7,703889
2,9,809208
3,9,136489
4,9,617374
...,...,...
1603106,1472089,1060730
1603107,1472089,1062444
1603108,1472090,209535
1603109,1472090,254881


In [11]:
user_df = pd.read_csv(dataset_name + '/job dataset/job-recommendation/users.tsv',delimiter='\t').rename(columns={"UserID":'userid'})
user_df.drop_duplicates(subset=['userid'], inplace=True)
user_df = user_df.sort_values(by=['userid']).reset_index(drop=True)
user_df
(user_df.isna().sum()/user_df.shape[0]).sort_values(ascending=False)

Unnamed: 0,userid,WindowID,Split,City,State,Country,ZipCode,DegreeType,Major,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,ManagedOthers,ManagedHowMany
0,7,3,Train,Roanoke,VA,US,24012,High School,Not Applicable,1995-01-01 00:00:00,2,13.0,Yes,No,0
1,9,4,Train,Houston,TX,US,77095,High School,Not Applicable,,3,3.0,Yes,No,0
2,13,6,Test,Philadelphia,PA,US,19143,Bachelor's,Psychological & Social Sciences,2011-12-01 00:00:00,6,5.0,Yes,No,0
3,14,4,Train,San Antonio,TX,US,78247,,General,,2,10.0,No,No,0
4,16,5,Train,Atlanta,GA,US,30363,Bachelor's,Security,,9,9.0,Yes,No,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389703,1472085,5,Train,McKinney,TX,US,75070,High School,Not Applicable,2004-05-01 00:00:00,4,10.0,Yes,No,0
389704,1472087,6,Train,New Rochelle,NY,US,10801,Master's,,2012-06-01 00:00:00,7,7.0,Yes,No,0
389705,1472089,7,Train,Indianapolis,IN,US,46204,Bachelor's,Interior Design,2007-01-01 00:00:00,3,6.0,,No,0
389706,1472090,4,Train,Indianapolis,IN,US,46260,,Construction Management,,6,10.0,Yes,No,0


GraduationDate          0.308516
Major                   0.249520
CurrentlyEmployed       0.107968
TotalYearsExperience    0.036386
ZipCode                 0.004449
State                   0.001257
userid                  0.000000
WindowID                0.000000
Split                   0.000000
City                    0.000000
Country                 0.000000
DegreeType              0.000000
WorkHistoryCount        0.000000
ManagedOthers           0.000000
ManagedHowMany          0.000000
dtype: float64

In [12]:
jobs_data = []
with open(dataset_name + '/job-recommendation/jobs.tsv','r') as jobs_file:
    for row in tqdm(jobs_file):
        cur_data = row.strip().split('\t')
        jobs_data.append(cur_data)

1092097it [00:20, 52100.64it/s]


In [13]:
item_df = pd.DataFrame(jobs_data[1:],columns=jobs_data[0]).astype({"JobID":"int64"})
item_df = item_df.rename(columns={"JobID":'itemid','Title':'title'})
item_df.drop_duplicates(subset=['title'], inplace=True)
item_df = item_df.sort_values(by=['itemid']).reset_index(drop=True)
item_df
(item_df.isna().sum()/item_df.shape[0]).sort_values(ascending=False)

Unnamed: 0,itemid,WindowID,title,Description,Requirements,City,State,Country,Zip5,StartDate,EndDate
0,1,1,Security Engineer/Technical Lead,<p>Security Clearance Required:&nbsp; Top Secret </p>\r<p>Job Number: TMR-447</p>\r<p>Location o...,<p>SKILL SET</p>\r<p>&nbsp;</p>\r<p>Network Security tools:</p>\r<p>&nbsp;</p>\r<p>Webdefend Web...,Washington,DC,US,20531,2012-03-07 13:17:01.643,2012-04-06 23:59:59
1,2,3,IT Analyst SME,<span>\r<p>Security Clearance Required: TS/SCI Clearance </p>\r<p></p>\r<p>Job Number: TMR-483...,"Develop and implement measures, controls, inputs and outputs to the IT Governance structure and ...",Washington,DC,US,,2012-03-29 21:11:23.42,2012-04-28 23:59:59
2,4,1,SAP Business Analyst / WM,"<strong>NO Corp. to Corp resumes&nbsp;are being considered for this &ldquo;Direct Hire"" permanen...",<p><b>WHAT YOU NEED: </b></p>\r<p>Four year college degree</p>\r<p>Minimum 5 to 8+ years of SAP ...,Charlotte,NC,US,28217,2012-03-21 02:03:44.137,2012-04-20 23:59:59
3,5,3,Business Analyst / Business Process Owner,<p><b><span>Business Process Owner –Hiring immediately – Direct Hire!</span></b> </p>\r<p><span>...,<p><b><span>What you need:</span></b> </p>\r<ul>\r <li><span>Strong knowledge of our Client's...,Columbia,SC,US,29210,2012-04-16 11:16:45.127,2012-05-15 23:59:59
4,7,1,P/T HUMAN RESOURCES ASSISTANT,"<b> <b> P/T HUMAN RESOURCES ASSISTANT</b> </b> —— 1-2 years experience in HR & Benefits, st...",Please refer to the Job Description to view the requirements for this job,Winter Park,FL,US,32792,2012-03-02 16:36:55.447,2012-04-01 23:59:59
...,...,...,...,...,...,...,...,...,...,...,...
438602,1116303,4,Employee Benefits & Insurance Claims Administrator,<strong><span>Employee Benefits and Insurance Claims Loss Management Administrator</span></stron...,<span>The successful candidate should have references and job history within current or related ...,Williston,VT,US,05495,2012-05-08 14:28:38.407,2012-06-07 23:59:59
438603,1116304,7,"Career Experience Specialist - Career Services, #001310",<p>&nbsp;</p>\r<p><b><span>Macomb Community College</span></b></p>\r<p><b><span>14500 East 12 Mi...,see description,Warren,MI,US,48088,2012-05-24 12:34:38.667,2012-06-23 23:59:00
438604,1116305,5,Office Manager/Sales Assistant – Financial Services Industry,"<p align=""left""><span>Office Manager/Sales Assistant &ndash; a financial services firm with loca...",<p><b><i><span>Position Requirements</span></i></b></p>\r<ul><br>\r <li><span>Previous admini...,Fresno,CA,US,93720,2012-05-18 11:21:50.617,2012-06-17 23:59:00
438605,1116312,4,GLOBAL SUPPLY CHAIN / LOGISTICS MANAGER,"<div>Our client, a global chemical company located in Central New Jersey is eager to engage th...","- BS DEGREE IN ANY FIELD, REQUIRED. [SUPPLY CHAIN LOGISTICS DEGREE A PLUS] an advanced degree w...",New Brunswick,NJ,US,,2012-05-02 17:05:38.727,2012-06-01 23:59:59


itemid          0.0
WindowID        0.0
title           0.0
Description     0.0
Requirements    0.0
City            0.0
State           0.0
Country         0.0
Zip5            0.0
StartDate       0.0
EndDate         0.0
dtype: float64

In [15]:
item_pop = rating_df["itemid"].value_counts().rename_axis("itemid").reset_index(name="pop")
item_select = item_pop[:5000]
item_select

Unnamed: 0,itemid,pop
0,17361,208
1,900797,203
2,67239,189
3,601021,188
4,601126,186
...,...,...
4995,634766,35
4996,253392,35
4997,100420,35
4998,558856,35


In [17]:
item_df = item_df[item_df['itemid'].isin(item_select['itemid'].tolist())].reset_index(drop=True)
item_df

Unnamed: 0,itemid,WindowID,title,Description,Requirements,City,State,Country,Zip5,StartDate,EndDate
0,15,1,Administrative Assistant,This Administrative Assistant position is responsible for performing a variety of clerical and a...,Please refer to the Job Description to view the requirements for this job,Los Angeles,CA,US,90011,2012-03-09 01:12:16.81,2012-04-08 23:59:59
1,141,4,SHIFT MANAGER - FRONT END SUPERVISOR,<b> <b>SHIFT MNGR. - FRONT END SUPERVISOR</b> </b> —— Busy retail - Must have experience wi...,Please refer to the Job Description to view the requirements for this job,Orlando,FL,US,,2012-04-24 14:36:48.43,2012-05-23 23:59:59
2,169,4,MEDICAL FACILITY JOB FAIR,"***MEDICAL FACILITY JOB FAIR*** - FT, PT, VP SHIFTS AVAIL - POSITIONS INCLUDE ADMIN/OFFICE, MED/...",Please refer to the Job Description to view the requirements for this job,Orlando,FL,US,32806,2012-05-09 12:40:41.16,2012-06-08 23:59:59
3,206,6,Call Transfer Agents,"Orange Lake RESORTS Holiday Inn Club Vacations Join a workplace where Everyone matters, every...",Please refer to the Job Description to view the requirements for this job,Orlando,FL,US,,2012-05-23 09:00:48.703,2012-06-22 23:59:00
4,216,6,Manufacturing Job,Experienced forklift operator to load/unload product to/from flatbed trucks and containers using...,Please refer to the Job Description to view the requirements for this job,Sanford,FL,US,32771,2012-05-30 01:22:01.277,2012-06-29 23:59:00
...,...,...,...,...,...,...,...,...,...,...,...
2122,1115118,5,Medical Assistant & Cardiovascular Tech Opening,"<span style=""text-decoration: underline""><strong>Medical Assistant - Plastic/Reconstructive Surg...",<ul>\r <li>Minimum of six months recent experience as a Medical Assistant in a private practi...,Phoenix,AZ,US,85016,2012-05-18 13:35:04.417,2012-06-17 23:59:00
2123,1115606,5,Warehouse - Forklift - Order Picking,<p>Express&nbsp;Indy North&nbsp;needs&nbsp;Warehouse Associates&nbsp;to help with order selectio...,"<p><strong><span style=""text-decoration: underline"">REQUIREMENTS:</span></strong></p>\r<p>* Accu...",Pendleton,IN,US,,2012-05-02 11:31:15.687,2012-06-01 23:59:59
2124,1115625,5,TEACHER’S AIDE,<p><span>The Joseph J. Peters Institute is seeking a qualified candidate to provide mental healt...,"<p><b><span>Skills:</span></b></p>\r<p><span>Working knowledge of computers, computer programs, ...",Philadelphia,PA,US,19107,2012-05-25 13:05:41.84,2012-06-24 23:59:00
2125,1115793,7,Entry Level Office Assistant,Staffing Firm in Midtown is looking for Entry Level Office Assistant.<br />\r<br />\rHours:&nbsp...,H.S. Diploma a must; B.A. preferred.<br />\rMust know Word and Excel.<br />\rType 45 wpm. <br />...,New York,NY,US,10017,2012-06-04 19:20:45.887,2012-07-03 23:59:00


In [18]:
itemid_set = list(set(item_df.itemid.to_list()))

# 2.Filter

In [19]:
item_title_exist_idset = item_df.itemid.tolist()
rating_df_fliter = rating_df[rating_df["itemid"].isin(item_title_exist_idset)].reset_index(drop=True)
rating_df_fliter

Unnamed: 0,userid,itemid
0,9,809208
1,83,1059412
2,83,12561
3,143,816483
4,245,900240
...,...,...
118798,1471864,1015485
118799,1471864,324577
118800,1472066,498751
118801,1472066,639512


In [20]:
user_exist_idset = user_df.userid.tolist()
rating_df_fliter = rating_df_fliter[rating_df_fliter["userid"].isin(user_exist_idset)].reset_index(drop=True)
rating_df_fliter

Unnamed: 0,userid,itemid
0,9,809208
1,83,1059412
2,83,12561
3,143,816483
4,245,900240
...,...,...
118798,1471864,1015485
118799,1471864,324577
118800,1472066,498751
118801,1472066,639512


In [21]:
sequence_df = rating_df_fliter.groupby(['userid']).agg(
    pos_seq=("itemid", list)
).reset_index()
sequence_df["pos_seq_length"] = sequence_df['pos_seq'].apply(lambda x: len(x))
sequence_df

Unnamed: 0,userid,pos_seq,pos_seq_length
0,9,[809208],1
1,83,"[1059412, 12561]",2
2,143,[816483],1
3,245,"[900240, 1057696]",2
4,286,[220490],1
...,...,...,...
48887,1471772,[458151],1
48888,1471788,[771733],1
48889,1471793,"[251060, 1030429, 263755, 748136, 809502, 168302, 468405]",7
48890,1471864,"[709977, 751642, 664803, 312771, 1015485, 324577]",6


In [22]:
sequence_df_fliter = sequence_df[sequence_df["pos_seq_length"]>=11].reset_index(drop=True)
sequence_df_fliter

Unnamed: 0,userid,pos_seq,pos_seq_length
0,554,"[957, 802921, 855139, 600058, 627377, 491965, 251966, 25820, 283948, 196603, 640492]",11
1,769,"[900568, 608556, 512686, 195570, 601021, 272504, 1061408, 551465, 1054653, 584241, 1032353]",11
2,1697,"[300767, 1054653, 880231, 496169, 13313, 959911, 212415, 601021, 512686, 272504, 812531, 314425]",12
3,4743,"[731241, 810885, 246499, 846109, 14863, 846134, 803853, 246597, 846116, 675598, 246498, 149195]",12
4,5547,"[1078745, 64014, 622946, 811760, 887338, 1062685, 887337, 980367, 657326, 300614, 151832, 465067...",13
...,...,...,...
1345,1464405,"[1106523, 193031, 64014, 1078745, 980367, 465067, 811760, 386184, 151832, 567966, 74110]",11
1346,1465830,"[279574, 964825, 547360, 614026, 1069131, 701195, 458710, 859712, 479140, 254939, 34984, 542850,...",13
1347,1467940,"[813176, 916657, 1052635, 796972, 639688, 910646, 334412, 684720, 582802, 300427, 214329]",11
1348,1468799,"[932584, 512686, 811923, 480381, 812528, 900545, 1053248, 214948, 755151, 1054653, 811927, 26406...",26


In [23]:
def sample_neg_candidate(row, n_candidate):
    pos_seq = row.pos_seq
    neg_seq = []
    for i in range(n_candidate):
        neg_sample = random.choice(itemid_set)
        while neg_sample in pos_seq:
            neg_sample = random.choice(itemid_set)
        neg_seq.append(neg_sample)
    return neg_seq

In [24]:
sequence_df_fliter['neg_candidate'] = sequence_df_fliter.apply(lambda x:sample_neg_candidate(x,n_candidate=10),axis=1)
sequence_df_fliter

Unnamed: 0,userid,pos_seq,pos_seq_length,neg_candidate
0,554,"[957, 802921, 855139, 600058, 627377, 491965, 251966, 25820, 283948, 196603, 640492]",11,"[300809, 375790, 563095, 185112, 865358, 681368, 616139, 627227, 750071, 100839]"
1,769,"[900568, 608556, 512686, 195570, 601021, 272504, 1061408, 551465, 1054653, 584241, 1032353]",11,"[331173, 739798, 188681, 1059771, 1054196, 811958, 820983, 550, 568048, 485531]"
2,1697,"[300767, 1054653, 880231, 496169, 13313, 959911, 212415, 601021, 512686, 272504, 812531, 314425]",12,"[466280, 506865, 1045659, 286408, 811914, 1041406, 482488, 220815, 450, 91846]"
3,4743,"[731241, 810885, 246499, 846109, 14863, 846134, 803853, 246597, 846116, 675598, 246498, 149195]",12,"[331212, 755177, 811998, 657988, 772055, 563045, 900456, 469814, 625180, 707528]"
4,5547,"[1078745, 64014, 622946, 811760, 887338, 1062685, 887337, 980367, 657326, 300614, 151832, 465067...",13,"[282962, 561197, 1115793, 246555, 562821, 769335, 92297, 364925, 910545, 515830]"
...,...,...,...,...
1345,1464405,"[1106523, 193031, 64014, 1078745, 980367, 465067, 811760, 386184, 151832, 567966, 74110]",11,"[637226, 449358, 247250, 947622, 920526, 327307, 182108, 78613, 173028, 583185]"
1346,1465830,"[279574, 964825, 547360, 614026, 1069131, 701195, 458710, 859712, 479140, 254939, 34984, 542850,...",13,"[285686, 920025, 15520, 468908, 512239, 1064708, 354231, 209212, 164165, 189599]"
1347,1467940,"[813176, 916657, 1052635, 796972, 639688, 910646, 334412, 684720, 582802, 300427, 214329]",11,"[212538, 663174, 133432, 198970, 553774, 81159, 511957, 468937, 581974, 802]"
1348,1468799,"[932584, 512686, 811923, 480381, 812528, 900545, 1053248, 214948, 755151, 1054653, 811927, 26406...",26,"[315352, 807810, 300712, 578721, 331504, 564150, 515212, 106338, 1051924, 519706]"


# 3.Gnerate Test Data for Top-k Ranking

In [25]:
def get_sub_history_and_candidate(row, n_candidate=10, n_history=10):
    pos_seq = row.pos_seq
    history = pos_seq[-1-n_history:-1]
    target = pos_seq[-1]
    neg_candidate = row.neg_candidate

    full_candidate = [target] + neg_candidate[:n_candidate-1]
    random.shuffle(full_candidate)

    target_index = full_candidate.index(target)

    return pd.Series({"userid":row.userid,'history_list':history,'pos_target':target_index,'full_candidate':full_candidate})

In [26]:
sequence_df_fliter.apply(get_sub_history_and_candidate,axis=1)

Unnamed: 0,userid,history_list,pos_target,full_candidate
0,554,"[957, 802921, 855139, 600058, 627377, 491965, 251966, 25820, 283948, 196603]",0,"[640492, 300809, 563095, 750071, 627227, 185112, 865358, 616139, 375790, 681368]"
1,769,"[900568, 608556, 512686, 195570, 601021, 272504, 1061408, 551465, 1054653, 584241]",9,"[568048, 1059771, 1054196, 550, 331173, 188681, 820983, 739798, 811958, 1032353]"
2,1697,"[1054653, 880231, 496169, 13313, 959911, 212415, 601021, 512686, 272504, 812531]",9,"[450, 1045659, 1041406, 811914, 466280, 286408, 506865, 482488, 220815, 314425]"
3,4743,"[810885, 246499, 846109, 14863, 846134, 803853, 246597, 846116, 675598, 246498]",8,"[331212, 563045, 755177, 772055, 625180, 469814, 900456, 657988, 149195, 811998]"
4,5547,"[622946, 811760, 887338, 1062685, 887337, 980367, 657326, 300614, 151832, 465067]",2,"[910545, 769335, 881665, 92297, 1115793, 561197, 562821, 282962, 364925, 246555]"
...,...,...,...,...
1345,1464405,"[1106523, 193031, 64014, 1078745, 980367, 465067, 811760, 386184, 151832, 567966]",2,"[327307, 637226, 74110, 947622, 247250, 449358, 182108, 920526, 78613, 173028]"
1346,1465830,"[547360, 614026, 1069131, 701195, 458710, 859712, 479140, 254939, 34984, 542850]",8,"[920025, 354231, 15520, 512239, 209212, 1064708, 164165, 468908, 267978, 285686]"
1347,1467940,"[813176, 916657, 1052635, 796972, 639688, 910646, 334412, 684720, 582802, 300427]",6,"[212538, 553774, 663174, 133432, 198970, 511957, 214329, 581974, 468937, 81159]"
1348,1468799,"[919389, 338167, 282316, 212089, 396326, 812530, 889499, 811921, 212093, 782692]",5,"[515212, 106338, 564150, 807810, 331504, 710187, 1051924, 315352, 300712, 578721]"


In [27]:
def get_topk_final_data(df):
    for n_candidate in [5,10]:
        for n_history in [5,10]:
            print(f"n_candidate:{n_candidate} ; n_history{n_history}")
            LLM_top1_data = copy.deepcopy(df)
            LLM_top1_data = LLM_top1_data.apply(get_sub_history_and_candidate, n_candidate=n_candidate, n_history=n_history, axis=1, result_type="expand")
            LLM_top1_data = LLM_top1_data.sample(frac=1.0, random_state=2023).reset_index(drop=True)
            LLM_top1_data.to_csv(f"./processed/topk_candidate@{n_candidate}_history@{n_history}.csv", sep="\t", index=False)

In [28]:
get_topk_final_data(sequence_df_fliter)

n_candidate:5 ; n_history5
n_candidate:5 ; n_history10
n_candidate:10 ; n_history5
n_candidate:10 ; n_history10


# 4.Generate Datamaps

In [37]:
removePattern = r'(<(.*?)>)|(&\w+)'
addSpacePattern = r'([;:])|(\\r)|(\\n)'
removeExtraSpaces = r'(\s\s+?)(?=\S)'

item_df['titleCleaned'] = item_df['title'].str.lower()
item_df['titleCleaned'] = item_df['titleCleaned'].str.replace(removePattern, "")
item_df['titleCleaned'] = item_df['titleCleaned'].str.replace(addSpacePattern, " ")
item_df['titleCleaned'] = item_df['titleCleaned'].str.replace(removeExtraSpaces, " ")

In [40]:
item_df['titleCleaned'].to_list()

['administrative assistant',
 'shift manager - front end supervisor',
 'medical facility job fair',
 'call transfer agents',
 'manufacturing job',
 'reception/adminasst',
 'accounts payable clerk - contract',
 'customer service rep',
 'data entry - full time',
 'customer service representative',
 'phone representative',
 'drivers/trainees needed veterans welcome earn $600-$800',
 'mdnow urgent care front desk',
 'scheduler - located in deerfield beach',
 'cust.service teamleader',
 'front desk p/t',
 'adminstrative assistant needed asap',
 'hotel - front desk/night audit person, $10/hr',
 'secretary - for fast paced auto dealership mon-sat',
 'tech support customer service 37 year margate call center,',
 'customer service representative urgently needed',
 'process helper',
 'operations officer. salary 950 per week!',
 'sanitizers',
 'personal assistant - administrative assistants (ft or pt)',
 'customer service - a/p - a/r',
 'ft leasing consultant dp7796890 needed for properties',
 'a

In [41]:
id2item_dict = item_df.set_index("itemid")["titleCleaned"].to_dict()
id2item_dict
item2id_dict = item_df.set_index("titleCleaned")["itemid"].to_dict()
item2id_dict

{15: 'administrative assistant',
 141: 'shift manager - front end supervisor',
 169: 'medical facility job fair',
 206: 'call transfer agents',
 216: 'manufacturing job',
 261: 'reception/adminasst',
 407: 'accounts payable clerk - contract',
 450: 'customer service rep',
 460: 'data entry - full time',
 481: 'customer service representative',
 506: 'phone representative',
 510: 'drivers/trainees needed veterans welcome earn $600-$800',
 550: 'mdnow urgent care front desk',
 564: 'scheduler - located in deerfield beach',
 587: 'cust.service teamleader',
 591: 'front desk p/t',
 613: 'adminstrative assistant needed asap',
 635: 'hotel - front desk/night audit person, $10/hr',
 681: 'secretary - for fast paced auto dealership mon-sat',
 715: 'tech support customer service 37 year margate call center,',
 716: 'customer service representative urgently needed',
 802: 'process helper',
 807: 'operations officer. salary 950 per week!',
 844: 'sanitizers',
 957: 'personal assistant - administr

{'administrative assistant': 15,
 'shift manager - front end supervisor': 141,
 'medical facility job fair': 169,
 'call transfer agents': 206,
 'manufacturing job': 216,
 'reception/adminasst': 261,
 'accounts payable clerk - contract': 407,
 'customer service rep': 450,
 'data entry - full time': 460,
 'customer service representative': 481,
 'phone representative': 506,
 'drivers/trainees needed veterans welcome earn $600-$800': 510,
 'mdnow urgent care front desk': 550,
 'scheduler - located in deerfield beach': 564,
 'cust.service teamleader': 587,
 'front desk p/t': 591,
 'adminstrative assistant needed asap': 613,
 'hotel - front desk/night audit person, $10/hr': 635,
 'secretary - for fast paced auto dealership mon-sat': 681,
 'tech support customer service 37 year margate call center,': 715,
 'customer service representative urgently needed': 716,
 'process helper': 802,
 'operations officer. salary 950 per week!': 807,
 'sanitizers': 844,
 'personal assistant - administrative

In [42]:
datamaps = {}
datamaps["id2item_dict"] = id2item_dict
datamaps["item2id_dict"] = item2id_dict

import json
json_str = json.dumps(datamaps)
with open(f"./processed/title_datamaps.json", 'w') as out:
    out.write(json_str)

206655