In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell runs, so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [33]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from langdetect import detect
from langdetect import DetectorFactory, LangDetectException

from recommender.core import network_builder

In [36]:
# Read the three raw CSV files and replace every missing value with an empty
# string, so the text columns can be handled as plain strings further down.
# NOTE: the paths are hard-coded absolute locations; adjust them to your checkout.
companies_df = pd.read_csv(
    r'/home/huynhhao/Desktop/job_recommender/data/companies.csv'
).fillna('')
jobs_df = pd.read_csv(
    r'/home/huynhhao/Desktop/job_recommender/data/jobs.csv'
).fillna('')
cv = pd.read_csv(
    r'/home/huynhhao/Desktop/job_recommender/data/cvdata/ResumeDataSet.csv',
    encoding='utf-8',
).fillna('')


In [41]:
# Keep only the companies and jobs whose main text field is detected as English.

# langdetect is non-deterministic on short/ambiguous text unless it is seeded.
DetectorFactory.seed = 0

# Texts with this many characters or fewer are treated as non-English.
MIN_LANG_CHARS = 5


def is_english(text, min_chars=MIN_LANG_CHARS):
    """Return True when ``text`` is detected as English.

    Parameters
    ----------
    text : object
        Value to classify; it is converted with ``str()`` before inspection.
    min_chars : int
        Texts whose length does not exceed this value are reported as False
        without calling the detector.

    Returns
    -------
    bool
        True only if the text is longer than ``min_chars`` and the detector
        reports ``'en'``. Text the detector cannot classify yields False.
    """
    text = str(text)
    if len(text) <= min_chars:
        return False
    try:
        return detect(text) == 'en'
    except LangDetectException:
        # Raised when the text carries no usable features (for example only
        # digits or punctuation); treat such rows as non-English.
        return False


companies_df['lang'] = companies_df['overview'].map(is_english).astype(bool)
jobs_df['lang'] = jobs_df['description'].map(is_english).astype(bool)

# .copy() detaches the filtered frames from the originals, so re-running this
# cell does not assign the 'lang' column onto a slice of another frame.
companies_df = companies_df.loc[companies_df['lang'], :].copy()
jobs_df = jobs_df.loc[jobs_df['lang'], :].copy()

In [39]:
# Re-shape companies_df and jobs_df into {id: attribute-dict} lookups.
# The field names are the column names read from each row ('benifit' is spelled
# exactly as the column is accessed in the data).
EMPLOYER_FIELDS = (
    'company_name',
    'average_rating',
    'num_review',
    'city',
    'type',
    'num_employee',
    'country',
    'working_day',
    'OT',
    'overview',
    'expertise',
    'benifit',
    'logo_link',
)
JOB_FIELDS = (
    'company_id',
    'job_name',
    'taglist',
    'location',
    'three_reasons',
    'description',
)

# Keyed by company_id; a later row with the same id replaces an earlier one.
employers_data = {
    record['company_id']: {field: record[field] for field in EMPLOYER_FIELDS}
    for _, record in companies_df.iterrows()
}

# Keyed by job_id; each entry keeps company_id so the job can be linked later.
jobs_data = {
    record['job_id']: {field: record[field] for field in JOB_FIELDS}
    for _, record in jobs_df.iterrows()
}
    

In [68]:
# Hand-built jobs graph: employer and job nodes joined by a pair of opposite edges.
G = nx.MultiDiGraph(name = 'Jobs graph')

# Employer nodes go in first, each carrying its attribute dict plus a node_type tag.
for employer_id, employer_data in employers_data.items():
    G.add_node(employer_id, node_type = 'employer', **employer_data)

# Job nodes follow. Jobs whose employer is not among the loaded employers are
# collected here instead of being linked.
orphan_job_ids = []
for job_id, job_data in jobs_data.items():
    G.add_node(job_id, node_type = 'job', **job_data)
    employer_id = job_data['company_id']
    if employer_id not in employers_data:
        # add_edge() would silently create a node without node_type or any
        # other attribute, which breaks code that reads node_data['node_type'].
        orphan_job_ids.append(job_id)
        continue
    # One edge in each direction between the job and its employer.
    G.add_edge(job_id, employer_id, weight = 1)
    G.add_edge(employer_id, job_id, weight = 1)

In [74]:
# Outgoing neighbours (with edge data) of the 'kms-technology' node in the hand-built graph.
G['kms-technology']

AdjacencyView({'kms-technology:jrsr_qa_engineer_kms_labs_bonus': {0: {'weight': 1}}, 'kms-technology:engineering_manager_bonus': {0: {'weight': 1}}, 'kms-technology:fullstack_mobile_mobilenodejs_kobiton': {0: {'weight': 1}}, 'kms-technology:jrsrprincipal_java_developer_bonus': {0: {'weight': 1}}, 'kms-technology:product_manager_kms_labs_bonus': {0: {'weight': 1}}, 'kms-technology:sr_it_business_analyst_english_bonus': {0: {'weight': 1}}, 'kms-technology:fullstack_dev_reactjsnodejs_kobiton': {0: {'weight': 1}}, 'kms-technology:senior_ruby_on_rails_engineer_bonus': {0: {'weight': 1}}, 'kms-technology:senior_data_engineer_bonus': {0: {'weight': 1}}, 'kms-technology:srjr_fullstack_nodejsreactjs_bonus': {0: {'weight': 1}}, 'kms-technology:juniorsenior_test_engineer_bonus': {0: {'weight': 1}}, 'kms-technology:jrsrprincipal_net_developer_bonus': {0: {'weight': 1}}})

In [58]:
# Attributes stored on the 'kms-technology' node of the hand-built graph.
G.nodes['kms-technology']

{'company_name': 'KMS Technology',
 'average_rating': 0,
 'num_review': '197 Reviews',
 'city': 'Ho Chi Minh, Da Nang, Ha Noi',
 'type': 'Outsourcing',
 'num_employee': '1000+',
 'country': 'United States',
 'working_day': 'Monday - Friday',
 'OT': 'No OT',
 'overview': 'Established in 2009, KMS Technology is a U.S.-based engineering and services company with development centers in Vietnam. KMS Technology is trusted by international clients for the superior quality of products and expertise of Vietnamese engineers.\n\nKMS builds and successfully launches its own software companies through its internal startup incubator,\xa0KMS Labs. Most notable companies include\xa0QASymphony,\xa0Kobiton,\xa0Katalon, and\xa0Grove. Under the brand,\xa0KMS Solutions, the company serves the Asia Pacific region, offering technology solution consulting and bringing the most advanced and latest technologies to the Asian market.\xa0\n\nKMS is committed to making a long-lasting social impact by partnering wit

In [9]:
# Instantiate the project's NetworkBuilder with the company, job and resume frames;
# the graph it holds is read through nb.G in the cells below.
nb = network_builder.NetworkBuilder(companies_df, jobs_df, cv )

In [10]:
# Outgoing neighbours (with edge data) of 'kms-technology' in the NetworkBuilder graph.
nb.G['kms-technology']

AdjacencyView({'kms-technology:jrsr_qa_engineer_kms_labs_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:engineering_manager_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:fullstack_mobile_mobilenodejs_kobiton': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:jrsrprincipal_java_developer_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:product_manager_kms_labs_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:sr_it_business_analyst_english_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:fullstack_dev_reactjsnodejs_kobiton': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:senior_ruby_on_rails_engineer_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:senior_data_engineer_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:srjr_fullstack_nodejsreactjs_bonus': {0: {'weight': 1, 'edge_type': 'posted'}}, 'kms-technology:juniorsenior_test_engineer

In [108]:
# Number of edges in the NetworkBuilder graph.
len(nb.G.edges)

2822

In [109]:
# Graph-level attributes of the NetworkBuilder graph.
nb.G.graph

{'name': 'Jobs graph',
 'num_employers': 2041,
 'num_jobs': 1411,
 'num_candidates': 962}

In [12]:
# Attributes stored on the 'kms-technology' node of the NetworkBuilder graph.
nb.G.nodes['kms-technology']

{'node_type': 'employer',
 'company_name': 'KMS Technology',
 'average_rating': 4.0,
 'num_review': '197 Reviews',
 'city': 'Ho Chi Minh, Da Nang, Ha Noi',
 'type': 'Outsourcing',
 'num_employee': '1000+',
 'country': 'United States',
 'working_day': 'Monday - Friday',
 'OT': 'No OT',
 'overview': 'Established in 2009, KMS Technology is a U.S.-based engineering and services company with development centers in Vietnam. KMS Technology is trusted by international clients for the superior quality of products and expertise of Vietnamese engineers.\n\nKMS builds and successfully launches its own software companies through its internal startup incubator,\xa0KMS Labs. Most notable companies include\xa0QASymphony,\xa0Kobiton,\xa0Katalon, and\xa0Grove. Under the brand,\xa0KMS Solutions, the company serves the Asia Pacific region, offering technology solution consulting and bringing the most advanced and latest technologies to the Asian market.\xa0\n\nKMS is committed to making a long-lasting soc

In [14]:
# Number of nodes in the NetworkBuilder graph.
len(nb.G.nodes)

4414

In [15]:
# Graph-level attributes of the NetworkBuilder graph (nb.G.graph).
nb.G.graph

{'name': 'Jobs graph',
 'num_employers': 2041,
 'num_jobs': 1411,
 'num_candidates': 962,
 'num_candidate_match_job': 0,
 'num_similar_candidates': 0,
 'num_similar_jobs': 0,
 'num_similar_employers': 0,
 'num_apply': 0,
 'num_favorite': 0}

In [27]:
# Collect one text document per employer, job and candidate node of nb.G.
# Employers and jobs contribute two text fields joined by a single space;
# candidates contribute their resume value as stored on the node.
DOCUMENT_FIELDS = {
    'employer': ('overview', 'benifit'),
    'job': ('three_reasons', 'description'),
}

texts = ''  # not read in the visible cells; kept in case a later cell uses it
all_documents = []
for _, node_attrs in nb.G.nodes(data=True):
    kind = node_attrs['node_type']
    if kind == 'candidate':
        all_documents.append(node_attrs['resume'])
    elif kind in DOCUMENT_FIELDS:
        all_documents.append(
            ' '.join([str(node_attrs[field]) for field in DOCUMENT_FIELDS[kind]])
        )
    # Nodes of any other type contribute no document.
        


In [28]:
# Number of documents collected from the graph nodes.
len(all_documents)

4414

In [44]:
# Display the jobs frame.
# NOTE(review): consider jobs_df.head() to keep the stored output short.
jobs_df

Unnamed: 0,job_id,company_id,job_name,taglist,location,three_reasons,description
0,kms-technology:jrsr_qa_engineer_kms_labs_bonus,kms-technology,"(Jr/Sr) QA Engineer, KMS Labs - BONUS",QA QC English Tester,"290/26 Nam Ky Khoi Nghia, ward 8, District 3, ...",Inspired Leadership Team and Talented Colleagu...,KMS Labs is the startup incubation arm of KMS ...
1,kms-technology:engineering_manager_bonus,kms-technology,Engineering Manager - BONUS,Project Manager Agile English,"02 Tan Vien, ward 2, Tan Binh, Ho Chi Minh",Inspired Leadership Team and Talented Colleagu...,Full 13th Month Salary. ***Apply and Join in S...
2,kms-technology:fullstack_mobile_mobilenodejs_k...,kms-technology,"Fullstack Mobile (Mobile,NodeJs) Kobiton",NodeJS React Native Fresher Accepted,"02 Tan Vien, ward 2, Tan Binh, Ho Chi Minh",Inspired Leadership Team and Talented Colleagu...,is the mobile experience platform trusted by l...
3,kms-technology:jrsrprincipal_java_developer_bonus,kms-technology,(Jr/Sr/Principal) Java Developer- BONUS,Java JavaScript Spring,"174 Thai Ha, Dong Da, Ha Noi",Inspired Leadership Team and Talented Colleagu...,Full 13th Month Salary ***Apply and Join in Se...
4,kms-technology:product_manager_kms_labs_bonus,kms-technology,"Product Manager, KMS Labs - BONUS",Product Manager English Manager,"290/26 Nam Ky Khoi Nghia, ward 8, District 3, ...",Inspired Leadership Team and Talented Colleagu...,KMS Labs\n is the startup incubation arm of KM...
...,...,...,...,...,...,...,...
1407,go-game-vietnam-limited-liability-company:unit...,go-game-vietnam-limited-liability-company,Unity Games Developer,Unity C# Games,"Toà nhà Bách Việt , 65 Trần Quốc Hoàn, Phường ...","Chỉ Có Game, Game & Game\nTrẻ, Nhiệt Huyết và ...",Lập trình phát triển Game sử dụng Unity3D: And...
1408,dai-viet-group:software_developer_net_c_aspnet,dai-viet-group,"Software Developer (.NET, C#, ASP.NET)",.NET C# ASP.NET Fresher Accepted,"1295 1295B, Nguyễn Thị Định, Phường Cát Lái, D...",17-21 months Salary in 1 year\nHigh Salary\nat...,Tham gia xây dựng kiến trúc ứng dụng phần mềm....
1409,misa-software:net_dev_net_core_c_signon_bonus,misa-software,".NET Dev (.NET Core, C#) $ Sign-on Bonus",.NET C# ASP.NET,"40 Lam Son Street, Ward 2, Tan Binh, Ho Chi Minh",Comfortable Working Conditions\nInteresting & ...,We are looking for 30 Intermediate/Senior .NET...
1410,thuocsi-vn:remote_fullstack_dev_javascript,thuocsi-vn,[REMOTE] Fullstack Dev (JavaScript),JavaScript NodeJS ReactJS,"Cebu City, Philippines, Other, Ho Chi Minh",15 paid annual leaves\n100% work from home\nFl...,Participates in all phases of the software dev...
