In [1]:
import json
import string
import pandas as pd
import re
import difflib
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime as dt
from nltk import ngrams
from langdetect import detect, DetectorFactory
from difflib import get_close_matches as gcm

In [2]:
skills = pd.read_csv('skills_db2/skill.csv')
skills.head()

Unnamed: 0,skill_id,skill
0,1,Applied Science
1,2,Arts and Humanities
2,3,Business
3,4,Computer Science
4,5,Data Science


In [3]:
sk_list = skills['skill'].tolist()
len(sk_list)

3031

In [4]:
with open('resource/job_infos_scrape.json') as f:
    jobs = json.load(f)

len(jobs)

19680

In [5]:
jobs['0']

{'title': 'SALES ASSOCIATE',
 'job_details': '* Under supervision and perform duties to provide technical product assistance/knowledge to customers in order to generate new and repeat sales.\n* Assist customers in the selection of the best products that suited to their needs and desires and explain use, operation, care of the merchandise products and services to customers.\n* Explain the terms of sales, availability of the product and delivery dates and related information and process orders to customers.\n* Handle customers inquiries and complaint.\n* Keep display items clean and displayable and overall cleanliness of the branch.\n* To  handle  any  other  job  as  may be  assigned  by  the  superior.\n\nLocation : Tropicana Aman, Denai Alam, Wisma WCC, Puchong, Bandar Botanik, Sri Petaling, Pandan Indah, Rawang, Sungai Buloh, Kota Damansara, Kajang, Bangi , Balakong, USJ Taipan, Kota Kemuning, Shah Alam.',
 'requirement': 'Permanent| 3 Shift Time |STPM / A Level or Equivalent|Today',

In [6]:
rs = []
i = 0

for j in jobs:
    rs.append(jobs[j]['requirement'].split('|')[2])

set(rs)

{"Bachelor's or Equivalent",
 'Diploma / Advanced Diploma / Higher Graduate Diploma / DVM / DKM Level 4 / DLKM Level 5',
 'Diploma / Advanced Diploma / Higher Graduate Diploma / DVM /…',
 'Diploma / Advanced Diploma / Higher Graduate…',
 'Doctoral (PhD) or Equivalent',
 "Master's or Equivalent",
 'PMR / PT3 or Equivalent',
 'Primary Education or Below',
 'SPM / O Level / SKM Level 1 / SKM Level 2 / SKM Level 3 or Equivalent',
 'SPM / O Level / SKM Level 1 / SKM Level 2 / SKM Level 3 or…',
 'STPM / A Level or Equivalent'}

In [7]:
df_jobs = pd.DataFrame.from_dict(jobs).transpose().reset_index()
df_jobs.head()

Unnamed: 0,index,title,job_details,requirement,company,company_details
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent| 3 Shift Time |STPM / A Level or Equ...,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot..."
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent| Normal Hour |Bachelor's or Equivale...,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ..."
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| Flexible Hours |Diploma / Advanced ...,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot..."
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent| Normal Hour |Bachelor's or Equivale...,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo..."
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| 3 Shift Time |SPM / O Level / SKM L...,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot..."


In [8]:
df_unique = df_jobs.drop_duplicates(subset=['title', 'job_details'])
df_unique.head()

Unnamed: 0,index,title,job_details,requirement,company,company_details
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent| 3 Shift Time |STPM / A Level or Equ...,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot..."
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent| Normal Hour |Bachelor's or Equivale...,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ..."
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| Flexible Hours |Diploma / Advanced ...,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot..."
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent| Normal Hour |Bachelor's or Equivale...,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo..."
5,5,Vacancy For Technician,Engineering assistants ensure the administrati...,Permanent| 3 Shift Time |SPM / O Level / SKM L...,HLK (Chain-Store) Sdn. Bhd\n|\nShah Alam,"Civil engineering, Construction of buildings\n..."


In [9]:
len(df_unique)

8184

In [10]:
def extract_skills(info):
    # Remove ordered list with alphabets: a), b), c),...
    words = re.sub(r'[\s\t\n]+[a-zA-Z\s*]\)+', ' ', info)
    words = re.sub('[\n|,|.|/|\(|\)]', ' ', words).lower().split()
    bigrams = [' '.join(g) for g in ngrams(words, 2)]
    trigrams = [' '.join(g) for g in ngrams(words, 3)]
    results = []
    for skill in sk_list:
        s = skill.lower()
        if '(' in s:
            abb = s[s.find("(")+1:s.find(")")]
            if abb in info:
                continue
            s = re.sub(r"[\(].*?[\)]", "", s)
        s2 = s.split()
        if len(s2) == 1:
            if len(gcm(s, words, cutoff=0.9)) > 0:
                results.append(skill)
        elif len(s2) == 2:
            if len(gcm(s, bigrams, cutoff=0.9)) > 0:
                results.append(skill)
        elif len(s2) == 3:
            if len(gcm(s, trigrams, cutoff=0.85)) > 0:
                results.append(skill)
        else:
            if len(gcm(s, trigrams, cutoff=0.8)) > 0:
                results.append(skill)
    return results

In [13]:
i = 0
job_info_list = []
DetectorFactory.seed = 42
initial = dt.now()
interval = dt.now()
print_every = 200

for _, job in df_unique.iterrows():
    i += 1
    if i % print_every == 0:
        print("{} jobs processed. Time taken: {}".format(i, dt.now() - interval))
        interval = dt.now()
    all_info = job['title'] + ' ' + job['job_details']
    """
    if len(job['job_details']) < 3:
        language = None
    else:
        language = detect(job['job_details'])
    """
    skills = extract_skills(all_info)
    job_info_list.append({
        'title': job['title'],
        'job_details': job['job_details'],
        # 'language': language,
        'skills': skills,
        'no_skills': len(skills)
    })

print("Total time taken: {}".format(dt.now() - initial))

200 jobs processed. Time taken: 0:01:51.458656
400 jobs processed. Time taken: 0:01:51.786914
600 jobs processed. Time taken: 0:01:43.225501
800 jobs processed. Time taken: 0:01:53.575688
1000 jobs processed. Time taken: 0:01:59.559617
1200 jobs processed. Time taken: 0:01:40.713805
1400 jobs processed. Time taken: 0:01:48.591267
1600 jobs processed. Time taken: 0:01:27.331495
1800 jobs processed. Time taken: 0:02:15.966458
2000 jobs processed. Time taken: 0:02:32.579185
2200 jobs processed. Time taken: 0:01:43.901322
2400 jobs processed. Time taken: 0:01:06.386746
2600 jobs processed. Time taken: 0:00:58.488351
2800 jobs processed. Time taken: 0:01:41.767420
3000 jobs processed. Time taken: 0:01:26.272015
3200 jobs processed. Time taken: 0:01:51.712923
3400 jobs processed. Time taken: 0:02:09.110971
3600 jobs processed. Time taken: 0:02:02.694677
3800 jobs processed. Time taken: 0:01:43.351922
4000 jobs processed. Time taken: 0:01:46.989180
4200 jobs processed. Time taken: 0:01:37.920

In [14]:
df = pd.DataFrame.from_dict(job_info_list)
df.head()

Unnamed: 0,title,job_details,skills,no_skills
0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,"[Product, Sales, Service, Selection, Operations]",5
1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,"[Advertising, Adobe Illustrator, Adobe Photoshop]",3
2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...","[Distribution, Operations]",2
3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",[],0
4,Vacancy For Technician,Engineering assistants ensure the administrati...,"[Administration, Engineering]",2


In [15]:
df2 = df_jobs.merge(df, on=['title', 'job_details'], how='left')
df2.head()

Unnamed: 0,index,title,job_details,requirement,company,company_details,skills,no_skills
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent| 3 Shift Time |STPM / A Level or Equ...,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent| Normal Hour |Bachelor's or Equivale...,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| Flexible Hours |Diploma / Advanced ...,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent| Normal Hour |Bachelor's or Equivale...,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| 3 Shift Time |SPM / O Level / SKM L...,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2


In [16]:
df2.isnull().sum()

index              0
title              0
job_details        0
requirement        0
company            0
company_details    0
skills             0
no_skills          0
dtype: int64

In [18]:
df_lan = pd.read_csv('myfuturejobs-insights/v0/myfuturejobs_skills2.csv')
df2['language'] = df_lan['language']
df2.head()

Unnamed: 0,index,title,job_details,requirement,company,company_details,skills,no_skills,language
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent| 3 Shift Time |STPM / A Level or Equ...,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5,en
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent| Normal Hour |Bachelor's or Equivale...,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3,id
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| Flexible Hours |Diploma / Advanced ...,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,en
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent| Normal Hour |Bachelor's or Equivale...,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0,id
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent| 3 Shift Time |SPM / O Level / SKM L...,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,en


## Data Cleaning

In [31]:
df3 = df2.copy()
df3[['job_type', 'education_requirement']] = df2['requirement'].str.split('|', expand=True)[[0, 2]]
cols = df3.columns.tolist()
cols = cols[:3] + cols[-2:] + [cols[-3]] + cols[4:-3]
df3 = df3[cols]
df3.head()

Unnamed: 0,index,title,job_details,job_type,education_requirement,language,company,company_details,skills,no_skills
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent,STPM / A Level or Equivalent,en,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent,Bachelor's or Equivalent,id,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,Diploma / Advanced Diploma / Higher Graduate…,en,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent,Bachelor's or Equivalent,id,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,SPM / O Level / SKM Level 1 / SKM Level 2 / SK...,en,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2


In [32]:
df3['education_requirement'].unique().tolist()

['STPM / A Level or Equivalent',
 "Bachelor's or Equivalent",
 'Diploma / Advanced Diploma / Higher Graduate…',
 'SPM / O Level / SKM Level 1 / SKM Level 2 / SKM Level 3 or…',
 'SPM / O Level / SKM Level 1 / SKM Level 2 / SKM Level 3 or Equivalent',
 'Diploma / Advanced Diploma / Higher Graduate Diploma / DVM /…',
 'Primary Education or Below',
 'PMR / PT3 or Equivalent',
 'Diploma / Advanced Diploma / Higher Graduate Diploma / DVM / DKM Level 4 / DLKM Level 5',
 "Master's or Equivalent",
 'Doctoral (PhD) or Equivalent']

In [33]:
df3['education'] = df3['education_requirement'].replace({
    'Primary Education or Below': '1 - Primary Education',
    'PMR / PT3 or Equivalent': '2 - PMR / PT3',
    'SPM / O Level / SKM Level 1 / SKM Level 2 / SKM Level 3 or…': '3 - SPM / O Level / SKM',
    'SPM / O Level / SKM Level 1 / SKM Level 2 / SKM Level 3 or Equivalent': '3 - SPM / O Level / SKM',
    'STPM / A Level or Equivalent': '4 - STPM / A Level',
    'Diploma / Advanced Diploma / Higher Graduate…': '5 - Diploma / DVM',
    'Diploma / Advanced Diploma / Higher Graduate Diploma / DVM /…': '5 - Diploma / DVM',
    'Diploma / Advanced Diploma / Higher Graduate Diploma / DVM / DKM Level 4 / DLKM Level 5': '5 - Diploma / DVM',
    "Bachelor's or Equivalent": "6 - Bachelor's",
    "Master's or Equivalent": "7 - Master's",
    'Doctoral (PhD) or Equivalent': '8 - Doctoral (PhD)'
})
df3.head()

Unnamed: 0,index,title,job_details,job_type,education_requirement,language,company,company_details,skills,no_skills,education
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent,STPM / A Level or Equivalent,en,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5,4 - STPM / A Level
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent,Bachelor's or Equivalent,id,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3,6 - Bachelor's
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,Diploma / Advanced Diploma / Higher Graduate…,en,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,5 - Diploma / DVM
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent,Bachelor's or Equivalent,id,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0,6 - Bachelor's
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,SPM / O Level / SKM Level 1 / SKM Level 2 / SK...,en,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,3 - SPM / O Level / SKM


In [34]:
df3['language'].unique()

array(['en', 'id', 'fr', nan, 'de', 'so', 'sv', 'sw', 'nl', 'tl', 'ca',
       'ro', 'it', 'cy', 'af', 'da', 'et', 'es', 'vi', 'fi', 'no', 'tr',
       'ko', 'zh-cn', 'sl', 'pt', 'hr'], dtype=object)

In [35]:
languages = ['English', 'Others']
df3.loc[df3['language'] == 'en', 'job_language'] = 'English'
df3.loc[df3['language'] != 'en', 'job_language'] = 'Others'
df3.head()

Unnamed: 0,index,title,job_details,job_type,education_requirement,language,company,company_details,skills,no_skills,education,job_language
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent,STPM / A Level or Equivalent,en,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5,4 - STPM / A Level,English
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent,Bachelor's or Equivalent,id,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3,6 - Bachelor's,Others
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,Diploma / Advanced Diploma / Higher Graduate…,en,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,5 - Diploma / DVM,English
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent,Bachelor's or Equivalent,id,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0,6 - Bachelor's,Others
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,SPM / O Level / SKM Level 1 / SKM Level 2 / SK...,en,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,3 - SPM / O Level / SKM,English


In [36]:
df_red = pd.read_excel('skills/Redundant Skills.xlsx')
red_skills = df_red['Skill'].unique().tolist()
df_red.head()

Unnamed: 0,Skill
0,Service
1,Product
2,Operations
3,Support
4,Business


In [37]:
for i, row in df3.iterrows():
    skills = row['skills']
    ignore_skills = []
    for j, skill in enumerate(skills):
        if skill in red_skills:
            ignore_skills.append(skill)
        elif any(skill in s for s in skills[:j] + skills[j+1:]):
            ignore_skills.append(skill)
    job_skills = [s for s in skills if s not in ignore_skills]
    df3.loc[i, ['job_skills', 'no_job_skills', 'ignore_skills', 'no_ignore_skills']] = \
           ['; '.join(job_skills), len(job_skills), '; '.join(ignore_skills), len(ignore_skills)]

df3.head()

Unnamed: 0,index,title,job_details,job_type,education_requirement,language,company,company_details,skills,no_skills,education,job_language,job_skills,no_job_skills,ignore_skills,no_ignore_skills
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent,STPM / A Level or Equivalent,en,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5,4 - STPM / A Level,English,Sales; Selection,2.0,Product; Service; Operations,3.0
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent,Bachelor's or Equivalent,id,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3,6 - Bachelor's,Others,Advertising; Adobe Illustrator; Adobe Photoshop,3.0,,0.0
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,Diploma / Advanced Diploma / Higher Graduate…,en,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,5 - Diploma / DVM,English,Distribution,1.0,Operations,1.0
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent,Bachelor's or Equivalent,id,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0,6 - Bachelor's,Others,,0.0,,0.0
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,SPM / O Level / SKM Level 1 / SKM Level 2 / SK...,en,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,3 - SPM / O Level / SKM,English,Distribution,1.0,Operations,1.0


In [38]:
df4 = df3.copy()
cols = df4.columns.tolist()
cols = cols[:4] + cols[-2:] + cols[6:]
df4 = df4[cols]
df4.head()

Unnamed: 0,index,title,job_details,job_type,ignore_skills,no_ignore_skills,company,company_details,skills,no_skills,education,job_language,job_skills,no_job_skills,ignore_skills.1,no_ignore_skills.1
0,0,SALES ASSOCIATE,* Under supervision and perform duties to prov...,Permanent,Product; Service; Operations,3.0,GLOBAL PSYTECH SDN BHD\n|\nCyberjaya,"Retail trade, except of motor vehicles and mot...","[Product, Sales, Service, Selection, Operations]",5,4 - STPM / A Level,English,Sales; Selection,2.0,Product; Service; Operations,3.0
1,1,SIGN HOUSE ADVERTISING,Kelebihan diberi kepada yang mempunyai kemahir...,Permanent,,0.0,MR DIY (M) SDN BHD\n|\nSeremban,"Advertising and market research, Printing and ...","[Advertising, Adobe Illustrator, Adobe Photoshop]",3,6 - Bachelor's,Others,Advertising; Adobe Illustrator; Adobe Photoshop,3.0,,0.0
2,2,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,Operations,1.0,INTER-EXCEL ADVISORY SDN BHD\n|\nKuala Lumpur,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,5 - Diploma / DVM,English,Distribution,1.0,Operations,1.0
3,3,Petugas PDK,"1. Bertanggungjawab kepada penyelia PDK, jawat...",Permanent,,0.0,IOI PLANTATION SERVICES SDN BHD\n|\nPutrajaya,"Education, Office administrative, office suppo...",[],0,6 - Bachelor's,Others,,0.0,,0.0
4,4,MR DIY (M) SDN BHD,"Receive stock, checking & distribution item re...",Permanent,Operations,1.0,QL Ansan Poultry Farm Sdn Bhd\n|\nPadang Serai,"Retail trade, except of motor vehicles and mot...","[Distribution, Operations]",2,3 - SPM / O Level / SKM,English,Distribution,1.0,Operations,1.0


In [39]:
df4.to_csv('myfuturejobs-insights/myfuturejobs_skills2.csv', index=False)