In [0]:
!pip install nltk
#nltk is required for our projcet

In [0]:
#importing required libraries
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.functions import col
import pyspark.sql.functions as F
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.types import DateType
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import Tokenizer, StopWordsRemover
from nltk.stem.snowball import SnowballStemmer
from pyspark.sql.functions import udf, col, lower, regexp_replace
from pyspark.sql.types import *
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.ml.feature import VectorAssembler,StringIndexer
from pyspark.ml import Pipeline


In [0]:
# Load the job-postings CSV into a DataFrame; the first line is treated as the header
# and column types are inferred from the data.
# NOTE(review): if the free-text columns contain quoted newlines or doubled quotes,
# rows may be split or misaligned with these options -- confirm whether
# multiLine=True / escape='"' are needed for this file.
df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("/FileStore/tables/fake_job_postings.csv")
)

In [0]:
# Print a preview of the loaded DataFrame
df.show()

In [0]:
# List the column names of the loaded DataFrame
df.columns

In [0]:
# Count the null entries in each column; the result has one column per input column
from pyspark.sql.functions import isnan

null_count_exprs = [
    # when(...) has no otherwise-branch, so count() only sees the rows where the column is null
    count(when(col(column_name).isNull(), column_name)).alias(column_name)
    for column_name in df.columns
]
df.select(null_count_exprs).show()

In [0]:
# Remove the columns that are not used in the rest of the notebook
unused_columns = ["salary_range", "job_id"]
df1 = df.drop(*unused_columns)

In [0]:
# Print the schema (column names and types) of df1
df1.printSchema()

In [0]:
# Replace null values with an empty string (not a space)
df1 = df1.na.fill("")

In [0]:
# Print a preview of df1
df1.show()

In [0]:
# Merge the descriptive columns into one 'text' column, separated by single spaces.
# NOTE(review): 'required_experience' is dropped together with these columns later
# but is not part of the merged text -- confirm that this is intentional.
text_source_columns = [
    'title', 'location', 'department', 'company_profile', 'description',
    'requirements', 'benefits', 'employment_type', 'required_education',
    'industry', 'function',
]
text_parts = [col(text_source_columns[0])]
for column_name in text_source_columns[1:]:
    # every following column is preceded by a single-space separator
    text_parts += [lit(' '), col(column_name)]
df1 = df1.withColumn('text', concat(*text_parts))

In [0]:
# Show the first row of the merged 'text' column
df1.select('text').show(1)

In [0]:
# Drop the individual descriptive columns, keeping the flag/label columns and 'text'
merged_source_columns = [
    'title', 'location', 'department', 'company_profile', 'description',
    'requirements', 'benefits', 'employment_type', 'required_experience',
    'required_education', 'industry', 'function',
]
df1 = df1.drop(*merged_source_columns)

In [0]:
# For each flag/label column, add a boolean companion column that is true when the
# value is non-null after casting to int; these companions are used to spot bad rows.
validity_aliases = [
    ("telecommuting", "tele_Value"),
    ("has_company_logo", "logo_Value"),
    ("has_questions", "question_Value"),
    ("fraudulent", "fradulent_Value"),
]
selected_columns = []
for source_name, alias_name in validity_aliases:
    # keep the original column, immediately followed by its validity flag
    selected_columns.append(source_name)
    selected_columns.append(col(source_name).cast("int").isNotNull().alias(alias_name))
selected_columns.append("text")
df2 = df1.select(*selected_columns)

In [0]:
# Print the schema of df2 (original columns plus their validity flags)
df2.printSchema()

In [0]:
# Keep only the rows for which all four validity flags are true
df3 = df2
for flag_name in ['tele_Value', 'logo_Value', 'question_Value', 'fradulent_Value']:
    df3 = df3.filter(df2[flag_name] == 'true')

In [0]:
# Print the schema of the filtered DataFrame
df3.printSchema()

In [0]:
# Sanity check: count the rows whose 'fradulent_Value' flag is still false
invalid_label_rows = df3.filter(df3['fradulent_Value'] == 'false')
invalid_label_rows.count()

In [0]:
# Number of rows in df3
df3.count()

In [0]:
# Drop every row that has a null in any column, then show the remaining row count
df3 = df3.na.drop(how='any')
df3.count()

In [0]:
# Print the schema of df3
df3.printSchema()

In [0]:
# Remove the helper validity-flag columns
helper_columns = ['tele_Value', 'logo_Value', 'question_Value', 'fradulent_Value']
df3 = df3.drop(*helper_columns)

In [0]:
# Cast the output (label) column 'fraudulent' to integer
fraudulent_as_int = df3['fraudulent'].cast('integer')
df3 = df3.withColumn('fraudulent', fraudulent_as_int)

In [0]:
# Print the schema of df3
df3.printSchema()

## Data Visualization

In [0]:
# Number of postings per "telecommuting" value.
# SQL equivalent (against a jobdescription_csv table):
#   select telecommuting, count(telecommuting) as telecommuting_count
#   from jobdescription_csv group by telecommuting
telecommuting_counts = df3.groupBy("telecommuting").agg(
    count("telecommuting").alias("telecommuting_count")
)
display(telecommuting_counts)

telecommuting,telecommuting_count
0,16137
1,707


From the above bar plot, we can see that far more jobs have "telecommuting" = 0 (16137 postings) than "telecommuting" = 1 (only 707 postings).

In [0]:
# Number of postings per "has_company_logo" value.
# SQL equivalent (against a jobdescription_csv table):
#   select has_company_logo, count(has_company_logo) as has_company_logo_count
#   from jobdescription_csv group by has_company_logo
has_company_logo_counts = df3.groupBy("has_company_logo").agg(
    count("has_company_logo").alias("has_company_logo_count")
)
display(has_company_logo_counts)

has_company_logo,has_company_logo_count
0,3552
1,13292


From the above pie plot, we can see that most jobs have "has_company_logo" = 1 (about 79%), while about 21% have the value 0.

In [0]:
# Number of postings per "has_questions" value.
# SQL equivalent (against a jobdescription_csv table):
#   select has_questions, count(has_questions) as has_questions_count
#   from jobdescription_csv group by has_questions
has_questions_counts = df3.groupBy("has_questions").agg(
    count("has_questions").alias("has_questions_count")
)
display(has_questions_counts)

has_questions,has_questions_count
0,8673
1,8171


From the above bar plot, we can see that the numbers of jobs with "has_questions" = 0 and "has_questions" = 1 are almost equal.

In [0]:
# Number of postings per "fraudulent" label.
# SQL equivalent (against a jobdescription_csv table):
#   select fraudulent, count(fraudulent) as fraudulent_count
#   from jobdescription_csv group by fraudulent
fraudulent_counts = df3.groupBy("fraudulent").agg(
    count("fraudulent").alias("fraudulent_count")
)
display(fraudulent_counts)


fraudulent,fraudulent_count
1,832
0,16012


From the above plot, we can see that there are far fewer fake jobs than real jobs.

### Text Analytics on the data set

In [0]:
from pyspark.ml.feature import Tokenizer, StopWordsRemover
from nltk.stem.snowball import SnowballStemmer
from pyspark.sql.functions import udf, col, lower, regexp_replace
from pyspark.sql.types import *

# Flag/label columns carried unchanged through every text-processing step
base_columns = ['telecommuting', 'has_company_logo', 'has_questions', 'fraudulent']

# Clean the text: delete every character that is not a letter or whitespace, then lower-case
cleaned_text = lower(regexp_replace('text', "[^a-zA-Z\\s]", "")).alias('text')
df_clean = df3.select(*base_columns, cleaned_text)

# Tokenize the cleaned text
tokenizer = Tokenizer(inputCol='text', outputCol='words_token')
df_words_token = tokenizer.transform(df_clean).select(*base_columns, 'words_token')

# Remove stop words from the tokens
remover = StopWordsRemover(inputCol='words_token', outputCol='words_clean')
df_words_no_stopw = remover.transform(df_words_token).select(*base_columns, 'words_clean')

# Stem every remaining token with the English Snowball stemmer
stemmer = SnowballStemmer(language='english')


def _stem_tokens(tokens):
    """Return a list with the stemmed form of each token."""
    return [stemmer.stem(token) for token in tokens]


stemmer_udf = udf(_stem_tokens, ArrayType(StringType()))
df_stemmed = (
    df_words_no_stopw
    .withColumn("words_stemmed", stemmer_udf("words_clean"))
    .select(*base_columns, 'words_stemmed')
)


def _keep_long_words(row):
    """Return only the words that are at least 3 characters long."""
    return [x for x in row if len(x) >= 3]


# Keep only stems of length 3 or more (this also discards empty tokens)
filter_length_udf = udf(_keep_long_words, ArrayType(StringType()))
df_final_words = df_stemmed.withColumn('words', filter_length_udf(col('words_stemmed')))

In [0]:
# Number of rows in the processed DataFrame
df_final_words.count()

In [0]:
# Render the processed DataFrame (stemmed tokens and length-filtered words)
display(df_final_words)

telecommuting,has_company_logo,has_questions,fraudulent,words_stemmed,words
0,1,0,0,"List(market, intern, us, ny, new, york, market, food, weve, creat, groundbreak, awardwin, cook, site, support, connect, celebr, home, cook, give, everyth, need, one, placew, top, editori, busi, engin, team, focus, use, technolog, find, new, better, way, connect, peopl, around, specif, food, interest, offer, superb, high, curat, inform, food, cook, attract, talent, home, cook, contributor, countri, also, publish, wellknown, profession, like, mario, batali, gwyneth, paltrow, danni, meyer, partnership, whole, food, market, random, housefood, name, best, food, websit, jame, beard, foundat, iacp, featur, new, york, time, npr, pando, daili, techcrunch, today, showwer, locat, chelsea, new, york, citi, food, fastgrow, jame, beard, awardwin, onlin, food, communiti, crowdsourc, curat, recip, hub, current, interview, full, parttim, unpaid, intern, work, small, team, editor, execut, develop, new, york, citi, headquartersreproduc, andor, repackag, exist, food, content, number, partner, site, huffington, post, yahoo, buzzfe, various, content, manag, systemsresearch, blog, websit, provis, food, affili, programassist, daytoday, affili, program, support, screen, affili, assist, affili, inquiriessupport, pr, amp, event, neededhelp, offic, administr, work, file, mail, prepar, meetingswork, develop, document, bug, suggest, improv, sitesupport, market, execut, staff, experi, content, manag, system, major, plus, blog, countsfamiliar, food, editori, voic, aestheticlov, food, appreci, import, home, cook, cook, seasonsmeticul, editor, perfectionist, obsess, attent, detail, madden, typo, broken, link, delight, find, fix, themcheer, pressureexcel, communic, skillsa, multitask, juggler, respons, big, smallinterest, engag, social, media, like, twitter, facebook, pinterestlov, problemsolv, collabor, drive, food, forwardthink, big, pictur, pitch, nitti, gritti, run, small, compani, dish, shop, administr, supportcomfort, realiti, work, startup, call, even, weekend, work, long, hour, , , 
, market)","List(market, intern, new, york, market, food, weve, creat, groundbreak, awardwin, cook, site, support, connect, celebr, home, cook, give, everyth, need, one, placew, top, editori, busi, engin, team, focus, use, technolog, find, new, better, way, connect, peopl, around, specif, food, interest, offer, superb, high, curat, inform, food, cook, attract, talent, home, cook, contributor, countri, also, publish, wellknown, profession, like, mario, batali, gwyneth, paltrow, danni, meyer, partnership, whole, food, market, random, housefood, name, best, food, websit, jame, beard, foundat, iacp, featur, new, york, time, npr, pando, daili, techcrunch, today, showwer, locat, chelsea, new, york, citi, food, fastgrow, jame, beard, awardwin, onlin, food, communiti, crowdsourc, curat, recip, hub, current, interview, full, parttim, unpaid, intern, work, small, team, editor, execut, develop, new, york, citi, headquartersreproduc, andor, repackag, exist, food, content, number, partner, site, huffington, post, yahoo, buzzfe, various, content, manag, systemsresearch, blog, websit, provis, food, affili, programassist, daytoday, affili, program, support, screen, affili, assist, affili, inquiriessupport, amp, event, neededhelp, offic, administr, work, file, mail, prepar, meetingswork, develop, document, bug, suggest, improv, sitesupport, market, execut, staff, experi, content, manag, system, major, plus, blog, countsfamiliar, food, editori, voic, aestheticlov, food, appreci, import, home, cook, cook, seasonsmeticul, editor, perfectionist, obsess, attent, detail, madden, typo, broken, link, delight, find, fix, themcheer, pressureexcel, communic, skillsa, multitask, juggler, respons, big, smallinterest, engag, social, media, like, twitter, facebook, pinterestlov, problemsolv, collabor, drive, food, forwardthink, big, pictur, pitch, nitti, gritti, run, small, compani, dish, shop, administr, supportcomfort, realiti, work, startup, call, even, weekend, work, long, hour, market)"
0,1,0,0,"List(custom, servic, , cloud, video, product, nz, , auckland, success, , second, world, cloud, video, product, servic, second, world, cloud, video, product, servic, enabl, brand, agenc, get, high, qualiti, onlin, video, content, shot, produc, anywher, world, , second, make, video, product, fast, afford, manag, seamless, cloud, purchas, publish, httpurlfbeafacacdcfbdeefaedaefcaddd, second, remov, hassl, cost, risk, speed, issu, work, regular, video, product, compani, manag, everi, aspect, video, project, beauti, onlin, experi, grow, global, network, , rate, video, profession, , countri, manag, dedic, product, success, team, , countri, , second, provid, , success, guarante, second, produc, almost, , video, , countri, , global, brand, includ, world, largest, includ, paypal, loreal, soni, barclay, offic, auckland, london, sydney, tokyo, singaporehttpurlfbeafacacdcfbdeefaedaefcadddhttpurleadbdefaaabdbbeacbbfbfbcfbcbaab, httpurlcddfabddaebebfdbcefbecbffea, organis, , focus, , vibrant, , awesomedo, passion, custom, servic, slick, type, skill, mayb, account, manag, think, administr, cooler, polar, bear, jetski, need, hear, youw, cloud, video, product, servic, opper, glodal, level, yeah, pretti, cool, serious, aboutdeliv, world, class, product, excel, custom, serviceour, rapid, expand, busi, look, talent, project, manag, manag, success, deliveri, video, project, manag, client, communic, drive, product, process, work, coolest, brand, planet, learn, global, team, repres, nz, huge, wayw, enter, next, growth, stage, busi, grow, quick, intern, therefor, posit, burst, opportun, right, person, enter, busi, right, time, second, world, cloud, video, product, servic, httpurlfbeafacacdcfbdeefaedaefcaddd, second, world, cloud, video, product, servic, enabl, brand, agenc, get, high, qualiti, onlin, video, content, shot, produc, anywher, world, fast, afford, manag, seamless, cloud, purchas, publish, second, remov, hassl, cost, risk, speed, issu, work, regular, video, product, 
compani, manag, everi, aspect, video, project, beauti, onlin, experi, grow, network, , rate, video, profession, , countri, dedic, product, success, team, , countri, guarante, video, project, success, , easi, commiss, quick, googl, adword, campaign, second, produc, almost, , video, , countri, , global, brand, includ, world, largest, includ, paypal, loreal, soni, barclay, offic, auckland, london, sydney, tokyo, amp, singaporeour, auckland, offic, basedright, heart, wynyard, quarter, innov, precinct, , gridakl, expect, youyour, key, respons, communic, client, , second, team, freelanc, communiti, throughout, video, product, process, includ, shoot, plan, secur, freelanc, talent, manag, workflow, onlin, product, manag, system, aim, manag, video, project, effect, produc, great, video, client, lovekeyattributescli, focus, excel, custom, servic, communic, skillsonlin, oustand, comput, knowledg, experi, use, onlin, softwar, project, manag, toolsorganis, manag, workload, abl, multitask, attent, detailmotiv, selfstart, passion, excel, work, achiev, great, resultsadapt, show, initi, think, feet, constant, evolv, atmosphereflex, fast, turnaround, work, hour, availabilityeasi, goingamp, upbeat, dosent, get, bog, love, challengesens, humour, laugh, know, work, startup, take, gutsabl, deliv, includ, meet, project, deadlin, budgetattitud, import, experi, , second, howev, previous, experi, custom, servic, andor, project, manag, beneficialpleas, view, platform, , websit, aturlaacefafbeeaddafdebbdcdddeaand, get, clear, understand, reach, get, usthrough, part, , second, team, gainexperi, work, project, locat, around, world, intern, brandexperi, work, varieti, client, larg, rang, projectsopportun, drive, grow, product, function, teama, posit, work, environ, great, teampayappli, role, videob, video, busi, understand, one, quickest, way, assess, suitabl, role, one, quickest, way, appli, submit, , second, long, video, tell, us, experi, think, perfect, role, filmmak, make, realli, creativ, 
video, simpl, video, film, smart, phone, web, cam, fine, pleas, also, includ, base, startyou, upload, video, onto, youtub, vimeo, similar, draft, live, linkappl, due, pm, wednesday, th, juli, , video, readi, appli, role, via, follow, link, togeth, cover, letter, cvafter, watch, video, get, idea, suitabl, role, email, shortlist, candid, fulltim, , market, advertis, custom, servic)","List(custom, servic, cloud, video, product, auckland, success, second, world, cloud, video, product, servic, second, world, cloud, video, product, servic, enabl, brand, agenc, get, high, qualiti, onlin, video, content, shot, produc, anywher, world, second, make, video, product, fast, afford, manag, seamless, cloud, purchas, publish, httpurlfbeafacacdcfbdeefaedaefcaddd, second, remov, hassl, cost, risk, speed, issu, work, regular, video, product, compani, manag, everi, aspect, video, project, beauti, onlin, experi, grow, global, network, rate, video, profession, countri, manag, dedic, product, success, team, countri, second, provid, success, guarante, second, produc, almost, video, countri, global, brand, includ, world, largest, includ, paypal, loreal, soni, barclay, offic, auckland, london, sydney, tokyo, singaporehttpurlfbeafacacdcfbdeefaedaefcadddhttpurleadbdefaaabdbbeacbbfbfbcfbcbaab, httpurlcddfabddaebebfdbcefbecbffea, organis, focus, vibrant, awesomedo, passion, custom, servic, slick, type, skill, mayb, account, manag, think, administr, cooler, polar, bear, jetski, need, hear, youw, cloud, video, product, servic, opper, glodal, level, yeah, pretti, cool, serious, aboutdeliv, world, class, product, excel, custom, serviceour, rapid, expand, busi, look, talent, project, manag, manag, success, deliveri, video, project, manag, client, communic, drive, product, process, work, coolest, brand, planet, learn, global, team, repres, huge, wayw, enter, next, growth, stage, busi, grow, quick, intern, therefor, posit, burst, opportun, right, person, enter, busi, right, time, second, world, cloud, 
video, product, servic, httpurlfbeafacacdcfbdeefaedaefcaddd, second, world, cloud, video, product, servic, enabl, brand, agenc, get, high, qualiti, onlin, video, content, shot, produc, anywher, world, fast, afford, manag, seamless, cloud, purchas, publish, second, remov, hassl, cost, risk, speed, issu, work, regular, video, product, compani, manag, everi, aspect, video, project, beauti, onlin, experi, grow, network, rate, video, profession, countri, dedic, product, success, team, countri, guarante, video, project, success, easi, commiss, quick, googl, adword, campaign, second, produc, almost, video, countri, global, brand, includ, world, largest, includ, paypal, loreal, soni, barclay, offic, auckland, london, sydney, tokyo, amp, singaporeour, auckland, offic, basedright, heart, wynyard, quarter, innov, precinct, gridakl, expect, youyour, key, respons, communic, client, second, team, freelanc, communiti, throughout, video, product, process, includ, shoot, plan, secur, freelanc, talent, manag, workflow, onlin, product, manag, system, aim, manag, video, project, effect, produc, great, video, client, lovekeyattributescli, focus, excel, custom, servic, communic, skillsonlin, oustand, comput, knowledg, experi, use, onlin, softwar, project, manag, toolsorganis, manag, workload, abl, multitask, attent, detailmotiv, selfstart, passion, excel, work, achiev, great, resultsadapt, show, initi, think, feet, constant, evolv, atmosphereflex, fast, turnaround, work, hour, availabilityeasi, goingamp, upbeat, dosent, get, bog, love, challengesens, humour, laugh, know, work, startup, take, gutsabl, deliv, includ, meet, project, deadlin, budgetattitud, import, experi, second, howev, previous, experi, custom, servic, andor, project, manag, beneficialpleas, view, platform, websit, aturlaacefafbeeaddafdebbdcdddeaand, get, clear, understand, reach, get, usthrough, part, second, team, gainexperi, work, project, locat, around, world, intern, brandexperi, work, varieti, client, larg, rang, 
projectsopportun, drive, grow, product, function, teama, posit, work, environ, great, teampayappli, role, videob, video, busi, understand, one, quickest, way, assess, suitabl, role, one, quickest, way, appli, submit, second, long, video, tell, experi, think, perfect, role, filmmak, make, realli, creativ, video, simpl, video, film, smart, phone, web, cam, fine, pleas, also, includ, base, startyou, upload, video, onto, youtub, vimeo, similar, draft, live, linkappl, due, wednesday, juli, video, readi, appli, role, via, follow, link, togeth, cover, letter, cvafter, watch, video, get, idea, suitabl, role, email, shortlist, candid, fulltim, market, advertis, custom, servic)"
0,1,0,0,"List(commiss, machineri, assist, cma, us, ia, wever, , valor, servic, provid, workforc, solut, meet, need, compani, across, privat, sector, special, focus, oil, amp, gas, industryvalor, servic, involv, throughout, everi, step, hire, process, remain, contact, way, final, step, sign, employ, contract, new, employervalor, servic, found, vision, employ, uniqu, skill, experi, qualiti, america, finest, veteran, provid, privat, sector, compani, precis, concert, valuead, servic, , america, finest, veteran, optim, career, opportunityw, eager, get, word, veteran, ampl, opportun, employ, privat, sector, ideal, candid, fill, positionsvalor, servicesyour, success, mission, client, locat, houston, activ, seek, experienc, commiss, machineri, assist, possess, strong, supervisori, skill, attent, detail, strong, dedic, safeti, must, ideal, candid, execut, activ, compli, qualiti, requir, health, environment, safeti, regul, implement, precommiss, commiss, procedur, rotari, equipmentexecut, activ, subcontractor, assign, crew, pertain, disciplineensur, effect, util, commiss, manpow, consumablesensur, execut, vendor, specialist, field, activ, assign, resourc, subcontractor, per, vendor, repres, planscarri, equip, inspect, client, repres, ensur, proper, certif, producedprepar, form, pend, test, submit, sign, certif, final, hand, certif, engin, qa, qccoordin, field, vendor, representativeskeep, record, activitiesensur, safeti, practic, strict, follow, execut, activitiesreport, progress, constraint, mechan, supervisorposs, author, site, manag, receiv, issu, permit, work, accord, project, permit, work, proceduresassist, supervisor, expedit, pend, punchlist, item, accord, commiss, manag, prioritiesassist, supervisor, coordin, supervis, constructionsupport, activ, precommiss, commiss, activitiescompani, overviewour, client, premier, engin, construct, procur, compani, execut, largescal, project, intern)","List(commiss, machineri, assist, cma, wever, valor, servic, provid, workforc, 
solut, meet, need, compani, across, privat, sector, special, focus, oil, amp, gas, industryvalor, servic, involv, throughout, everi, step, hire, process, remain, contact, way, final, step, sign, employ, contract, new, employervalor, servic, found, vision, employ, uniqu, skill, experi, qualiti, america, finest, veteran, provid, privat, sector, compani, precis, concert, valuead, servic, america, finest, veteran, optim, career, opportunityw, eager, get, word, veteran, ampl, opportun, employ, privat, sector, ideal, candid, fill, positionsvalor, servicesyour, success, mission, client, locat, houston, activ, seek, experienc, commiss, machineri, assist, possess, strong, supervisori, skill, attent, detail, strong, dedic, safeti, must, ideal, candid, execut, activ, compli, qualiti, requir, health, environment, safeti, regul, implement, precommiss, commiss, procedur, rotari, equipmentexecut, activ, subcontractor, assign, crew, pertain, disciplineensur, effect, util, commiss, manpow, consumablesensur, execut, vendor, specialist, field, activ, assign, resourc, subcontractor, per, vendor, repres, planscarri, equip, inspect, client, repres, ensur, proper, certif, producedprepar, form, pend, test, submit, sign, certif, final, hand, certif, engin, qccoordin, field, vendor, representativeskeep, record, activitiesensur, safeti, practic, strict, follow, execut, activitiesreport, progress, constraint, mechan, supervisorposs, author, site, manag, receiv, issu, permit, work, accord, project, permit, work, proceduresassist, supervisor, expedit, pend, punchlist, item, accord, commiss, manag, prioritiesassist, supervisor, coordin, supervis, constructionsupport, activ, precommiss, commiss, activitiescompani, overviewour, client, premier, engin, construct, procur, compani, execut, largescal, project, intern)"
0,1,0,0,"List(account, execut, , washington, dc, us, dc, washington, sale, passion, improv, qualiti, life, geographi, heart, everyth, esri, geograph, inform, system, gis, technolog, inspir, enabl, govern, univers, busi, worldwid, save, money, live, environ, deeper, understand, chang, world, around, themcar, manag, growth, zero, debt, give, esri, stabil, uncommon, today, volatil, busi, world, privat, held, offer, except, benefit, competit, salari, k, profitshar, program, opportun, person, profession, growth, much, compani, esri, , environment, system, research, instituteour, passion, improv, qualiti, life, geographi, heart, everyth, esri, geograph, inform, system, gis, technolog, inspir, enabl, govern, univers, busi, worldwid, save, money, live, environ, deeper, understand, chang, world, around, themcar, manag, growth, zero, debt, give, esri, stabil, uncommon, today, volatil, busi, world, privat, held, offer, except, benefit, competit, salari, k, profitshar, program, opportun, person, profession, growth, much, moreth, opportun, account, executivea, member, sale, divis, work, collabor, account, team, order, sell, promot, adopt, esri, arcgi, platform, within, organ, part, account, team, respons, facilit, develop, execut, set, strategi, defin, portfolio, account, execut, strategi, util, experi, enterpris, sale, help, custom, leverag, geospati, inform, technolog, achiev, busi, goalsspecificallyprospect, develop, opportun, partner, key, stakehold, envis, develop, implement, locat, strategi, organizationclear, articul, strength, valu, proposit, arcgi, platformdevelop, maintain, healthi, pipelin, opportun, busi, growthdemonstr, thought, understand, insight, industri, knowledg, gis, appli, initi, trend, triggersunderstand, key, busi, driver, within, organ, identifi, key, busi, stakeholdersunderstand, custom, budget, acquisit, processessuccess, execut, account, manag, process, includ, account, priorit, account, resourc, account, planningsuccess, execut, sale, process, 
opportunitiesleverag, lead, account, team, consist, sale, crossdivision, resourc, defin, execut, account, strategyeffect, util, leverag, crm, manag, opportun, drive, buy, processpursu, profession, person, develop, ensur, competit, knowledg, real, estat, industryleverag, social, media, success, prospect, build, profession, networkparticip, trade, show, workshop, seminar, requiredsupport, visual, stori, tell, effect, whiteboard, sessionsb, resourc, take, initi, resolv, issu, educationbachelor, master, gis, busi, administr, relat, field, equival, work, experi, depend, posit, levelexperi, year, enterpris, sale, experi, provid, platform, solut, businessesdemonstr, experi, manag, sale, cycl, includ, prospect, propos, closing, adapt, new, technolog, trend, translat, solut, address, custom, needsdemonstr, experi, strong, partnership, advocaci, customersexcel, present, white, board, negoti, skill, includ, good, listen, probe, qualif, abilitiesexperi, execut, insight, sell, methodologiesdemonstr, understand, mitig, competit, threatsexcel, written, verbal, communic, interperson, skillsabl, manag, priorit, activitiesdemonstr, experi, lead, execut, engag, provid, servic, sell, real, estat, industryknowledg, real, estat, industri, fiscal, year, budget, procur, cyclehigh, motiv, team, player, matur, posit, attitud, passion, meet, challeng, opportun, business, travel, domest, andor, intern, general, knowledg, spatial, analysi, problem, solvingresult, orient, abil, write, craft, smart, attain, realist, timedriven, goal, clear, lead, indic, cultur, anyth, corporatew, collabor, creativ, environ, phone, directori, organ, first, name, relax, dress, code, opendoor, policiesa, place, thrivepassion, peopl, strive, make, differencecasu, dress, codeflex, work, schedulessupport, continu, educationcollegelik, campusa, network, build, amid, lush, landscap, numer, outdoor, patio, areasonsit, caf, includ, starbuck, coffe, bar, loung, areafit, center, avail, comprehens, refer, librari, gis, 
bibliographystateoftheart, confer, center, host, staff, guest, speakersgreen, initiativessolar, rooftop, panel, reduc, carbon, emissionselectr, vehicl, provid, oncampus, transportationhundr, tree, reduc, cost, cool, build, fulltim, bachelor, degre, comput, softwar, sale)","List(account, execut, washington, washington, sale, passion, improv, qualiti, life, geographi, heart, everyth, esri, geograph, inform, system, gis, technolog, inspir, enabl, govern, univers, busi, worldwid, save, money, live, environ, deeper, understand, chang, world, around, themcar, manag, growth, zero, debt, give, esri, stabil, uncommon, today, volatil, busi, world, privat, held, offer, except, benefit, competit, salari, profitshar, program, opportun, person, profession, growth, much, compani, esri, environment, system, research, instituteour, passion, improv, qualiti, life, geographi, heart, everyth, esri, geograph, inform, system, gis, technolog, inspir, enabl, govern, univers, busi, worldwid, save, money, live, environ, deeper, understand, chang, world, around, themcar, manag, growth, zero, debt, give, esri, stabil, uncommon, today, volatil, busi, world, privat, held, offer, except, benefit, competit, salari, profitshar, program, opportun, person, profession, growth, much, moreth, opportun, account, executivea, member, sale, divis, work, collabor, account, team, order, sell, promot, adopt, esri, arcgi, platform, within, organ, part, account, team, respons, facilit, develop, execut, set, strategi, defin, portfolio, account, execut, strategi, util, experi, enterpris, sale, help, custom, leverag, geospati, inform, technolog, achiev, busi, goalsspecificallyprospect, develop, opportun, partner, key, stakehold, envis, develop, implement, locat, strategi, organizationclear, articul, strength, valu, proposit, arcgi, platformdevelop, maintain, healthi, pipelin, opportun, busi, growthdemonstr, thought, understand, insight, industri, knowledg, gis, appli, initi, trend, triggersunderstand, key, busi, 
driver, within, organ, identifi, key, busi, stakeholdersunderstand, custom, budget, acquisit, processessuccess, execut, account, manag, process, includ, account, priorit, account, resourc, account, planningsuccess, execut, sale, process, opportunitiesleverag, lead, account, team, consist, sale, crossdivision, resourc, defin, execut, account, strategyeffect, util, leverag, crm, manag, opportun, drive, buy, processpursu, profession, person, develop, ensur, competit, knowledg, real, estat, industryleverag, social, media, success, prospect, build, profession, networkparticip, trade, show, workshop, seminar, requiredsupport, visual, stori, tell, effect, whiteboard, sessionsb, resourc, take, initi, resolv, issu, educationbachelor, master, gis, busi, administr, relat, field, equival, work, experi, depend, posit, levelexperi, year, enterpris, sale, experi, provid, platform, solut, businessesdemonstr, experi, manag, sale, cycl, includ, prospect, propos, closing, adapt, new, technolog, trend, translat, solut, address, custom, needsdemonstr, experi, strong, partnership, advocaci, customersexcel, present, white, board, negoti, skill, includ, good, listen, probe, qualif, abilitiesexperi, execut, insight, sell, methodologiesdemonstr, understand, mitig, competit, threatsexcel, written, verbal, communic, interperson, skillsabl, manag, priorit, activitiesdemonstr, experi, lead, execut, engag, provid, servic, sell, real, estat, industryknowledg, real, estat, industri, fiscal, year, budget, procur, cyclehigh, motiv, team, player, matur, posit, attitud, passion, meet, challeng, opportun, business, travel, domest, andor, intern, general, knowledg, spatial, analysi, problem, solvingresult, orient, abil, write, craft, smart, attain, realist, timedriven, goal, clear, lead, indic, cultur, anyth, corporatew, collabor, creativ, environ, phone, directori, organ, first, name, relax, dress, code, opendoor, policiesa, place, thrivepassion, peopl, strive, make, differencecasu, dress, codeflex, 
work, schedulessupport, continu, educationcollegelik, campusa, network, build, amid, lush, landscap, numer, outdoor, patio, areasonsit, caf, includ, starbuck, coffe, bar, loung, areafit, center, avail, comprehens, refer, librari, gis, bibliographystateoftheart, confer, center, host, staff, guest, speakersgreen, initiativessolar, rooftop, panel, reduc, carbon, emissionselectr, vehicl, provid, oncampus, transportationhundr, tree, reduc, cost, cool, build, fulltim, bachelor, degre, comput, softwar, sale)"
0,1,1,0,"List(bill, review, manag, us, fl, fort, worth, , spotsourc, solut, llc, global, human, capit, manag, consult, firm, headquart, miami, florida, found, januari, , spotsourc, creat, fusion, innov, servic, offer, meet, increas, demand, today, economi, special, talent, acquisit, staf, execut, search, servic, across, various, function, specif, industri, global, talent, transfus, gtt, servic, util, best, practic, qualif, standard, deliv, talent, temporari, temporarytohir, perman, basi, health, career, transit, hct, subsidiari, global, talent, transfus, offer, placement, servic, specif, grow, healthcar, arena, spotsourc, execut, search, ses, consult, special, breed, talent, evangelist, understand, advis, streamlin, human, resourc, process, direct, organ, requir, long, term, sustain, successsuccess, plan, leadership, develop, program, compens, analysi, recruit, process, outsourc, custom, best, suit, need, busi, understand, demand, costeffect, solut, organizationar, seek, potenti, career, transit, interest, discuss, current, hire, trend, open, posit, vital, career, consult, vcc, offer, career, transit, servic, cater, specif, job, applic, includ, resum, construct, social, media, optim, interview, coachingaddress, n, powerlin, rd, ste, zpompano, beach, fl, offic, phonecbcaacddeeabadbcfeeecf, job, titl, item, review, managerloc, fort, worth, tx, , , depart, item, reviewreport, vp, oper, , , , , , , , , , , , , general, descriptionrespons, overal, aspect, item, review, oper, personnel, hire, qualiti, control, process, workflow, monitor, track, account, staff, regard, product, standard, depart, expectationsduti, responsibilitiesoverse, compani, item, review, depart, operationsrespons, encourag, reinforc, compani, culturedevelop, process, better, depart, implement, new, proceduresprotocol, work, custom, servic, elev, issu, provid, callsimpl, audit, polici, conjunct, polici, payment, integr, depart, monitor, qualityand, qualiti, control, result, depart, respons, ensur, 
overal, metric, complianc, manag, client, expectationsrespons, human, resourc, matter, direct, relat, depart, supervis, ie, interview, hire, train, annual, evalu, electron, time, card, address, personnel, issuesmay, createreview, daili, week, month, report, invoic, log, expensesaddit, dutiesrespons, assign, compli, safeti, rulesregul, conjunct, injuri, ill, prevent, program, iipp, well, maintain, hipaa, complianceoccasion, interact, custom, qualificationsrn, licens, state, texasdiploma, bachelor, scienc, nurs, requiredpast, manageri, experi, prefer, , year, experi, nurseexperi, facil, bill, helpfulstrong, knowledg, icdcptknowledg, fee, schedul, rule, rampc, guidelinesmust, abl, remain, pois, stress, situationshigh, motiv, selfstarterexcept, organiz, skillsabl, handl, deadlin, proactiv, problem, solveeffect, profession, communic, skillsabl, meet, exceed, perform, competenciesdemonstr, outstand, leadership, problem, solv, analyt, skillsabl, think, work, independ, work, overal, team, environmentprofici, microsoft, offic, suit, full, benefit, offer, fulltim, bachelor, degre, hospit, , health, care, health, care, provid)","List(bill, review, manag, fort, worth, spotsourc, solut, llc, global, human, capit, manag, consult, firm, headquart, miami, florida, found, januari, spotsourc, creat, fusion, innov, servic, offer, meet, increas, demand, today, economi, special, talent, acquisit, staf, execut, search, servic, across, various, function, specif, industri, global, talent, transfus, gtt, servic, util, best, practic, qualif, standard, deliv, talent, temporari, temporarytohir, perman, basi, health, career, transit, hct, subsidiari, global, talent, transfus, offer, placement, servic, specif, grow, healthcar, arena, spotsourc, execut, search, ses, consult, special, breed, talent, evangelist, understand, advis, streamlin, human, resourc, process, direct, organ, requir, long, term, sustain, successsuccess, plan, leadership, develop, program, compens, analysi, recruit, process, 
outsourc, custom, best, suit, need, busi, understand, demand, costeffect, solut, organizationar, seek, potenti, career, transit, interest, discuss, current, hire, trend, open, posit, vital, career, consult, vcc, offer, career, transit, servic, cater, specif, job, applic, includ, resum, construct, social, media, optim, interview, coachingaddress, powerlin, ste, zpompano, beach, offic, phonecbcaacddeeabadbcfeeecf, job, titl, item, review, managerloc, fort, worth, depart, item, reviewreport, oper, general, descriptionrespons, overal, aspect, item, review, oper, personnel, hire, qualiti, control, process, workflow, monitor, track, account, staff, regard, product, standard, depart, expectationsduti, responsibilitiesoverse, compani, item, review, depart, operationsrespons, encourag, reinforc, compani, culturedevelop, process, better, depart, implement, new, proceduresprotocol, work, custom, servic, elev, issu, provid, callsimpl, audit, polici, conjunct, polici, payment, integr, depart, monitor, qualityand, qualiti, control, result, depart, respons, ensur, overal, metric, complianc, manag, client, expectationsrespons, human, resourc, matter, direct, relat, depart, supervis, interview, hire, train, annual, evalu, electron, time, card, address, personnel, issuesmay, createreview, daili, week, month, report, invoic, log, expensesaddit, dutiesrespons, assign, compli, safeti, rulesregul, conjunct, injuri, ill, prevent, program, iipp, well, maintain, hipaa, complianceoccasion, interact, custom, qualificationsrn, licens, state, texasdiploma, bachelor, scienc, nurs, requiredpast, manageri, experi, prefer, year, experi, nurseexperi, facil, bill, helpfulstrong, knowledg, icdcptknowledg, fee, schedul, rule, rampc, guidelinesmust, abl, remain, pois, stress, situationshigh, motiv, selfstarterexcept, organiz, skillsabl, handl, deadlin, proactiv, problem, solveeffect, profession, communic, skillsabl, meet, exceed, perform, competenciesdemonstr, outstand, leadership, problem, solv, 
analyt, skillsabl, think, work, independ, work, overal, team, environmentprofici, microsoft, offic, suit, full, benefit, offer, fulltim, bachelor, degre, hospit, health, care, health, care, provid)"
0,0,0,0,"List(account, clerk, us, md, , , , job, overviewapex, environment, consult, firm, offer, stabl, leadership, growth, view, employe, valuabl, resourc, seek, selfmotiv, multifacet, account, payabl, clerk, join, team, rockvill, md, becom, integr, part, continu, success, stori, posit, entail, process, high, volum, invoic, work, fast, pace, environ, key, verifi, various, type, invoic, general, ledger, account, job, number, submit, vendor, compani, personnel, calcul, balanc, due, vendor, review, histori, prior, payment, made, account, candid, must, abl, answer, vendor, personnel, inquiri, via, phone, email, qualificationsthi, posit, requir, high, school, diploma, , year, relev, work, experi, keen, attent, detail, knowledg, commonlyus, concept, practic, procedur, within, account, field, experi, account, softwar, profici, ms, offic, suit, includ, advanc, excel, experi, high, degre, professionalismw, join, team, talent, account, profession, engin, manag, submit, resum, consider, todayurlfeffeaeeffdedbdbfebfebeeaeadabout, apexapex, customerfocus, compani, deliv, environment, health, safeti, engin, servic, , client, across, unit, state, abroad, driven, entrepreneuri, spirit, dedic, provid, respons, costeffect, solut, apex, grown, rapid, sinc, found, work, partnership, public, privat, sector, client, team, expert, provid, servic, tailor, support, custom, uniqu, goal, object, blend, strong, technic, skill, busi, acumen, superior, custom, servic, abl, deliv, creativ, solut, deliv, high, qualiti, result, low, costfrom, commerci, industri, firm, construct, petroleum, util, compani, financi, institut, govern, client, apex, extens, experi, wide, varieti, industri, corpor, profession, resum, includ, proven, capabl, area, water, resourc, remedi, restor, assess, complianc, industri, hygien, among, othersrank, top, , environment, firm, enr, magazin, rank, among, top, , design, firm, enr, magazin, award, , nation, environment, excel, award, environment, stewardship, nation, 
associ, environment, profession, select, , hot, firm, zweig, letter, come, join, award, win, teamapex, entrepreneuri, firm, ensur, senior, manag, abl, move, unencumb, prioriti, success, grow, midsiz, firm, small, enough, employe, still, access, leadership, easi, highperform, recogn, contribut, advanc, without, bureaucraci, , offic, locat, big, enough, provid, comprehens, environment, consult, engin, servic, divers, client, base, provid, resourc, employe, help, profession, develop, offer, incent, bonus, plan, ownership, opportun, success, managersapex, compani, llc, affirm, actionequ, opportun, employ)","List(account, clerk, job, overviewapex, environment, consult, firm, offer, stabl, leadership, growth, view, employe, valuabl, resourc, seek, selfmotiv, multifacet, account, payabl, clerk, join, team, rockvill, becom, integr, part, continu, success, stori, posit, entail, process, high, volum, invoic, work, fast, pace, environ, key, verifi, various, type, invoic, general, ledger, account, job, number, submit, vendor, compani, personnel, calcul, balanc, due, vendor, review, histori, prior, payment, made, account, candid, must, abl, answer, vendor, personnel, inquiri, via, phone, email, qualificationsthi, posit, requir, high, school, diploma, year, relev, work, experi, keen, attent, detail, knowledg, commonlyus, concept, practic, procedur, within, account, field, experi, account, softwar, profici, offic, suit, includ, advanc, excel, experi, high, degre, professionalismw, join, team, talent, account, profession, engin, manag, submit, resum, consider, todayurlfeffeaeeffdedbdbfebfebeeaeadabout, apexapex, customerfocus, compani, deliv, environment, health, safeti, engin, servic, client, across, unit, state, abroad, driven, entrepreneuri, spirit, dedic, provid, respons, costeffect, solut, apex, grown, rapid, sinc, found, work, partnership, public, privat, sector, client, team, expert, provid, servic, tailor, support, custom, uniqu, goal, object, blend, strong, technic, 
skill, busi, acumen, superior, custom, servic, abl, deliv, creativ, solut, deliv, high, qualiti, result, low, costfrom, commerci, industri, firm, construct, petroleum, util, compani, financi, institut, govern, client, apex, extens, experi, wide, varieti, industri, corpor, profession, resum, includ, proven, capabl, area, water, resourc, remedi, restor, assess, complianc, industri, hygien, among, othersrank, top, environment, firm, enr, magazin, rank, among, top, design, firm, enr, magazin, award, nation, environment, excel, award, environment, stewardship, nation, associ, environment, profession, select, hot, firm, zweig, letter, come, join, award, win, teamapex, entrepreneuri, firm, ensur, senior, manag, abl, move, unencumb, prioriti, success, grow, midsiz, firm, small, enough, employe, still, access, leadership, easi, highperform, recogn, contribut, advanc, without, bureaucraci, offic, locat, big, enough, provid, comprehens, environment, consult, engin, servic, divers, client, base, provid, resourc, employe, help, profession, develop, offer, incent, bonus, plan, ownership, opportun, success, managersapex, compani, llc, affirm, actionequ, opportun, employ)"
0,1,1,0,"List(head, content, mf, de, berlin, androidpit, found, , thefonpit, agros, intern, web, portalandroidpitto, world, largest, android, communiti, everi, month, , million, android, tech, enthusiast, around, world, log, intoandroidpit, know, theyll, find, latest, innov, inform, android, androidapp, , differ, languag, busi, activ, consist, close, collabor, mani, world, largest, tech, web, mobil, companiesin, app, mediaamend, fonpit, ag, brand, portfolio, onestop, shop, app, marketingapp, mediadeliv, qualiti, perform, market, app, give, client, success, campaign, possibl, custom, concept, sustain, resultsloc, heart, berlin, constant, look, high, motiv, success, driven, person, helpandroidpitandapp, mediagrow, even, responsibilitiesmanag, englishspeak, editori, team, build, team, bestinclass, editorsset, content, creation, schedul, ensur, deadlin, adher, toresearch, write, latest, tech, topic, news, relat, android, ecosystemensur, content, site, consist, high, qualityb, face, voic, urladbddeccedeefeceeaa, knowhow, univers, colleg, degre, journal, media, communic, studiesprofession, experi, relev, field, eg, onlin, editori, communiti, manag, tech, writingpossess, contact, within, key, compani, mobil, industryexperi, lead, amp, motiv, small, teamspass, android, world, hone, write, skill, person, flair, willing, share, android, knowledgestrong, commit, success, motiv, inspir, personalitycomfort, dynam, startup, environmentenglish, nativ, speakerloc, berlin, surround, benefitsb, part, fastgrow, compani, boom, industryfast, decisionmak, thank, flat, hierarchi, clear, structuresfreedom, unfold, idea, amp, creativ, take, respons, right, startcontinu, growth, success, intern, team, thrive, familiar, profession, work, atmospherefre, drink, tabl, tenni, lunch, cater, feel, good, manag, team, event, offic, dog, fulltim, master, degre, onlin, media, manag)","List(head, content, berlin, androidpit, found, thefonpit, agros, intern, web, portalandroidpitto, world, largest, 
android, communiti, everi, month, million, android, tech, enthusiast, around, world, log, intoandroidpit, know, theyll, find, latest, innov, inform, android, androidapp, differ, languag, busi, activ, consist, close, collabor, mani, world, largest, tech, web, mobil, companiesin, app, mediaamend, fonpit, brand, portfolio, onestop, shop, app, marketingapp, mediadeliv, qualiti, perform, market, app, give, client, success, campaign, possibl, custom, concept, sustain, resultsloc, heart, berlin, constant, look, high, motiv, success, driven, person, helpandroidpitandapp, mediagrow, even, responsibilitiesmanag, englishspeak, editori, team, build, team, bestinclass, editorsset, content, creation, schedul, ensur, deadlin, adher, toresearch, write, latest, tech, topic, news, relat, android, ecosystemensur, content, site, consist, high, qualityb, face, voic, urladbddeccedeefeceeaa, knowhow, univers, colleg, degre, journal, media, communic, studiesprofession, experi, relev, field, onlin, editori, communiti, manag, tech, writingpossess, contact, within, key, compani, mobil, industryexperi, lead, amp, motiv, small, teamspass, android, world, hone, write, skill, person, flair, willing, share, android, knowledgestrong, commit, success, motiv, inspir, personalitycomfort, dynam, startup, environmentenglish, nativ, speakerloc, berlin, surround, benefitsb, part, fastgrow, compani, boom, industryfast, decisionmak, thank, flat, hierarchi, clear, structuresfreedom, unfold, idea, amp, creativ, take, respons, right, startcontinu, growth, success, intern, team, thrive, familiar, profession, work, atmospherefre, drink, tabl, tenni, lunch, cater, feel, good, manag, team, event, offic, dog, fulltim, master, degre, onlin, media, manag)"
0,1,1,0,"List(lead, guest, servic, specialist, , , , us, ca, san, francisco, , airenvi, mission, provid, lucrat, yet, hassl, free, full, servic, short, term, properti, manag, around, world, combin, charm, home, amen, boutiqu, hotelcurr, shortterm, rental, properti, manag, compani, run, ineffici, thus, charg, owner, , month, revenu, use, price, algorithm, crossplatform, list, technolog, goal, increas, rental, incom, , charg, , commiss, provid, turnkey, experiencew, like, think, airenvi, creat, new, way, peopl, becom, excit, properti, manag, love, properti, managersfirst, use, case, airenvi, client, want, convert, longterm, rental, shortterm, rental, , earn, incom, flexibl, stay, place, wantairenvi, friend, next, door, , , custom, videourlefdefaeebcadbcbadccbaceecb, airenvyhey, season, entrepreneur, heart, san, francisco, soma, neighborhood, look, someon, embodi, entrepreneuri, spirit, pay, strong, attent, detail, want, part, next, big, thing, busi, feel, like, circus, time, allstar, team, one, kind, cultur, get, littl, tast, hereairenvi, , technolog, driven, properti, manag, compani, amultibillion, dollar, industryand, revolution, vacat, rental, space, grow, record, speed, expand, new, market, platform, allow, owner, put, vacat, rental, autopilot, proven, team, startup, veteran, love, join, thefamili, , , name, , airbnb, properti, manag, compani, san, francisco, accord, thesf, chronicl, , support, resourc, investor, mani, leader, technolog, real, estat, industriesth, positionw, ultim, peac, keeper, amp, problem, solverairenvi, grow, faster, handl, look, someon, help, us, scale, seek, bestinclass, lead, guest, servic, specialist, passion, delight, guest, owner, youll, play, direct, role, improv, custom, experi, scale, busi, creat, power, brand, advocatesresponsibilitiesservic, first, , interact, guest, owner, daili, listen, address, inquiri, via, phone, email, chatleadership, , set, preced, write, beauti, help, email, get, inboxzero, first, answer, phone, last, 
giveup, interest, escalationcross, collabor, , act, eye, ear, airenvi, busi, speakto, bug, request, new, featur, influenc, product, positivelyultim, multitask, , your, abl, manag, multipl, daytoday, gift, your, abl, ensur, person, contact, airenvi, posit, experi, even, face, hundr, email, dayyouproven, abil, take, custom, irat, delighted, make, decis, quick, high, sens, urgenc, spill, team, memberspass, delight, peoplethr, pressur, your, proactiv, recogn, solv, issu, ariseexcel, written, verbal, communic, skill, , spot, error, without, spell, checkfocus, defin, scale, busi, thru, playbook, definit, experi, crm, softwar, live, chat, phone, includ, one, year, minimum, custom, serviceyou, heed, call, servic, understand, must, flexibl, schedul, includ, avail, earli, morn, late, even, weekendsholidaysb, super, organ, care, deepli, detailszendesk, experi, bonus, requir, competit, payyoul, abl, eat, steak, everyday, choos, health, insurancew, vitamin, relat, healthi, hope, dont, need, thispiata, partiesy, danc, around, blindfold, swing, stick, awesomesnack, snacksal, cooool, startup, karat, lessonsr, air, ninja, kick, encourag, timesfre, massageswhen, hit, mileston, well, buy, amassag, chairdiscount, gym, membershipget, swole, like, lastemployeeth, best, benefit, allb, part, amaz, teamfamilyclickherefor, insight, interview, process)","List(lead, guest, servic, specialist, san, francisco, airenvi, mission, provid, lucrat, yet, hassl, free, full, servic, short, term, properti, manag, around, world, combin, charm, home, amen, boutiqu, hotelcurr, shortterm, rental, properti, manag, compani, run, ineffici, thus, charg, owner, month, revenu, use, price, algorithm, crossplatform, list, technolog, goal, increas, rental, incom, charg, commiss, provid, turnkey, experiencew, like, think, airenvi, creat, new, way, peopl, becom, excit, properti, manag, love, properti, managersfirst, use, case, airenvi, client, want, convert, longterm, rental, shortterm, rental, earn, incom, flexibl, 
stay, place, wantairenvi, friend, next, door, custom, videourlefdefaeebcadbcbadccbaceecb, airenvyhey, season, entrepreneur, heart, san, francisco, soma, neighborhood, look, someon, embodi, entrepreneuri, spirit, pay, strong, attent, detail, want, part, next, big, thing, busi, feel, like, circus, time, allstar, team, one, kind, cultur, get, littl, tast, hereairenvi, technolog, driven, properti, manag, compani, amultibillion, dollar, industryand, revolution, vacat, rental, space, grow, record, speed, expand, new, market, platform, allow, owner, put, vacat, rental, autopilot, proven, team, startup, veteran, love, join, thefamili, name, airbnb, properti, manag, compani, san, francisco, accord, thesf, chronicl, support, resourc, investor, mani, leader, technolog, real, estat, industriesth, positionw, ultim, peac, keeper, amp, problem, solverairenvi, grow, faster, handl, look, someon, help, scale, seek, bestinclass, lead, guest, servic, specialist, passion, delight, guest, owner, youll, play, direct, role, improv, custom, experi, scale, busi, creat, power, brand, advocatesresponsibilitiesservic, first, interact, guest, owner, daili, listen, address, inquiri, via, phone, email, chatleadership, set, preced, write, beauti, help, email, get, inboxzero, first, answer, phone, last, giveup, interest, escalationcross, collabor, act, eye, ear, airenvi, busi, speakto, bug, request, new, featur, influenc, product, positivelyultim, multitask, your, abl, manag, multipl, daytoday, gift, your, abl, ensur, person, contact, airenvi, posit, experi, even, face, hundr, email, dayyouproven, abil, take, custom, irat, delighted, make, decis, quick, high, sens, urgenc, spill, team, memberspass, delight, peoplethr, pressur, your, proactiv, recogn, solv, issu, ariseexcel, written, verbal, communic, skill, spot, error, without, spell, checkfocus, defin, scale, busi, thru, playbook, definit, experi, crm, softwar, live, chat, phone, includ, one, year, minimum, custom, serviceyou, heed, call, servic, 
understand, must, flexibl, schedul, includ, avail, earli, morn, late, even, weekendsholidaysb, super, organ, care, deepli, detailszendesk, experi, bonus, requir, competit, payyoul, abl, eat, steak, everyday, choos, health, insurancew, vitamin, relat, healthi, hope, dont, need, thispiata, partiesy, danc, around, blindfold, swing, stick, awesomesnack, snacksal, cooool, startup, karat, lessonsr, air, ninja, kick, encourag, timesfre, massageswhen, hit, mileston, well, buy, amassag, chairdiscount, gym, membershipget, swole, like, lastemployeeth, best, benefit, allb, part, amaz, teamfamilyclickherefor, insight, interview, process)"
0,1,1,0,"List(hp, bsm, sme, us, fl, pensacola, , solut, womanown, small, busi, whose, focus, servic, manag, use, best, breed, technolog, implement, industri, best, practic, follow, itil, framework, work, extens, area, plan, design, architect, assess, implement, train, technolog, solutionssolut, focus, area, includ, enterpris, network, amp, system, manag, architectur, implement, servic, manag, includ, servic, desk, associ, process, definit, incid, amp, problem, manag, chang, amp, configur, manag, servic, level, manag, configur, manag, databas, cmdb, asset, manag, alertev, manag, focus, best, breed, technolog, follow, industri, best, practic, includ, itil, isow, strong, focus, train, knowledg, transfer, associ, solut, mani, resourc, cours, develop, instructor, mani, vendor, partner, practition, experi, come, train, environ, usw, strong, set, core, valu, base, biblic, principl, includ, great, integr, high, ethic, alway, strive, excel, alway, seek, candid, want, work, amaz, technolog, compani, make, differ, part, someth, great, long, term, implementationconfigurationtestingtrain, onhp, servic, health, report, must, us, citizenan, activ, tssci, clearanc, requiredaddit, toolshp, bsm, applic, nnm, na, omi, omw, sitescop, etc, , beneficialsoft, skill, reqsleadership, strong, written, amp, verbal, communicationbeneficialknowledg, experi, hp, softwar, tool, bigplusaddit, itil, isk, knowledgeexperi, help, , fulltim, , inform, technolog, servic)","List(bsm, sme, pensacola, solut, womanown, small, busi, whose, focus, servic, manag, use, best, breed, technolog, implement, industri, best, practic, follow, itil, framework, work, extens, area, plan, design, architect, assess, implement, train, technolog, solutionssolut, focus, area, includ, enterpris, network, amp, system, manag, architectur, implement, servic, manag, includ, servic, desk, associ, process, definit, incid, amp, problem, manag, chang, amp, configur, manag, servic, level, manag, configur, manag, databas, cmdb, asset, 
manag, alertev, manag, focus, best, breed, technolog, follow, industri, best, practic, includ, itil, isow, strong, focus, train, knowledg, transfer, associ, solut, mani, resourc, cours, develop, instructor, mani, vendor, partner, practition, experi, come, train, environ, usw, strong, set, core, valu, base, biblic, principl, includ, great, integr, high, ethic, alway, strive, excel, alway, seek, candid, want, work, amaz, technolog, compani, make, differ, part, someth, great, long, term, implementationconfigurationtestingtrain, onhp, servic, health, report, must, citizenan, activ, tssci, clearanc, requiredaddit, toolshp, bsm, applic, nnm, omi, omw, sitescop, etc, beneficialsoft, skill, reqsleadership, strong, written, amp, verbal, communicationbeneficialknowledg, experi, softwar, tool, bigplusaddit, itil, isk, knowledgeexperi, help, fulltim, inform, technolog, servic)"
0,1,0,0,"List(custom, servic, associ, , part, time, , us, az, phoenix, , novitex, enterpris, solut, former, pitney, bow, manag, servic, deliv, innov, document, communic, manag, solut, help, compani, around, world, drive, busi, process, effici, increas, product, reduc, cost, improv, custom, satisfact, almost, , year, client, turn, us, integr, optim, enterprisewid, busi, process, empow, employe, increas, product, maxim, result, trust, partner, continu, focus, deliv, secur, technologyen, document, communic, solut, improv, client, work, process, enhanc, custom, interact, drive, growth, custom, servic, associ, base, phoenix, az, right, candid, integr, part, talent, team, support, continu, growthresponsibilitiesperform, various, mail, center, activ, sort, meter, fold, insert, deliveri, pickup, etclift, heavi, box, file, paper, neededmaintain, highest, level, custom, care, demonstr, friend, cooper, attitudedemonstr, flexibl, satisfi, custom, demand, high, volum, product, environmentconsist, adher, busi, procedur, guidelinesadher, safeti, procedurestak, direct, supervisor, site, managermaintain, log, report, document, attent, detailparticip, crosstrain, perform, duti, assign, file, outgo, shipment, etcoper, mail, copi, scan, equipmentship, amp, receivinghandl, timesensit, materi, like, confidenti, urgent, packagesperform, task, assignedscan, incom, mail, recipientsperform, file, purg, pullscreat, file, ship, filesprovid, backfil, neededent, inform, daili, spreadsheetsidentifi, charg, match, billingsort, deliv, mail, small, packag, minimum, requirementsminimum, , month, custom, servic, relat, experi, requiredhigh, school, diploma, equival, ged, requiredvalid, driver, licens, good, drive, record, requiredpref, qualificationskeyboard, window, environ, pc, skill, requir, word, excel, powerpoint, preferredexperi, run, mail, post, equip, plusexcel, communic, skill, verbal, writtenlift, , lbs, without, accommodationswilling, avail, work, addit, hour, assignedwilling, submit, 
preemploy, drug, screen, crimin, background, checkabl, effect, work, individu, team, environmentcompet, perform, multipl, function, tasksabl, meet, employ, attend, polici, , parttim, high, school, equival, financi, servic, custom, servic)","List(custom, servic, associ, part, time, phoenix, novitex, enterpris, solut, former, pitney, bow, manag, servic, deliv, innov, document, communic, manag, solut, help, compani, around, world, drive, busi, process, effici, increas, product, reduc, cost, improv, custom, satisfact, almost, year, client, turn, integr, optim, enterprisewid, busi, process, empow, employe, increas, product, maxim, result, trust, partner, continu, focus, deliv, secur, technologyen, document, communic, solut, improv, client, work, process, enhanc, custom, interact, drive, growth, custom, servic, associ, base, phoenix, right, candid, integr, part, talent, team, support, continu, growthresponsibilitiesperform, various, mail, center, activ, sort, meter, fold, insert, deliveri, pickup, etclift, heavi, box, file, paper, neededmaintain, highest, level, custom, care, demonstr, friend, cooper, attitudedemonstr, flexibl, satisfi, custom, demand, high, volum, product, environmentconsist, adher, busi, procedur, guidelinesadher, safeti, procedurestak, direct, supervisor, site, managermaintain, log, report, document, attent, detailparticip, crosstrain, perform, duti, assign, file, outgo, shipment, etcoper, mail, copi, scan, equipmentship, amp, receivinghandl, timesensit, materi, like, confidenti, urgent, packagesperform, task, assignedscan, incom, mail, recipientsperform, file, purg, pullscreat, file, ship, filesprovid, backfil, neededent, inform, daili, spreadsheetsidentifi, charg, match, billingsort, deliv, mail, small, packag, minimum, requirementsminimum, month, custom, servic, relat, experi, requiredhigh, school, diploma, equival, ged, requiredvalid, driver, licens, good, drive, record, requiredpref, qualificationskeyboard, window, environ, skill, requir, word, 
excel, powerpoint, preferredexperi, run, mail, post, equip, plusexcel, communic, skill, verbal, writtenlift, lbs, without, accommodationswilling, avail, work, addit, hour, assignedwilling, submit, preemploy, drug, screen, crimin, background, checkabl, effect, work, individu, team, environmentcompet, perform, multipl, function, tasksabl, meet, employ, attend, polici, parttim, high, school, equival, financi, servic, custom, servic)"


In [0]:
# Print the column names and data types of df_final_words.
df_final_words.printSchema()

In [0]:
# Drop every row that contains a null in at least one column (how='any').
df_final = df_final_words.dropna(how='any')

In [0]:
# Number of rows in df_final.
df_final.count()

In [0]:
# Random split with weights 0.7 / 0.3 (train / test); the fixed seed keeps it repeatable.
train_set, test_set = df_final.randomSplit([0.7, 0.3], seed=42)

###Logistic Regression model

In [0]:
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.ml.feature import StringIndexer, VectorAssembler

# --- Text features -----------------------------------------------------------
# Hash the token list in "words" into 1000 term-frequency buckets, then rescale
# the counts with IDF. minDocFreq=5 filters out terms seen in very few documents.
hashingtf = HashingTF(inputCol="words", outputCol="tf_features", numFeatures=1000)
idf = IDF(inputCol="tf_features", outputCol="tfidf_features", minDocFreq=5)

# --- Categorical features ----------------------------------------------------
# Each flag column is mapped to a numeric index; handleInvalid="keep" keeps rows
# with invalid values instead of failing on them.
telecommuting_indexer = StringIndexer(
    inputCol="telecommuting",
    outputCol="telecommuting_index",
    handleInvalid="keep",
)
has_company_logo_indexer = StringIndexer(
    inputCol="has_company_logo",
    outputCol="has_company_logo_index",
    handleInvalid="keep",
)
has_questions_indexer = StringIndexer(
    inputCol="has_questions",
    outputCol="has_questions_index",
    handleInvalid="keep",
)

# --- Feature vector ----------------------------------------------------------
# Concatenate the TF-IDF vector and the three indexed flags into "features".
assembler = VectorAssembler(
    inputCols=[
        "tfidf_features",
        "telecommuting_index",
        "has_company_logo_index",
        "has_questions_index",
    ],
    outputCol="features",
)

# --- Classifier --------------------------------------------------------------
# Logistic regression with the "fraudulent" column as the label.
lr_model = LogisticRegression(labelCol="fraudulent")

# Chain all stages so they are fitted and applied in this order.
pipeline = Pipeline(
    stages=[
        hashingtf,
        idf,
        telecommuting_indexer,
        has_company_logo_indexer,
        has_questions_indexer,
        assembler,
        lr_model,
    ]
)

# Fit on the training split only, then score both splits with the fitted model.
pipelineFit_lr = pipeline.fit(train_set)
train_data_lr = pipelineFit_lr.transform(train_set)
test_data_lr = pipelineFit_lr.transform(test_set)

train_data_lr.show()

In [0]:
# Show the true label next to the predicted label for the test split.
test_data_lr.select(['fraudulent','prediction']).show()

##Evaluating the model

##1. Area under the ROC

In [0]:
from pyspark.ml.evaluation import BinaryClassificationEvaluator

# Area under the ROC curve on the test split.
# The evaluator is given the model's continuous scores ("rawPrediction") so the
# curve is traced across thresholds. Feeding it the hard 0/1 "prediction" column
# collapses the curve to a single operating point and misreports the AUC.
AUC_evaluator = BinaryClassificationEvaluator(
    rawPredictionCol='rawPrediction',
    labelCol='fraudulent',
    metricName='areaUnderROC',
)
AUC = AUC_evaluator.evaluate(test_data_lr)
print("The area under the curve is {}".format(AUC))

##2. Area under the PR

In [0]:
# Area under the precision-recall curve on the test split.
# Uses the continuous "rawPrediction" scores rather than the hard 0/1
# "prediction" column, so the curve covers every threshold instead of one point.
PR_evaluator = BinaryClassificationEvaluator(
    rawPredictionCol='rawPrediction',
    labelCol='fraudulent',
    metricName='areaUnderPR',
)
PR = PR_evaluator.evaluate(test_data_lr)
print("The area under the PR curve is {}".format(PR))

##3. Accuracy

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Accuracy of the logistic regression predictions on the test split.
ACC_evaluator = MulticlassClassificationEvaluator(
    labelCol="fraudulent",
    predictionCol="prediction",
    metricName="accuracy",
)
accuracy = ACC_evaluator.evaluate(test_data_lr)
print("The accuracy of the model is {}".format(accuracy))

##4. Confusion Matrix

In [0]:
from sklearn.metrics import confusion_matrix

# Collect labels and predictions with ONE toPandas() call so each label stays
# paired with its own prediction (two independent collects are not guaranteed
# to return rows in the same order) and only one Spark job runs.
lr_results_pd = test_data_lr.select("fraudulent", "prediction").toPandas()
y_true = lr_results_pd[["fraudulent"]]
y_pred = lr_results_pd[["prediction"]]

# sklearn convention: rows are true classes, columns are predicted classes.
cnf_matrix = confusion_matrix(y_true, y_pred)
print("Below is the confusion matrix \n {}".format(cnf_matrix))

##5.F1 Score

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# F1 score of the logistic regression predictions on the test split,
# as computed by the evaluator's "f1" metric.
f1_evaluator = MulticlassClassificationEvaluator(
    labelCol="fraudulent",
    predictionCol="prediction",
    metricName="f1",
)
f1 = f1_evaluator.evaluate(test_data_lr)
print("The F1 score of the model is {}".format(f1))

###Naive Bayes Model

In [0]:
# Cast the 'fraudulent' column of df3 to string.
# NOTE(review): the Naive Bayes pipeline below is fitted on train_set, which was
# split from df_final, not df3 - this cast likely has no effect on the model; confirm.
df3 = df3.withColumn('fraudulent',col('fraudulent').cast('string'))

In [0]:
# Print the column names and data types of df3.
df3.printSchema()

In [0]:
from pyspark.ml.classification import NaiveBayes

# Index the 'fraudulent' column into a numeric "label" column.
# NOTE(review): StringIndexer assigns indices by label frequency by default, so
# the index is not guaranteed to equal the original 0/1 value - verify.
labelIndexer = StringIndexer(inputCol="fraudulent", outputCol="label")

# Multinomial Naive Bayes with smoothing of 1.0, trained against "label".
nb = NaiveBayes(smoothing=1.0, modelType="multinomial", labelCol="label")

# Reuse the feature stages built for the logistic regression pipeline and put
# the label indexer first and the Naive Bayes classifier last.
pipeline_nb = Pipeline(
    stages=[
        labelIndexer,
        hashingtf,
        idf,
        telecommuting_indexer,
        has_company_logo_indexer,
        has_questions_indexer,
        assembler,
        nb,
    ]
)

# Fit on the training split.
pipelineFit_nb = pipeline_nb.fit(train_set)

# Score the test split with the fitted pipeline.
test_data_nb = pipelineFit_nb.transform(test_set)


In [0]:
# Print the column names and data types of the scored test split.
test_data_nb.printSchema()

In [0]:
# Show the indexed label, predicted label and class probabilities for the first 5 rows.
test_data_nb.select("label", "prediction", "probability").show(5)

###1.Accuracy

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Accuracy of the Naive Bayes predictions on the test split.
evaluator = MulticlassClassificationEvaluator(
    labelCol="label",
    predictionCol="prediction",
    metricName="accuracy",
)
accuracy = evaluator.evaluate(test_data_nb)
print("Model Accuracy: ", accuracy)

###2.Precision

In [0]:
# Weighted precision of the Naive Bayes predictions on the test split.
evaluator = MulticlassClassificationEvaluator(
    labelCol="label", predictionCol="prediction", metricName="weightedPrecision")
precision = evaluator.evaluate(test_data_nb)
print("Precision = %g" % (precision))
# 1 - precision is NOT the test error (that is 1 - accuracy), so it is printed
# under its own name instead of the former "Test Error" label.
print("1 - Precision = %g " % (1.0 - precision))

###3.Recall

In [0]:
# Weighted recall of the Naive Bayes predictions on the test split.
evaluator = MulticlassClassificationEvaluator(
    labelCol="label",
    predictionCol="prediction",
    metricName="weightedRecall",
)
recall = evaluator.evaluate(test_data_nb)
print("Recall = %g" % recall)
# Recall weighted by class frequency works out to overall accuracy, so
# 1 - recall is the test error here.
print("Test Error = %g " % (1.0 - recall))

##4.Confusion Matrix

In [0]:
from sklearn.metrics import confusion_matrix

# Collect labels and predictions in ONE toPandas() call. Two separate calls
# run the Spark job twice, and Spark does not guarantee the same row order
# across actions, so the two columns could end up misaligned.
nb_outcomes = test_data_nb.select("label", "prediction").toPandas()

# Kept as single-column frames, the same shape the cell produced before
y_true = nb_outcomes[["label"]]
y_pred = nb_outcomes[["prediction"]]

# Rows are the true classes, columns the predicted classes
cnf_matrix = confusion_matrix(y_true, y_pred)
print("Below is the confusion matrix \n {}".format(cnf_matrix))

### 5. F1 Score

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# F1 for the Naive Bayes predictions, scored against the indexed "label"
# column. Spark's "f1" metric is the class-frequency-weighted F-measure.
f1_evaluator = MulticlassClassificationEvaluator(
    metricName="f1",
    labelCol="label",
    predictionCol="prediction",
)
f1 = f1_evaluator.evaluate(test_data_nb)
print("The F1 score of the model is {}".format(f1))

We applied two classification models to the data: Logistic Regression and Naive Bayes. Based on the evaluation metrics, the Logistic Regression model has the better F1 score: 0.965958829543668, compared with 0.9301063792233807 for the Naive Bayes model.

###Logistic Regression

In [0]:
# Per-class sampling fractions for the training set: 90% of the rows with
# fraudulent == 0 and 10% of the rows with fraudulent == 1.
class_fractions = {0: 0.9, 1: 0.1}
train = df_final.sampleBy("fraudulent", fractions=class_fractions, seed=10)

# Every row of df_final that was not sampled into train becomes test data.
# NOTE(review): subtract() has EXCEPT DISTINCT semantics, so duplicate rows in
# df_final are collapsed in `test`; confirm this is intended.
test = df_final.subtract(train)

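We split the data into train and test sets using `sampleBy` (stratified sampling) with fractions 0.9 and 0.1: 90% of the non-fraudulent (0) rows and 10% of the fraudulent (1) rows are sampled into the training set, and the remaining rows form the test set.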

In [0]:
# Fit `pipeline` (defined earlier in the notebook; presumably the logistic
# regression pipeline - confirm) on the sampleBy-based training split
pipelineFit_lr_new = pipeline.fit(train)

# Score the rows left out of `train` with the fitted pipeline
test_data_lr_new = pipelineFit_lr_new.transform(test)


In [0]:
# Show the true "fraudulent" value next to the model's "prediction" (show() prints the first 20 rows by default)
test_data_lr_new.select(['fraudulent','prediction']).show()

###Confusion Matrix

In [0]:
from sklearn.metrics import confusion_matrix

# Collect labels and predictions in ONE toPandas() call. `test` is built with
# subtract(), which shuffles, and Spark does not guarantee the same row order
# across separate actions - two collects could misalign the two columns.
lr_new_outcomes = test_data_lr_new.select("fraudulent", "prediction").toPandas()

# Kept as single-column frames, the same shape the cell produced before
y_true = lr_new_outcomes[["fraudulent"]]
y_pred = lr_new_outcomes[["prediction"]]

# Rows are the true classes, columns the predicted classes
cnf_matrix = confusion_matrix(y_true, y_pred)
print("Below is the confusion matrix \n {}".format(cnf_matrix))

###F1 Score

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# F1 for the model trained on the sampleBy split, scored against the raw
# "fraudulent" column. Spark's "f1" metric is the class-frequency-weighted
# F-measure.
f1_evaluator = MulticlassClassificationEvaluator(
    metricName="f1",
    labelCol="fraudulent",
    predictionCol="prediction",
)
f1 = f1_evaluator.evaluate(test_data_lr_new)
print("The F1 score of the model is {}".format(f1))

The fraudulent column is imbalanced, so we applied a stratified split to the dataset. The fraction of rows to sample for each fraudulent value is given in the `fractions` argument: 0.9 for class 0 and 0.1 for class 1. For imbalanced data, the best evaluation metrics are the F1 score and the confusion matrix. The F1 score is 0.8100757877166505, which is lower than that of the other Logistic Regression model because only a very small percentage of the class-1 rows was sampled into the training set.

###Models Saving

In [0]:
# Persist the three fitted pipelines. A plain save() raises if the target
# path already exists, which makes this cell fail on every re-run of the
# notebook; write().overwrite() replaces the previously stored model instead.
pipelineFit_nb.write().overwrite().save('/FileStore/tables/NaivebayesModelold')
pipelineFit_lr.write().overwrite().save('/FileStore/tables/LogisticRegressionModelold')
pipelineFit_lr_new.write().overwrite().save('/FileStore/tables/LogisticRegressionModelnew')

##Reference
#####https://stackoverflow.com/questions/53579444/efficient-text-preprocessing-using-pyspark-clean-tokenize-stopwords-stemming
#####https://towardsdatascience.com/natural-language-processing-with-pyspark-and-spark-nlp-b5b29f8faba