# Classifying Job Postings

## Building the Model

This is the file with the final model that is used for the app.

In [16]:
# Importing packages
import numpy as np
import pandas as pd
import sklearn
import imblearn
import string
import spacy

from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English

from sklearn import preprocessing
from sklearn.base import TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from imblearn.under_sampling import RandomUnderSampler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from datetime import datetime

In [2]:
# Reading the data and exploring it
df = pd.read_csv("fake_job_postings.csv")

In [4]:
# Replacing NA data
df.fillna(' ', inplace=True)

# Combining all textual data
df['text_data'] = ( df['title'] + ' ' +
                    df['company_profile'] + ' ' + 
                    df['description'] + ' ' + 
                    df['requirements'] + ' ' + 
                    df['benefits']
                  )

df.drop(columns =['title', 'company_profile', 'description', 'requirements', 'benefits'], inplace = True)

In [5]:
# Creating balanced dataframe
y = df['fraudulent']
x, y = df.drop(['fraudulent'], axis=1), df['fraudulent']

rus = RandomUnderSampler(random_state=100, replacement=True)
x, y = rus.fit_resample(x, y)

# Removing columns that will not be needed
x = x['text_data']

# Splitting training, testing, validation datasets
x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.30)
x_validation, x_test, y_validation, y_test = train_test_split(x_temp, y_temp, test_size=0.50)

In [6]:
# Using to track how long the model is taking to run
global count
count = 0

In [7]:
# List of punctuations
# Create our list of punctuation marks
punctuations = string.punctuation

# Create our list of stopwords
nlp = spacy.load('en_core_web_sm')
stop_words = spacy.lang.en.stop_words.STOP_WORDS

# Load English tokenizer, tagger, parser, NER and word vectors
parser = English()

# Rewritten tokenizer function
def spacy_tokenizer(sentence):
    # Creating our token object, which is used to create documents with linguistic annotations.
    mytokens = parser(sentence)

    new_mytokens = []
    # Lemmatizing each token and converting each token into lowercase
    for word in mytokens:
        word_str = str(word)
        if word_str not in stop_words and word_str not in punctuations:
            doc = nlp(word_str)
            if doc[0].lemma_ != "-PRON-":
                new_mytokens.append(doc[0].lemma_.lower().strip())
            else:
                new_mytokens.append(doc[0].lower_)

    # return preprocessed list of tokens
    global count
    count += 1
    print(f'Finished loop #{count}', datetime.now())
    return new_mytokens

# Further cleaning of text data
class predictors(TransformerMixin):
    def transform(self, X, **transform_params):
        # Cleaning Text
        cleaned = [clean_text(text) for text in X]
        print("Finished cleaning", datetime.now())
        return cleaned

    def fit(self, X, y=None, **fit_params):
        return self

    def get_params(self, deep=True):
        return {}

def clean_text(text):
    return text.strip().lower()

In [8]:
# Vectorizers
vectorizer = CountVectorizer(tokenizer = spacy_tokenizer, ngram_range=(1,1))
tfidf_vector = TfidfVectorizer(tokenizer = spacy_tokenizer)

In [9]:
lr = LogisticRegression(penalty=None, max_iter=1000)

In [10]:
pipe = Pipeline([("cleaner", predictors()),
                 ('vectorizer', vectorizer),
                 ('classifier', lr)])

In [11]:
pipe.fit(x_train, y_train)

Finished cleaning 2022-12-13 21:35:40.308896




Finished loop #1 2022-12-13 21:35:42.141071
Finished loop #2 2022-12-13 21:35:43.992002
Finished loop #3 2022-12-13 21:35:45.521324
Finished loop #4 2022-12-13 21:35:45.915583
Finished loop #5 2022-12-13 21:35:47.090700
Finished loop #6 2022-12-13 21:35:47.974292
Finished loop #7 2022-12-13 21:35:48.628416
Finished loop #8 2022-12-13 21:35:51.827250
Finished loop #9 2022-12-13 21:35:52.663708
Finished loop #10 2022-12-13 21:35:52.996551
Finished loop #11 2022-12-13 21:35:53.099536
Finished loop #12 2022-12-13 21:35:54.264703
Finished loop #13 2022-12-13 21:35:55.423477
Finished loop #14 2022-12-13 21:35:56.143650
Finished loop #15 2022-12-13 21:35:57.517639
Finished loop #16 2022-12-13 21:35:58.156168
Finished loop #17 2022-12-13 21:36:00.844240
Finished loop #18 2022-12-13 21:36:04.447535
Finished loop #19 2022-12-13 21:36:05.281900
Finished loop #20 2022-12-13 21:36:06.441023
Finished loop #21 2022-12-13 21:36:07.243127
Finished loop #22 2022-12-13 21:36:07.715417
Finished loop #23 2

Finished loop #182 2022-12-13 21:39:43.893904
Finished loop #183 2022-12-13 21:39:44.013849
Finished loop #184 2022-12-13 21:39:44.831336
Finished loop #185 2022-12-13 21:39:45.098641
Finished loop #186 2022-12-13 21:39:45.552552
Finished loop #187 2022-12-13 21:39:46.064323
Finished loop #188 2022-12-13 21:39:47.203877
Finished loop #189 2022-12-13 21:39:49.682120
Finished loop #190 2022-12-13 21:39:50.000902
Finished loop #191 2022-12-13 21:39:51.855741
Finished loop #192 2022-12-13 21:39:52.487545
Finished loop #193 2022-12-13 21:39:54.947055
Finished loop #194 2022-12-13 21:39:55.668137
Finished loop #195 2022-12-13 21:39:55.908989
Finished loop #196 2022-12-13 21:39:56.499636
Finished loop #197 2022-12-13 21:39:57.642508
Finished loop #198 2022-12-13 21:39:58.057830
Finished loop #199 2022-12-13 21:39:59.842547
Finished loop #200 2022-12-13 21:40:00.729619
Finished loop #201 2022-12-13 21:40:01.831714
Finished loop #202 2022-12-13 21:40:02.814096
Finished loop #203 2022-12-13 21:4

Finished loop #361 2022-12-13 21:44:11.185410
Finished loop #362 2022-12-13 21:44:12.457472
Finished loop #363 2022-12-13 21:44:13.334149
Finished loop #364 2022-12-13 21:44:13.608387
Finished loop #365 2022-12-13 21:44:14.616085
Finished loop #366 2022-12-13 21:44:16.429577
Finished loop #367 2022-12-13 21:44:18.248226
Finished loop #368 2022-12-13 21:44:18.626960
Finished loop #369 2022-12-13 21:44:19.356139
Finished loop #370 2022-12-13 21:44:20.399245
Finished loop #371 2022-12-13 21:44:21.241107
Finished loop #372 2022-12-13 21:44:22.362076
Finished loop #373 2022-12-13 21:44:23.331109
Finished loop #374 2022-12-13 21:44:25.329948
Finished loop #375 2022-12-13 21:44:27.469298
Finished loop #376 2022-12-13 21:44:27.902785
Finished loop #377 2022-12-13 21:44:29.853460
Finished loop #378 2022-12-13 21:44:31.237582
Finished loop #379 2022-12-13 21:44:32.437202
Finished loop #380 2022-12-13 21:44:32.744130
Finished loop #381 2022-12-13 21:44:34.077837
Finished loop #382 2022-12-13 21:4

Finished loop #540 2022-12-13 21:48:14.583641
Finished loop #541 2022-12-13 21:48:15.235860
Finished loop #542 2022-12-13 21:48:16.001132
Finished loop #543 2022-12-13 21:48:18.365206
Finished loop #544 2022-12-13 21:48:19.218944
Finished loop #545 2022-12-13 21:48:21.525812
Finished loop #546 2022-12-13 21:48:22.533680
Finished loop #547 2022-12-13 21:48:25.141476
Finished loop #548 2022-12-13 21:48:25.841966
Finished loop #549 2022-12-13 21:48:27.417935
Finished loop #550 2022-12-13 21:48:27.603515
Finished loop #551 2022-12-13 21:48:27.942466
Finished loop #552 2022-12-13 21:48:28.874070
Finished loop #553 2022-12-13 21:48:32.196937
Finished loop #554 2022-12-13 21:48:33.018907
Finished loop #555 2022-12-13 21:48:34.522126
Finished loop #556 2022-12-13 21:48:36.997914
Finished loop #557 2022-12-13 21:48:38.106685
Finished loop #558 2022-12-13 21:48:38.320090
Finished loop #559 2022-12-13 21:48:38.745707
Finished loop #560 2022-12-13 21:48:41.378674
Finished loop #561 2022-12-13 21:4

Finished loop #719 2022-12-13 21:52:32.560415
Finished loop #720 2022-12-13 21:52:34.576622
Finished loop #721 2022-12-13 21:52:35.270358
Finished loop #722 2022-12-13 21:52:36.258506
Finished loop #723 2022-12-13 21:52:37.553765
Finished loop #724 2022-12-13 21:52:39.645972
Finished loop #725 2022-12-13 21:52:40.255588
Finished loop #726 2022-12-13 21:52:41.635091
Finished loop #727 2022-12-13 21:52:42.187210
Finished loop #728 2022-12-13 21:52:43.417946
Finished loop #729 2022-12-13 21:52:43.885722
Finished loop #730 2022-12-13 21:52:44.817252
Finished loop #731 2022-12-13 21:52:47.526176
Finished loop #732 2022-12-13 21:52:48.081163
Finished loop #733 2022-12-13 21:52:49.020346
Finished loop #734 2022-12-13 21:52:50.652110
Finished loop #735 2022-12-13 21:52:51.851537
Finished loop #736 2022-12-13 21:52:52.619922
Finished loop #737 2022-12-13 21:52:54.702856
Finished loop #738 2022-12-13 21:52:55.614445
Finished loop #739 2022-12-13 21:52:55.710963
Finished loop #740 2022-12-13 21:5

Finished loop #898 2022-12-13 21:57:07.829511
Finished loop #899 2022-12-13 21:57:09.326579
Finished loop #900 2022-12-13 21:57:09.409011
Finished loop #901 2022-12-13 21:57:10.370145
Finished loop #902 2022-12-13 21:57:11.997977
Finished loop #903 2022-12-13 21:57:12.951794
Finished loop #904 2022-12-13 21:57:17.032756
Finished loop #905 2022-12-13 21:57:17.659005
Finished loop #906 2022-12-13 21:57:18.842740
Finished loop #907 2022-12-13 21:57:19.129267
Finished loop #908 2022-12-13 21:57:20.246653
Finished loop #909 2022-12-13 21:57:24.391522
Finished loop #910 2022-12-13 21:57:25.454437
Finished loop #911 2022-12-13 21:57:28.083457
Finished loop #912 2022-12-13 21:57:29.230645
Finished loop #913 2022-12-13 21:57:31.508605
Finished loop #914 2022-12-13 21:57:32.200366
Finished loop #915 2022-12-13 21:57:33.339471
Finished loop #916 2022-12-13 21:57:33.983128
Finished loop #917 2022-12-13 21:57:36.580926
Finished loop #918 2022-12-13 21:57:37.907285
Finished loop #919 2022-12-13 21:5

Finished loop #1075 2022-12-13 22:01:17.255845
Finished loop #1076 2022-12-13 22:01:21.910546
Finished loop #1077 2022-12-13 22:01:22.593651
Finished loop #1078 2022-12-13 22:01:23.298352
Finished loop #1079 2022-12-13 22:01:24.885191
Finished loop #1080 2022-12-13 22:01:25.068748
Finished loop #1081 2022-12-13 22:01:25.291161
Finished loop #1082 2022-12-13 22:01:27.802812
Finished loop #1083 2022-12-13 22:01:28.369391
Finished loop #1084 2022-12-13 22:01:29.649605
Finished loop #1085 2022-12-13 22:01:30.966202
Finished loop #1086 2022-12-13 22:01:32.522227
Finished loop #1087 2022-12-13 22:01:33.775883
Finished loop #1088 2022-12-13 22:01:34.948706
Finished loop #1089 2022-12-13 22:01:35.659120
Finished loop #1090 2022-12-13 22:01:38.420231
Finished loop #1091 2022-12-13 22:01:41.143150
Finished loop #1092 2022-12-13 22:01:41.990391
Finished loop #1093 2022-12-13 22:01:42.339641
Finished loop #1094 2022-12-13 22:01:42.967337
Finished loop #1095 2022-12-13 22:01:45.417041
Finished loop

In [13]:
y_test_pred = pipe.predict(x_test)
y_test_prob = pipe.predict_proba(x_test)

print("% match is " ,sum(y_test == y_test_pred)/len(y_test))

def metrics(y_true, y_pred):
    print('Confusion matrix:\n', confusion_matrix(y_true, y_pred))
    print('\nReport:\n', classification_report(y_true, y_pred))
    
y_pred = pipe.predict(x_validation) 
metrics(y_validation, y_pred)

Finished cleaning 2022-12-13 22:16:02.968252
Finished loop #1733 2022-12-13 22:16:04.384223
Finished loop #1734 2022-12-13 22:16:04.718324
Finished loop #1735 2022-12-13 22:16:07.651080
Finished loop #1736 2022-12-13 22:16:07.975439
Finished loop #1737 2022-12-13 22:16:08.355944
Finished loop #1738 2022-12-13 22:16:10.334282
Finished loop #1739 2022-12-13 22:16:13.093463
Finished loop #1740 2022-12-13 22:16:14.703021
Finished loop #1741 2022-12-13 22:16:14.995188
Finished loop #1742 2022-12-13 22:16:15.458869
Finished loop #1743 2022-12-13 22:16:15.966664
Finished loop #1744 2022-12-13 22:16:16.139533
Finished loop #1745 2022-12-13 22:16:16.335793
Finished loop #1746 2022-12-13 22:16:16.809512
Finished loop #1747 2022-12-13 22:16:17.549650
Finished loop #1748 2022-12-13 22:16:18.872509
Finished loop #1749 2022-12-13 22:16:22.095020
Finished loop #1750 2022-12-13 22:16:25.877457
Finished loop #1751 2022-12-13 22:16:26.108808
Finished loop #1752 2022-12-13 22:16:28.301880
Finished loop #

Finished loop #1907 2022-12-13 22:20:06.172608
Finished loop #1908 2022-12-13 22:20:07.929037
Finished loop #1909 2022-12-13 22:20:08.344399
Finished loop #1910 2022-12-13 22:20:09.037775
Finished loop #1911 2022-12-13 22:20:12.757314
Finished loop #1912 2022-12-13 22:20:14.011054
Finished loop #1913 2022-12-13 22:20:16.555699
Finished loop #1914 2022-12-13 22:20:17.002875
Finished loop #1915 2022-12-13 22:20:17.178584
Finished loop #1916 2022-12-13 22:20:17.977565
Finished loop #1917 2022-12-13 22:20:22.893889
Finished loop #1918 2022-12-13 22:20:24.857912
Finished loop #1919 2022-12-13 22:20:26.516762
Finished loop #1920 2022-12-13 22:20:28.388412
Finished loop #1921 2022-12-13 22:20:28.580638
Finished loop #1922 2022-12-13 22:20:29.692861
Finished loop #1923 2022-12-13 22:20:30.963614
Finished loop #1924 2022-12-13 22:20:31.457594
Finished loop #1925 2022-12-13 22:20:32.374826
Finished loop #1926 2022-12-13 22:20:33.222182
Finished loop #1927 2022-12-13 22:20:35.927581
Finished loop

In [17]:
# Predicting with a test dataset
predicted = pipe.predict(x_test)

# Model Accuracy
print("Logistic Regression Accuracy:",metrics.accuracy_score(y_test, predicted))
print("Logistic Regression Precision:",metrics.precision_score(y_test, predicted))
print("Logistic Regression Recall:",metrics.recall_score(y_test, predicted))

Logistic Regression Accuracy: 0.8961538461538462
Logistic Regression Precision: 0.8560606060606061
Logistic Regression Recall: 0.9338842975206612


In [18]:
# Validation dataset
predicted = pipe.predict(x_validation)

# Model Accuracy
print("Logistic Regression Accuracy:",metrics.accuracy_score(y_validation, predicted))
print("Logistic Regression Precision:",metrics.precision_score(y_validation, predicted))
print("Logistic Regression Recall:",metrics.recall_score(y_validation, predicted))

Finished cleaning 2022-12-13 22:42:26.159597
Finished loop #2253 2022-12-13 22:42:28.159518
Finished loop #2254 2022-12-13 22:42:28.604151
Finished loop #2255 2022-12-13 22:42:31.195590
Finished loop #2256 2022-12-13 22:42:31.608280
Finished loop #2257 2022-12-13 22:42:32.279833
Finished loop #2258 2022-12-13 22:42:34.661293
Finished loop #2259 2022-12-13 22:42:36.874115
Finished loop #2260 2022-12-13 22:42:38.241410
Finished loop #2261 2022-12-13 22:42:38.483270
Finished loop #2262 2022-12-13 22:42:39.005204
Finished loop #2263 2022-12-13 22:42:39.772531
Finished loop #2264 2022-12-13 22:42:40.256674
Finished loop #2265 2022-12-13 22:42:40.745448
Finished loop #2266 2022-12-13 22:42:41.337056
Finished loop #2267 2022-12-13 22:42:41.973037
Finished loop #2268 2022-12-13 22:42:42.644374
Finished loop #2269 2022-12-13 22:42:45.872455
Finished loop #2270 2022-12-13 22:42:48.272443
Finished loop #2271 2022-12-13 22:42:48.548077
Finished loop #2272 2022-12-13 22:42:50.111576
Finished loop #

Finished loop #2427 2022-12-13 22:46:29.270781
Finished loop #2428 2022-12-13 22:46:30.958639
Finished loop #2429 2022-12-13 22:46:31.550376
Finished loop #2430 2022-12-13 22:46:32.385464
Finished loop #2431 2022-12-13 22:46:35.980691
Finished loop #2432 2022-12-13 22:46:37.090853
Finished loop #2433 2022-12-13 22:46:39.563257
Finished loop #2434 2022-12-13 22:46:40.445285
Finished loop #2435 2022-12-13 22:46:40.542978
Finished loop #2436 2022-12-13 22:46:41.685677
Finished loop #2437 2022-12-13 22:46:45.079550
Finished loop #2438 2022-12-13 22:46:46.639986
Finished loop #2439 2022-12-13 22:46:47.863957
Finished loop #2440 2022-12-13 22:46:48.971091
Finished loop #2441 2022-12-13 22:46:49.124148
Finished loop #2442 2022-12-13 22:46:49.745433
Finished loop #2443 2022-12-13 22:46:50.339586
Finished loop #2444 2022-12-13 22:46:50.746868
Finished loop #2445 2022-12-13 22:46:51.277767
Finished loop #2446 2022-12-13 22:46:51.775628
Finished loop #2447 2022-12-13 22:46:53.685227
Finished loop

In [19]:
# Function to combine all textual data
def combine_text(df):
    df.fillna(' ', inplace=True)

    # Combining all textual data
    df['text_data'] = ( df['title'] + ' ' +
                        df['company_profile'] + ' ' + 
                        df['description'] + ' ' + 
                        df['requirements'] + ' ' + 
                        df['benefits']
                      )

    df.drop(columns =['title', 'company_profile', 'description', 'requirements', 'benefits'], inplace = True)
    
    return df

# Example of predicting with the model
query_df = pd.DataFrame([{'title' : 'Senior Engineering Product Manager', 
                          'company_profile' : 'Aptitude Staffing Solutions has redesigned the recruiting wheel. Our innovative new platform cuts the recruiting time in half, yields scientifically-proven results and clients and candidates enjoy a pleasant experience through advanced, simple to use technology and a tenured, industry-experienced recruiting team. Join us in a fresh new experience of leveraging your career...the way it should be!Â All represented candidates enjoy the following perks:Expert negotiations, maximizing total compensation packageÂ Signing bonus by Aptitude Staffing in addition to client signing bonus (if applicable)1 Year access to AnyPerkRelocation Services for out of town candidatesContinued education in your area of profession, seminars, workshops and other skill development eventsÂ Contract employees receive quarterly bonuses for the duration of their projectÂ Direct-Hire employees receive double bonues ($2,000) per referred/recruited candidate into their newly appointed companyAll candidates are encouraged to participate in ourÂ Referral Bonus ProgramÂ &amp; earnÂ $500 - $1,000Â per hired referralÂ  Â  Â  Â  Â  Â Â ', 
                          'description' : 'Senior Engineering Product ManagerAs a member of the Service Provider Engineering team, you will be responsible for managing the development of broadband DSL products targeted at service providers that provide Home Gateway, voice gateways, wireless adapters, IPTV and Voice over IP (VoIP) services. These products will incorporate one or several of the following technologies: DSL, Routers, 802.11 Wireless, VPN, Firewalls.Â Â The primary function of an engineering product manager is project management and technical oversight. The individual manages the ODM to ensure that products are delivered in a timely fashion to a high quality and in accordance with the procedures and systems. In addition the individual oversees the activities between the component vendors and the ODM. Within the service provider group, another important project management aspect is working with the sales engineers to get products approved at service providers. The individual must manage the technical issues that arise during the approval process and ensure a rapid resolution by working with and through the sales engineers, ODMs and Component vendors.Â A word from the Recruiter:Â "Some of the exciting things that retain engineers here are the new technologies that we integrate into our products. Most People hear about these technologies 9 months after the product development has launched,Â our engineers actually start learning about the new technologies in advance of any silicon introductions.Â In most cases, we are an early partner to silicon vendors and help guide the silicon features. This is extremely rare for many of the new members,Â once they do the first project they want more and more.Â As you can imagine the value of that engineer is much greater then when they walked in the door."Â "Most engineers that I interview have been disappointed by their employers, and when asked how much ownership they have of the product they developed, the answer is always something like : I only did this this portion of the software, or some portion of the chip, or some portion of the hardware. Not much of an empowerment here. Â Here, theÂ PE is the complete owner of every molecule of the product,Â including mechanical, operation, finance, cost, etc. This empowerment is another added value boost an engineer could benefit from,Â this value is rare and will push the engineer in the direction of someday running a business end to end with a larger responsibility and scope.Â The more of these projects they take the faster they reach that level of career growth and maturity."Â This is what not shared on the JD.Â "How you will mature your career."Â Job Responsibilities:Â â€¢ For assigned projects, take overall responsibility for delivering the product to production; including requirements generation, hardware, firmware, documentation, regulatory testing and Agile release requirements, per Netgear policies. It is expected that projects should be managed with minimal supervision.Â â€¢ Project management. Develop and maintain project plans. Keep all stakeholders informed of product status at all timesâ€¢ Project Execution. Manage the suppliers\' (ODM, chip vendor, etc.) product development process.Â â€¢ Product Verification. Manage verification testing of product features and performance.Â â€¢ Customer Certification. Manage qualification of products with customers.â€¢ Documentation. Provide input for manuals, help files, application notes, marketing materials and tech support.â€¢ Support. Resolve any 3 rd level technical support escalations.Â â€¢ Track industry technology developments, supplier roadmaps, standards bodies and make product recommendations', 
                          'requirements' : 'Skills and RequirementsÂ MSEE with 5 years of experience in customer premise equipment (CPE) or high volume product developmentDSL Broadband CPE product development experience required802.11 or wireless broadband product development experience requiredManagement of engineering team experience is requiredExperience with developing 3G/4G devices is desiredExperience certifying products with large service providers a plusExperience in complete development life cycle of technology productsSystems engineering background with significant hands-on experience in product development.In depth Domain knowledge in IP networking for Home gatewaysEffective organization, project management and coordination skills; ability to get the job done.Ability and willingness to take ownership of products; get the job done with minimal supervision.Teamwork. This position requires a close working relationship with the extended Netgear team, including engineering, operations, sales, customer support, marketing and program management.Â Good communication skills, both written and verbal.Experience working in a start-up environment.Self-starter that works well on a team.', 
                          'benefits' : 'What is offered:Broad responsibility, autonomy and visibility in an Engineering role.In-depth exposure to real-world customer issues across a global customer baseSmall-company feel in a growth environmentOpportunity for executive advancementVery competitive compensation packageSignificant stake in equity, stock optionsAttractive bonus programFull benefits package including generous retirement contributionsOpportunity for executive advancementReputable, renowned world-class leadership*Offering $1,000 Referral Bonus for each successful referral.Please contact: Darren Lawson directly at: #PHONE_90d33c9d7ec1484aebfe37b153d677decc6f5f53b316489ed24061544c04eb66# or #EMAIL_f4da338e899ddba983ac771b001681d1d2d93b3327ddc420a15f4e5a310071a9#Â #URL_99f46a2efd6ad483a11b40eef7a406a29de60d77be6dcb56289f26bd039c1017#Â'}])
query_df = combine_text(query_df)

                                           text_data
0  Senior Engineering Product Manager Aptitude St...


In [30]:
pred = pipe.predict(query_df)
pred_prob = pipe.predict_proba(query_df)
print(pred, pred_prob)

4       Customer Service Specialist in our Promotions ...
574     Customer Service Associate - Part Time  Novite...
504     Director of Marketing If you’re looking to sta...
1518    Account Sales Managers $80-$130,000/yr We have...
1549    Data Entry Clerk / Administrative Assistant   ...
                              ...                        
219     Front of House ustwo offers you the opportunit...
791     Flash  Developer Gaming Massive Media is the s...
744     Graduates: English Teacher Abroad (Conversatio...
1664    Admin Clerk,Office Assistant,Customer   Experi...
34      Senior Game Designer Magmic is a leading devel...
Name: text_data, Length: 260, dtype: object
Finished cleaning 2022-12-13 22:56:36.664648
Finished loop #2525 2022-12-13 22:56:36.675481
Finished cleaning 2022-12-13 22:56:36.677578
Finished loop #2526 2022-12-13 22:56:36.686390
[1] [[1.21213044e-04 9.99878787e-01]]


## Exporting Model

In [28]:
import joblib
# modify the file path to where you want to save the model
joblib.dump(pipe, '../app/model.pkl')

['../app/model.pkl']

In [32]:
import pickle
pickle.dump(pipe, open('../app/model.pkl', 'wb'))