In [1]:
#import all necessary packages
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import requests 

In [2]:
# Load the two raw inputs: the Udemy course table (CSV; the 'Title' and 'Summary'
# columns are used below) and the job descriptions (Excel; 'Title' and 'Task').
udemy_df = pd.read_csv('udemy_master_df.csv')
job_descriptions_df = pd.read_excel('job_descriptions.xlsx')

In [3]:
# Keep only the courses that have a Summary; rows whose Summary is missing are dropped
# (other columns are not checked).
udemy_df = udemy_df.dropna(subset=['Summary'])

In [4]:
# Collapse the job table to one row per Title by comma-joining all of that title's Task strings.
job_descriptions_df = (
    job_descriptions_df
    .groupby(['Title'])['Task']
    .apply(','.join)
    .reset_index()
)

In [5]:
# Collapse the course table to one row per Title by comma-joining all of that title's Summary strings.
udemy_df = (
    udemy_df
    .groupby(['Title'])['Summary']
    .apply(','.join)
    .reset_index()
)

In [6]:
# Work on independent copies so later edits do not touch the grouped source frames.
job_df, course_df = job_descriptions_df.copy(), udemy_df.copy()

In [36]:
# Build the course-to-course similarity lookup used by `recommender`:
# TF-IDF keyword vectors for every course summary, then pairwise cosine similarity.

course_df = course_df[['Title','Summary']]

course_df  = course_df.drop_duplicates()

tfidfvec = TfidfVectorizer(
            max_df=0.7,   # note: % of docs in collection
            max_features = 10000, # only top 10k by freq,
            lowercase = True, 
            min_df=2,  # note: absolute count of documents
            stop_words="english",
            ngram_range = (1,2), # include 2-word phrases
            use_idf=True, # Enable inverse-document-frequency reweighting. If False, idf(t) = 1.
            )

# Sparse (n_courses x n_features) TF-IDF matrix.
vectorized_data = tfidfvec.fit_transform(course_df['Summary'])

# get_feature_names() was removed in scikit-learn 1.2; use get_feature_names_out()
# when it exists and fall back to the old name only on older installs.
# NOTE: this dense frame can be very large (n_courses x up to 10000 columns); it is
# kept for inspection only and is no longer used to compute the similarities below.
tfidf_df = pd.DataFrame(
    vectorized_data.toarray(),
    columns=(
        tfidfvec.get_feature_names_out()
        if hasattr(tfidfvec, 'get_feature_names_out')
        else tfidfvec.get_feature_names()
    ),
)

tfidf_df.index = course_df['Title']

# Compute the pairwise similarities straight from the sparse matrix: same values as
# from the dense frame, but without multiplying mostly-zero dense arrays (the dense
# version of this cell had to be interrupted).
cosine_similarity_array = cosine_similarity(vectorized_data)

# Square frame labelled by course title on both axes, so
# cosine_similarity_df.loc[title] gives that course's similarity to every course.
cosine_similarity_df = pd.DataFrame(cosine_similarity_array, columns=tfidf_df.index,index=tfidf_df.index)



KernelInterrupted: Execution interrupted by the Jupyter kernel.

In [37]:
# TF-IDF model over the job titles, built with the vectorizer's default settings.
# `vectorized_data2` holds one row per row of job_df, in the same order.
tfidfvec2 = TfidfVectorizer()

vectorized_data2 = tfidfvec2.fit_transform(job_df['Title'])

In [38]:
# TF-IDF model over the course summaries, fitted separately from `tfidfvec`.
# Unlike `tfidfvec`, no stop_words / min_df / max_df / max_features arguments are
# passed here, so the library defaults apply for those.
# `recommender` transforms a job description with this model and compares it to
# `vectorized_data3` (one row per row of course_df, in the same order).

tfidfvec3 = TfidfVectorizer(
            lowercase = True, 
            ngram_range = (1,2), # include 2-word phrases
            )

vectorized_data3 = tfidfvec3.fit_transform(course_df['Summary'])

Search for: Customer Service Representative
Customer service representatives, customer service advisors, or customer service associates interact with customers to handle complaints, process orders, and provide information about an organization's products and services. Customer service representatives answer questions or requests from customers or the public. They typically provide services by phone, but some also interact with customers face to face, by email or text, via live chat, and through social media. Qualifications include good communication, problem-solving, and computer skills.


In [1]:
def _fetch_job_description(job_name):
    """Return a short description of `job_name` from DuckDuckGo's JSON answer endpoint.

    Prefers the top-level "AbstractText"; otherwise falls back to the first entry of
    "RelatedTopics" that has a non-empty "Text". Returns "" when neither is available.

    Raises:
        requests.RequestException: on connection problems, timeouts or HTTP error codes.
    """
    response = requests.get(
        "https://duckduckgo.com/",
        # Passing params lets requests URL-encode the job name (spaces, '&', '#', ...).
        params={"q": job_name, "va": "b", "t": "hc", "ia": "web", "format": "json"},
        timeout=10,  # do not hang the notebook on a stalled connection
    )
    response.raise_for_status()
    results = response.json()

    abstract = results.get("AbstractText") or ""
    if abstract:
        return abstract

    # The list may be empty, and an entry is not guaranteed to carry a "Text" key.
    for topic in results.get("RelatedTopics") or []:
        text = topic.get("Text") if isinstance(topic, dict) else None
        if text:
            return text
    return ""


def _best_matching_course(job_summary):
    """Return the Title of the course whose summary is most similar to `job_summary`.

    Raises:
        ValueError: if `job_summary` is empty, or shares no vocabulary with any course
            summary (every similarity is 0), in which case any pick would be arbitrary.
    """
    if not job_summary or not job_summary.strip():
        raise ValueError("no job description available to match against the course summaries")

    query_vec = tfidfvec3.transform([job_summary])
    similarity = cosine_similarity(query_vec, vectorized_data3).flatten()

    # Only the single best match is needed, so argmax replaces the former
    # argpartition(..., -1)[-5:] (which only guaranteed the last position anyway).
    best = int(np.argmax(similarity))
    if similarity[best] <= 0:
        raise ValueError("the job description has no overlap with any course summary")

    # vectorized_data3 rows follow course_df row order, hence positional lookup.
    return course_df.iloc[best]['Title']


def recommender(job_name,num):
    """Recommend courses for a job.

    If `job_name` exactly equals a Title in `job_df`, that job's Task text is used as
    the job description. Otherwise a description is fetched from the internet. The
    description is matched against the course summaries to find one anchor course,
    and the courses most similar to that anchor are returned.

    Args:
        job_name: job title to look up.
        num: number of rows to return. The anchor course itself is the first row
            (similarity 1.0).

    Returns:
        pandas Series indexed by course Title holding cosine similarities to the
        anchor course, sorted in descending order and cut to `num` rows.

    Raises:
        ValueError: if no usable job description could be found or it matches no course.
        requests.RequestException: if the internet lookup fails.
    """
    # The title is matched exactly, so read its Task text directly instead of
    # re-discovering the same row through TF-IDF similarity on the titles.
    known_tasks = job_df.loc[job_df['Title'] == job_name, 'Task']
    in_dataset = not known_tasks.empty

    if in_dataset:
        job_summary = known_tasks.iloc[0]
    else:
        print('searching internet')
        job_summary = _fetch_job_description(job_name)

    course_name = _best_matching_course(job_summary)
    if in_dataset:
        # Kept so the dataset branch still prints the anchor course as before.
        print(course_name)

    ordered_similarities = cosine_similarity_df.loc[course_name].sort_values(ascending=False)

    # Explicit positional slice: the index holds course titles, not integers.
    course_list = ordered_similarities.iloc[:num]

    return course_list


In [40]:
print(recommender('Registered Nurse',6))

searching internet
Title
Working knowledge of Accounting practice                        1.000000
SEO, SMM & Marketing Terminology Testing                        0.529150
Pneumatic Control                                               0.451113
CONNECT - Become Influential and Connect with Anyone            0.391254
Formal Languages & Finite State Automata: From the Beginning    0.351397
Spiritual Insights for Businesses, Leaders and Entrepreneurs    0.324600
Name: Working knowledge of Accounting practice, dtype: float64


In [41]:
recommender('Customer Service Representatives',5)

Sales Funnel Mastery For Digital Products With Clickfunnels


Title
Sales Funnel Mastery For Digital Products With Clickfunnels    1.000000
How to Create An Information Product                           0.451075
The Easy Information Product Creation System                   0.426412
Guy Kawasaki’s The Art of Evangelism                           0.350777
Remarkable Product Masterclass                                 0.330421
Name: Sales Funnel Mastery For Digital Products With Clickfunnels, dtype: float64

In [42]:
recommender('Project Manager',5)

searching internet


Title
Project Management Essentials                                  1.000000
Project Management Skills for Non Project Managers             0.622389
Project Management Fundamentals: Crash Course for Beginners    0.594502
Executive Strategy & Management                                0.569771
Project Management (PMP) & Management Skills: Pmbok, Scrum     0.525979
Name: Project Management Essentials, dtype: float64

In [43]:
recommender('Customer Service Representatives',5)

Sales Funnel Mastery For Digital Products With Clickfunnels


Title
Sales Funnel Mastery For Digital Products With Clickfunnels    1.000000
How to Create An Information Product                           0.451075
The Easy Information Product Creation System                   0.426412
Guy Kawasaki’s The Art of Evangelism                           0.350777
Remarkable Product Masterclass                                 0.330421
Name: Sales Funnel Mastery For Digital Products With Clickfunnels, dtype: float64

In [58]:
recommender('Data Scientist',5)

searching internet


Title
Data Science & ML for Python-Python & Data Science Made Easy    1.000000
Learn Data Science Basics                                       0.784355
Complete Data Science Training with Python for Data Analysis    0.722774
Learn Data Science From Scratch                                 0.714459
Data Science and Machine Learning Masterclass with R            0.698815
Name: Data Science & ML for Python-Python & Data Science Made Easy, dtype: float64

In [None]:
recommender('DevOps Engineer', 5)

In [None]:
recommender('Driver', 5)

In [59]:
recommender('administrative assistant',5)

searching internet


Title
Fundamentals of Administrative Professional    1.000000
Angular4 for beginners: Learn from scratch     0.491300
Better Business Writing Skills                 0.462754
Art of Facebook Marketing                      0.460805
Media Training and Interview Fundamentals      0.404912
Name: Fundamentals of Administrative Professional, dtype: float64

In [75]:
job_df[job_df['Title']=='Customer Service Representatives']['Task'].values

array(["Confer with customers by telephone or in person to provide information about products or services, take or enter orders, cancel accounts, or obtain details of complaints.,Keep records of customer interactions or transactions, recording details of inquiries, complaints, or comments, as well as actions taken.,Check to ensure that appropriate changes were made to resolve customers' problems.,Contact customers to respond to inquiries or to notify them of claim investigation results or any planned adjustments.,Determine charges for services requested, collect deposits or payments, or arrange for billing.,Complete contract forms, prepare change of address records, or issue service discontinuance orders, using computers.,Refer unresolved customer grievances to designated departments for further investigation.,Resolve customers' service or billing complaints by performing activities such as exchanging merchandise, refunding money, or adjusting bills.,Review insurance policy terms to de

In [57]:
course_df[course_df['Title']=='"Skyrocket your customer experience impact within weeks"']['Summary'].values

array(['A Customer Experience Mastermind e-course brought to you by The Experience Professionals'],
      dtype=object)

In [45]:
# Scratch check: TF-IDF cosine similarity of two different job titles whose only
# shared word is "Representatives". The vectorizer is fitted on just these two strings.
document1, document2 = "Solar Sales Representatives and Assessors", "Customer Service Representatives"
documents = (document1, document2)

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)

cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])

array([[0.15064018]])

In [46]:
# Scratch check: the same title compared with itself, as a baseline for the score above.
# The vectorizer is fitted on just these two strings.
document1, document2 = "Customer Service Representatives", "Customer Service Representatives"
documents = (document1, document2)

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)

cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])

array([[1.]])

In [99]:
document1 = "Confer with customers by telephone or in person to provide information about products or services, take or enter orders, cancel accounts, or obtain details of complaints.,Keep records of customer interactions or transactions, recording details of inquiries, complaints, or comments, as well as actions taken.,Check to ensure that appropriate changes were made to resolve customers' problems.,Contact customers to respond to inquiries or to notify them of claim investigation results or any planned adjustments.,Determine charges for services requested, collect deposits or payments, or arrange for billing.,Complete contract forms, prepare change of address records, or issue service discontinuance orders, using computers.,Refer unresolved customer grievances to designated departments for further investigation.,Resolve customers' service or billing complaints by performing activities such as exchanging merchandise, refunding money, or adjusting bills.,Review insurance policy terms to determine whether a particular loss is covered by insurance.,Review claims adjustments with dealers, examining parts claimed to be defective, and approving or disapproving dealers' claims.,Solicit sales of new or additional services or products.,Compare disputed merchandise with original requisitions and information from invoices and prepare invoices for returned goods.,Obtain and examine all relevant information to assess validity of complaints and to determine possible causes, such as extreme weather conditions that could increase utility bills.,Order tests that could determine the causes of product malfunctions.,Recommend improvements in products, packaging, shipping, service, or billing methods and procedures to prevent future problems."
document2 = "Body Language Choreography Strategies Will Make Your Presentations and Speeches Dynamic, Memorable and Engaging!"

# Score the two texts assigned to document1/document2 just above: fit a throwaway
# TF-IDF model on only those two texts and compare their vectors.
documents = (document1, document2)

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)

cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])

array([[0.05524332]])

In [46]:
from scipy import spatial
import gensim.downloader as api

In [47]:
# Fetch the pretrained "glove-wiki-gigaword-50" embedding through gensim's downloader API.
# `model` is indexed by word below (model[word]) to obtain that word's vector.
model = api.load("glove-wiki-gigaword-50") 

In [48]:
# Sample sentences for the embedding-similarity experiment below: s1 rephrases s0,
# s2 makes a similar statement about a different company, s3 is on an unrelated topic.
s0 = 'Mark zuckerberg owns the facebook company'
s1 = 'Facebook company ceo is mark zuckerberg'
s2 = 'Microsoft is owned by Bill gates'
s3 = 'How to learn japanese'

def preprocess(s):
    """Split `s` on whitespace and return the pieces lower-cased, in order."""
    tokens = []
    for word in s.split():
        tokens.append(word.lower())
    return tokens

def get_vector(s):
    """Embed a sentence as the sum of the word vectors of its tokens.

    Tokens come from `preprocess(s)`. Tokens that are missing from `model` are
    skipped instead of raising KeyError, so sentences containing punctuation-glued
    or unknown words can still be embedded.

    Returns:
        1-D array: the element-wise sum of the known tokens' vectors, or an all-zero
        vector of length `model.vector_size` when no token is known (including
        empty input).
    """
    known_vectors = [model[token] for token in preprocess(s) if token in model]
    if not known_vectors:
        # Keep the return shape stable so callers can still pass it to a distance function.
        return np.zeros(model.vector_size)
    return np.sum(np.array(known_vectors), axis=0)


# Cosine similarity (1 - cosine distance) between s0's summed word vectors and each other sentence's.
for label, other in (('s1', s1), ('s2', s2), ('s3', s3)):
    print(f's0 vs {label} ->', 1 - spatial.distance.cosine(get_vector(s0), get_vector(other)))

s0 vs s1 -> 0.965923011302948
s0 vs s2 -> 0.8659112453460693
s0 vs s3 -> 0.5877998471260071


In [55]:
# Preview the first titles only; dumping every title floods the notebook output.
course_df['Title'].head(20).tolist()

[' 10 Ritz-Carlton Marketing Secrets to Guarantee...',
 ' Adobe Animate CC (Scripting): HTML5 (Español)',
 ' Adobe Photoshop Focus Projects Course',
 ' Affiliate Marketing - WHAT It Takes To Get To 500 A Day',
 ' Affinity Designer - making seamless patterns',
 ' Amazon Suspend ve Hesap Sağlığı Trademark Copyright Patent',
 ' Animate a Logo in Adobe After Effects CC with Motion Graphic',
 " Author School Visits 101: Marketing Children's Books",
 ' Autodesk Revit MEP 2013',
 ' Axure RP 9 Fundamentals and Mobile prototyping for UX Design',
 ' Basics of Accounting (தமிழ் மொழியில் கணக்கியல் அடிப்படைகள்)',
 ' Beginning to Draw From the Mind with Zen Doodle',
 ' Building an Online Business while traveling the whole World',
 ' CFA® Level 1 2014 – Quantitative Methods',
 ' CIA Part 2- 2020 Version - 900+MCQs',
 ' Capturing, Analyzing, and Using Lessons Learned (PMI - PMP)',
 ' Como Crear un Plan de Marketing Digital Para Tu Sitio Web',
 ' Complete PHP OOP Concepts for Absolute Beginners + Proje

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=39436d08-244a-48f2-9b52-ac5d3c551bf2' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>