In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

### Load raw data

In [2]:
df_products = pd.read_json('./raw-data/cleanJobs.json')

### Defining a dictionary

In [3]:
tfidf = TfidfVectorizer(stop_words='english')
df_products['description'] = df_products['description'].fillna('')
tfidf_matrix = tfidf.fit_transform(df_products['description'])

### Cosine sim: 
The most time consuming part: run time 5m 50s in my machine. But it is worth it

In [4]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

### Create indices 
*AKA Index the data*

In [6]:
indices = pd.Series(df_products.index, index=df_products['_id']).drop_duplicates()
indices

_id
64fb9e437822e405a3994faa        0
64fb9e447822e405a3994fae        1
64fb9e467822e405a3994fb2        2
64fb9e477822e405a3994fb6        3
64fb9e487822e405a3994fba        4
                            ...  
65a80607ca4b20fa591c37cf    26128
65a80646ca4b20fa591c37dc    26129
65a8066cca4b20fa591c37e9    26130
65a806e7ca4b20fa591c37f6    26131
65a80726ca4b20fa591c3803    26132
Length: 26133, dtype: int64

### Original idea: Recommender Algorithm
Takes the description of a given record, and returns the most related ones

In [7]:
def get_recommendations(id, cosine_sim = cosine_sim):
    idx = indices[id]
    sim_scores = enumerate(cosine_sim[idx])
    sim_scores = sorted(sim_scores, key=lambda x:x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    sim_index=[i[0] for i in sim_scores]
    print(df_products['_id'].iloc[sim_index])

Run the function: run time in my machine 0.0s for 26k records
Just make sure the ID you pass is of a record that exists in our file

In [9]:
get_recommendations("64fb9e487822e405a3994fba")

12693    64fbc1447822e405a39a160a
704      64fba0d77822e405a3995aac
16848    64fbcc607822e405a39a56f6
3333     64fba83c7822e405a39983c6
12271    64fbc0247822e405a39a0f72
18386    64fc283bc4c7ca19f07ee9bf
5483     64fbae057822e405a399a55e
3207     64fba7e87822e405a39981ce
25006    65a370827a427dbd6ac3475c
296      64fb9f707822e405a399544a
Name: _id, dtype: object


### The search algorithm
The previous code searches for records whose description is similar to the one in question 
#
For example, if we have records A,B,C,...,Y,Z, 
- then a user selects record 'W', 
- the algorithm ranks all the records according to how similar their descriptions are to that of record 'W'.
- The records with most similar descriptions come first and those with most dis-similar descriptions come last.
- Finally it returns the first `n` number of records specified in our case: `n = 11`
#
In our modification:
- Instead of finding a record then use its description as a point of reference: we use a custom input
- The rest of the code works the same

In [10]:
def search(input_text):
    # Transform the input text into a TF-IDF vector
    input_vector = tfidf.transform([input_text])

    # Compute the cosine similarity between the input text and all job descriptions
    cosine_scores = linear_kernel(input_vector, tfidf_matrix).flatten()

    # Get the indices of job descriptions sorted by similarity
    similar_indices = cosine_scores.argsort()[::-1]

    # Return the top 10 most similar job IDs and their descriptions
    top_similar_indices = similar_indices[1:11]
    job_recommendations = [(df_products['_id'].iloc[i], df_products['description'].iloc[i]) for i in top_similar_indices]

    return job_recommendations

In [11]:
search("Filipino product review writer to write about the best Skin Care Products in the philippines")

[('64fbca4e7822e405a39a4b16',
  "Job Title: Esthetician Job Summary: As an Esthetician, you will be responsible for providing professional skin care treatments to clients, including facials, waxing, and other skin care services. You will analyze clients' skin conditions, make recommendations for treatment plans, and perform procedures to improve and maintain the health and appearance of their skin. You will also educate clients on proper skin care techniques and provide guidance on how to achieve optimal skin health. Additionally, you will assist with maintaining a clean and sanitary work environment and ensure compliance with all applicable health and safety regulations. Responsibilities: Provide professional skin care treatments to clients, including facials, waxing, and other skin care services, following established protocols and guidelines. Conduct thorough skin analyses to determine clients' skin conditions and develop appropriate treatment plans. Perform a wide range of esthetic