# Recommender System

In [87]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import sigmoid_kernel

In [88]:
# Load the combined resume / job-description table from disk
df = pd.read_csv('split.csv')

In [89]:
# Preview the first rows to check the loaded columns (details, filename_new)
df.head()

Unnamed: 0,details,filename_new
0,danah nielsen hostess albuquerque nm challen...,Resume DanahNielsen_26409
1,christopher brown west memphis ar obtain orga...,Resume ChristopherBrown_30650
2,minnesota state park er security ranger season...,Resume MinnesotaStateParksWorkerSecurityRanger...
3,naveen net developer java developer bcbsm mi d...,Resume NaveenNetDeveloper_46188
4,tim schannauer electrician helper lane valente...,Resume TimSchannauer_22646


In [90]:
# TF-IDF vectoriser over the free-text "details" column.
# Word-level 1- to 3-grams, English stop words removed, accents folded;
# min_df=3 and max_df=0.75 bound how rare / how common a term may be.
tfv = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 3),
    stop_words='english',
    strip_accents='unicode',
    min_df=3,
    max_df=0.75,
    max_features=None,
)
tfv_matrix = tfv.fit_transform(df["details"])

In [91]:
# Display the sparse TF-IDF matrix summary (shape and stored-element count)
tfv_matrix

<1599x20106 sparse matrix of type '<class 'numpy.float64'>'
	with 247838 stored elements in Compressed Sparse Row format>

In [92]:
# Sigmoid-kernel similarity of every document against every document
sig = sigmoid_kernel(X=tfv_matrix, Y=tfv_matrix)

In [93]:
# Persist the similarity matrix to sig.pkl
import pickle

with open('sig.pkl', 'wb') as pickle_file:
    pickle.dump(sig, pickle_file)

In [94]:
# Lookup table: document title (filename_new) -> row index in df.
# Series.drop_duplicates() de-duplicates the *values* (the row indices, which
# are already unique), so it never removed repeated titles. De-duplicate on
# the index labels instead, keeping the first occurrence, so that a lookup by
# title always yields a single row index rather than a Series.
indices_sig = pd.Series(df.index, index=df['filename_new'])
indices_sig = indices_sig[~indices_sig.index.duplicated(keep='first')]

In [95]:
# Function for recommendation
def give_rec(title, sig=sig, top_n=24):
    """Return the titles of the documents most similar to ``title``.

    Parameters
    ----------
    title : str
        A value of ``df['filename_new']`` identifying the query document.
    sig : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of document
        ``i`` against every document. Defaults to the module-level ``sig``.
    top_n : int, default 24
        Maximum number of recommendations to return. The default keeps the
        24 results the original ``[1:25]`` slice produced.

    Returns
    -------
    pandas.Series
        ``filename_new`` values of the ``top_n`` highest-scoring documents,
        best match first, never including the query document itself.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices_sig``.
    """
    # Row index of the query document
    idx = indices_sig[title]

    # Pairwise similarity scores of the query against every document
    scores = np.asarray(sig[idx])

    # Rank documents by descending score; a stable sort keeps the original
    # row order among ties (same ordering as sorted(..., reverse=True)).
    ranked = np.argsort(-scores, kind='stable')

    # Drop the query document by index rather than assuming it is ranked
    # first: with tied scores another document can take the top slot, and
    # slicing off position 0 would then discard a real match and return the
    # query itself.
    ranked = ranked[ranked != idx][:top_n]

    return df['filename_new'].iloc[ranked]

# Given Resume and similar JD output

In [96]:
# Recommendations for a resume query
sigrec_1 = give_rec(title='Resume DanahNielsen_26409')

In [97]:
# Import re module to use regular expression
import re
 
# Titles to filter: the recommendations returned by give_rec for sigrec_1
sublist = sigrec_1

# Keep only the job-description (JD) entries
def Filter(datalist):
    """Return a list of the items in ``datalist`` that start with ``JD``."""
    jd_prefix = re.compile(r'^JD')
    return [entry for entry in datalist if jd_prefix.search(entry)]

# Show the JD titles found among the recommendations
print(Filter(sublist))

['JD Customer Service R_138097', 'JD Customer Service R_122169', 'JD Customer Service R_145262']


# Given JD and similar Resume output

In [98]:
# Recommendations for a job-description query
sigrec_2 = give_rec(title='JD Senior IT Network_56504')

In [99]:
# Import re module to use regular expression
import re
 
# Titles to filter: the recommendations returned by give_rec for sigrec_2
sublist = sigrec_2

# Keep only the resume entries
def Filter(datalist):
    """Return a list of the items in ``datalist`` that start with ``Resume``."""
    resume_prefix = re.compile(r'^Resume')
    kept = []
    for entry in datalist:
        if resume_prefix.search(entry):
            kept.append(entry)
    return kept

# Show the resume titles found among the recommendations
print(Filter(sublist))

['Resume BTechAtoyebi_21137', 'Resume JasonEnglish_48805', 'Resume LuciAsenjo_13398']


# Given Resume and similar Resume output

In [100]:
# Recommendations for a resume query
sigrec_3 = give_rec(title='Resume Associate_24579')

In [101]:
# Import re module to use regular expression
import re
 
# Titles to filter: the recommendations returned by give_rec for sigrec_3
sublist = sigrec_3

# Keep only the resume entries
def Filter(datalist):
    """Return a list of the items in ``datalist`` that start with ``Resume``."""
    def is_resume(entry):
        # A match object is truthy, None (no match) is falsy
        return re.search(r'^Resume', entry)

    return list(filter(is_resume, datalist))

# Show the resume titles found among the recommendations
print(Filter(sublist))

['Resume Communicator_47889', 'Resume StephanieCullins_20339', 'Resume MarissaAnderson_46415', 'Resume JazzminJohnson_20476', 'Resume BrittaniPearson_14034', 'Resume YurisDelcid_22677', 'Resume ChristopherMartinelli_43849', 'Resume MarySanders_6097', 'Resume CashierSalesAssociate_5664', 'Resume WarehouseEmployee_11106', 'Resume OvernightStocking_33309', 'Resume OperationsManagerFriedmansApplianceCenter_48662', 'Resume CustomerServiceRepresentative_9918', 'Resume MarkStuder_13053', 'Resume AshleyNavarro_20162', 'Resume SeniorCustomerServiceRepresentative_34676', 'Resume WinstonBailey_17689', 'Resume ShawtrellJenkins_41953', 'Resume TatianaSimon_46152', 'Resume CrewMember_11837', 'Resume AustinJiang_22563', 'Resume StockAssociate_44849', 'Resume Conducetdadministrative_44894']


# Given JD and similar JD output

In [102]:
# Recommendations for a job-description query
sigrec_4 = give_rec(title='JD Senior IT Network_56504')

In [103]:
# Import re module to use regular expression
import re
 
# Titles to filter: the recommendations returned by give_rec for sigrec_4
sublist = sigrec_4

# Keep only the job-description (JD) entries
def Filter(datalist):
    """Return a list of the items in ``datalist`` that start with ``JD``."""
    jd_only = []
    for entry in datalist:
        if re.search(r'^JD', entry) is not None:
            jd_only.append(entry)
    return jd_only

# Show the JD titles found among the recommendations
print(Filter(sublist))

['JD Network Engineer_28249', 'JD Information Securi_27888', 'JD Sr Network Integra_45136', 'JD Infrastructure Arc_29292', 'JD NOC Engineer_31706', 'JD Project Manager Ne_27426', 'JD NOC Engineer_64709', 'JD IT Tech Services L_37442', 'JD Data Center Archit_37869', 'JD Systems Administra_27390', 'JD IT Services Techni_115385', 'JD Desktop Support Sp_41057', 'JD Cisco Contact Cent_34104', 'JD SW Engineer Embedd_34124', 'JD Area Manager Serv_65801', 'JD IT Manager_93204', 'JD Senior Desktop Sup_25277', 'JD Operations Directo_65647', 'JD Risk Regulation A_86673', 'JD Account Executive_42343', 'JD Sr Collaboration E_32835']
