In [None]:
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load both CSV files and keep a reproducible 10% random sample of each
# (the same random_state is used for both draws).
job_offers = (
    pd.read_csv('FinalDataSetJobOffers.csv')
    .sample(frac=0.1, random_state=42)
)
job_seekers = (
    pd.read_csv('CvDatasetFinal_3.csv')
    .sample(frac=0.1, random_state=42)
)

In [None]:
# Report, for each dataframe, how many missing values every column contains.
for label, frame in (
    ("Missing values in job_offers:", job_offers),
    ("Missing values in job_applications:", job_seekers),
):
    print(label)
    print(frame.isna().sum())

Missing values in job_offers:
Job post               0
Company name           0
Job description        0
Required skills        0
Location               0
Company rating         0
Company review         0
Experience required    0
dtype: int64
Missing values in job_applications:
Category             0
Name                 0
Email                0
Phone                0
Education            0
Skills               0
Experience           2
Experience_Rating    0
dtype: int64


In [None]:
# Keep only the fully populated rows of the job offers and job seekers dataframes.
job_offers = job_offers.dropna()
job_seekers = job_seekers.dropna()


In [None]:
# Preview the first five rows of the sampled job offers.
job_offers.head(n=5)

Unnamed: 0,Job post,Company name,Job description,Required skills,Location,Company rating,Company review,Experience required
23304,senior manag analyt,mirum india,experi digit medium includ excel skill googl a...,tableau manag data studio digit medium sa data...,mumbai,3.3,30.0,10 yr
5550,senior technic lead data engin,infostretch solut pvt ltd,flexibl shift time accommod deploy critic call...,jira ssi bitbucket sourc tree sql queri itil f...,pune,3.6,147.0,10 yr
22050,python develop,diver lynx,must hands-on experi python design build maint...,skill python test data scienc machin learn htm...,bangalore/bengaluru,2.9,63.0,2-6 yr
10902,microsoft activ directori applic develop,accentur,technic experi 1 pki secur certif manag use sc...,busi process consult activ directori microsoft...,bangalore/bengaluru,4.1,26528.0,4-6 yr
25912,programm,smart system llc,look experienc programm develop websit,higher educ excel php html mysql skill good or...,yerevan armenia,3.95215,147.0,10 yr


In [None]:
# Preview the first five rows of the sampled job seekers.
# NOTE(review): the rendered preview includes the Name, Email and Phone columns;
# confirm the data is safe to publish before sharing the notebook with outputs.
job_seekers.head(n=5)

Unnamed: 0,Category,Name,Email,Phone,Education,Skills,Experience,Experience_Rating
140,Java Developer,Karen Thompson,karen.thompson@gmail.com,343.781.4444,detail bachelor engineering computer savitriba...,skill language java operating system window li...,32 month linux 6 month adavance java le 1 year...,5
398,Hadoop,Andrew Adams,andrew.adams@gmail.com,001-559-587-9020x6074,detail electronics communication indore madhya...,set programming language apache hadoop python ...,31 month hadoop 31 month hadoop 31 month hive ...,5
6,Data Science,Christine Myers,christine.myers@gmail.com,+1-131-902-7364x03539,detail january 2017 b tech computer science en...,skill python tableau data visualization studio...,13 month python 24 month solution 24 month dat...,5
334,Network Security Engineer,Jade Hensley,jade.hensley@gmail.com,(558)588-2984x33032,detail july 2012 april 2015 bachelor science i...,skill writing skill english good certainly cle...,24 monthscompany detail company karvy innotech...,3
322,Network Security Engineer,Tiffany Bailey,tiffany.bailey@gmail.com,548.866.0991,detail september 2006 august 2011 bachelor eng...,set skill skilled analyzing monitoring network...,72 month checkpoint 72 month cisco 72 month ci...,5


In [None]:
# Prepare input data: one row per job seeker, one column per distinct value of
# the job offers' 'Required skills' column.
# NOTE(review): the column vocabulary is made of complete 'Required skills'
# strings, so a seeker's comma-separated skill token only survives the reindex
# when it equals a whole required-skills string; every other column is filled
# with 0. Confirm this is the intended encoding.
required_skill_columns = job_offers['Required skills'].unique()

job_skills = (
    job_seekers['Skills']
    .str.get_dummies(sep=',')
    .reindex(columns=required_skill_columns, fill_value=0)
)

job_features = job_skills.to_numpy()

In [None]:
# Prepare target data: encode the target job posting as a single row in the
# same column space as the input features (one column per distinct
# 'Required skills' value of the job offers).
target_job = 'python develop'

target_job_rows = job_offers[job_offers['Job post'] == target_job]
if target_job_rows.empty:
    # Fail with a clear message instead of the bare IndexError that
    # `.values[0]` raises when no posting matches.
    raise ValueError(f"Target job {target_job!r} was not found in job_offers")

# When several postings share the title, the first one is used.
first_match = target_job_rows.iloc[0]
# Kept under its original name so that any cell reading it keeps working.
target_job_description = first_match['Job description']
# The columns below come from 'Required skills', so the target must be encoded
# from that same column. The free-text description is not drawn from that
# vocabulary and would encode to an all-zero target row.
target_job_required_skills = first_match['Required skills']

target_job_skills = pd.DataFrame({'skills': [target_job_required_skills]})
target_job_skills = target_job_skills['skills'].str.get_dummies(sep=',')
target_job_skills = target_job_skills.reindex(columns=job_offers['Required skills'].unique(), fill_value=0)

target_features = np.array(target_job_skills)

In [None]:
# Define and compile the model: a fully connected network that takes one
# job-seeker feature row and emits one sigmoid output per target column.
model = Sequential([
    Dense(128, input_dim=job_features.shape[1], activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(target_features.shape[1], activation='sigmoid'),
])

# Mean squared error between the sigmoid outputs and the target row,
# optimised with Adam.
adam = Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='mse')

In [None]:
# Train the network. Every job-seeker row is paired with the same target
# vector, so the target row is tiled once per training sample.
num_samples = len(job_features)
target_job_data = np.tile(target_features, (num_samples, 1))
model.fit(x=job_features, y=target_job_data, batch_size=32, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f538626c7f0>

In [None]:
# Make recommendations for a new job seeker: encode the seeker's skills in the
# same column space that the model was trained on.
new_job_seeker = pd.DataFrame({'skills': ['Python, SQL']})

# Normalise the free-text input before splitting it. Splitting 'Python, SQL'
# on ',' alone yields the tokens 'Python' and ' SQL' (leading space, mixed
# case), which cannot match a lower-case vocabulary.
# NOTE(review): lower-casing assumes the dataset text is lower-case, as the
# dataframe previews suggest; confirm against the preprocessing step.
normalised_skills = (
    new_job_seeker['skills']
    .str.lower()
    .str.replace(r'\s*,\s*', ',', regex=True)
    .str.strip()
)

new_job_seeker_skills = normalised_skills.str.get_dummies(sep=',')
# Columns not present for this seeker are filled with 0; tokens that are not
# in the vocabulary are dropped by the reindex.
new_job_seeker_skills = new_job_seeker_skills.reindex(columns=job_offers['Required skills'].unique(), fill_value=0)

new_job_seeker_features = np.array(new_job_seeker_skills)

In [None]:
# Predict the job offers that are most similar to the new job seeker's skills
predictions = model.predict(new_job_seeker_features)

# Encode every job offer in the same column space as the model output, so that
# row i of this matrix describes row i of job_offers. Comparing against the
# job-seeker feature matrix instead would produce job-seeker row positions,
# which are meaningless (and possibly out of range) as job_offers positions.
required_skill_columns = job_offers['Required skills'].unique()
job_offer_features = (
    job_offers['Required skills']
    .str.get_dummies(sep=',')
    .reindex(columns=required_skill_columns, fill_value=0)
    .to_numpy()
)

# Shape (1, number of job offers): one similarity per offer for the single seeker.
similarities = cosine_similarity(predictions, job_offer_features)

# Rank the seeker's row itself. Slicing the 2-D argsort result with [-5:]
# selects rows, not columns, and would return every offer.
top_n = 5
most_similar_job_indices = np.argsort(similarities[0])[-top_n:][::-1]  # best match first

recommended_job_titles = job_offers.iloc[most_similar_job_indices]['Job post'].values
print(recommended_job_titles)


['senior manag analyt' 'softwar engin android'
 'associ princip engin big data' 'machin learn engin'
 'senior data analyst' 'softwar engin java' 'reactj sr softwar engin'
 'lead python develop b1 visa' 'senior staff engin' 'dotnet develop'
 'technolog consult sap secur' 'embed softwar engin'
 'senior assist system engin' 'softwar engin ii' 'l3 support engin'
 'data engin ndhm delhi' 'senior machin learn engin' 'data engin'
 'softwar engineer-java develop' 'data scientist'
 'manufactur engin me applic lead' 'graphic design'
 'busi data analyst market' 'specialist softwar engin' 'data scientist'
 'senior intermedi softwar engin' 'senior java develop'
 'senior technic lead data engin' 'python develop'
 'microsoft activ directori applic develop' 'programm' 'sr softwar engin'
 'market execut' 'softwar engin rubi rail' 'group manag'
 'site reliabl engin' 'softwar engin' 'linux system engineer/linux admin'
 'big data engin' 'senior project manag epm enterpris perform manag'
 'princip group en