In [None]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [None]:
# Define parameters for synthetic data
num_professionals = 1000
num_jobs = 500
skills = ['nursing', 'physician', 'radiology', 'pharmacy', 'lab']
locations = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Philadelphia']
certifications = ['BLS', 'ACLS', 'PALS', 'CPR', 'NRP']
education = ['Associate', 'Bachelor', 'Master', 'Doctorate']
work_preferences = ['part-time', 'full-time', 'day shift', 'night shift']
skill_levels = ['entry-level', 'intermediate', 'advanced']
max_experience_years = 30

In [None]:
# Generate synthetic data for healthcare professionals
professionals_df = pd.DataFrame(columns=['id', 'skill', 'location', 'certification', 'education', 'skill_level', 'experience', 'work_preference'])
for i in range(num_professionals):
    professional_id = i + 1
    skill = random.choice(skills)
    location = random.choice(locations)
    certification = random.sample(certifications, random.randint(1, 3))
    education = random.choice(education)
    skill_level = random.choice(skill_levels)
    experience_years = random.randint(0, max_experience_years)
    work_preference = random.choice(work_preferences)
    professionals_df.loc[i] = [professional_id, skill, location, certification, education, skill_level, experience_years, work_preference]

In [None]:
# Generate synthetic data for job opportunities
jobs_df = pd.DataFrame(columns=['id', 'skill', 'location', 'certification_required', 'education_required', 'skill_level_required', 'experience_required', 'work_preference'])
for i in range(num_jobs):
    job_id = i + 1
    skill = random.choice(skills)
    location = random.choice(locations)
    certification_required = random.sample(certifications, random.randint(1, 3))
    education_required = random.choice(education)
    skill_level_required = random.choice(skill_levels)
    experience_years_required = random.randint(0, max_experience_years)
    work_preference = random.choice(work_preferences)
    jobs_df.loc[i] = [job_id, skill, location, certification_required, education_required, skill_level_required, experience_years_required, work_preference]

In [None]:
# Save synthetic data to CSV files
professionals_df.to_csv('professionals.csv', index=False)
jobs_df.to_csv('jobs.csv', index=False)

In [None]:
# Load synthetic data
professionals_df = pd.read_csv('/content/professionals.csv')
jobs_df = pd.read_csv('jobs.csv')

# Remove duplicates
professionals_df.drop_duplicates(inplace=True)
jobs_df.drop_duplicates(inplace=True)

In [None]:
professionals_df.head()

Unnamed: 0,id,skill,location,certification,education,skill_level,experience,work_preference
0,1,nursing,Philadelphia,"['ACLS', 'BLS', 'NRP']",Bachelor,entry-level,11,day shift
1,2,lab,Chicago,['NRP'],l,advanced,21,full-time
2,3,nursing,Chicago,['PALS'],l,intermediate,29,night shift
3,4,pharmacy,Chicago,"['NRP', 'CPR']",l,intermediate,24,night shift
4,5,nursing,New York,"['PALS', 'BLS']",l,advanced,29,part-time


In [None]:
jobs_df.head()

Unnamed: 0,id,skill,location,certification_required,education_required,skill_level_required,experience_required,work_preference
0,1,pharmacy,New York,"['PALS', 'NRP', 'CPR']",l,intermediate,9,full-time
1,2,nursing,New York,"['PALS', 'NRP']",l,intermediate,28,full-time
2,3,nursing,Los Angeles,['PALS'],l,intermediate,1,night shift
3,4,physician,Chicago,"['CPR', 'BLS']",l,intermediate,7,night shift
4,5,nursing,Philadelphia,"['PALS', 'CPR', 'ACLS']",l,intermediate,30,night shift


In [None]:
# Handle missing values
professionals_df.fillna(value=np.nan, inplace=True)
jobs_df.fillna(value=np.nan, inplace=True)


In [None]:
jobs_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 500 entries, 0 to 499
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   id                      500 non-null    int64 
 1   skill                   500 non-null    object
 2   location                500 non-null    object
 3   certification_required  500 non-null    object
 4   education_required      500 non-null    object
 5   skill_level_required    500 non-null    object
 6   experience_required     500 non-null    int64 
 7   work_preference         500 non-null    object
dtypes: int64(2), object(6)
memory usage: 35.2+ KB


In [None]:
professionals_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               1000 non-null   int64 
 1   skill            1000 non-null   object
 2   location         1000 non-null   object
 3   certification    1000 non-null   object
 4   education        1000 non-null   object
 5   skill_level      1000 non-null   object
 6   experience       1000 non-null   int64 
 7   work_preference  1000 non-null   object
dtypes: int64(2), object(6)
memory usage: 70.3+ KB


In [None]:
# Merge the two data frames on the skill column
merged_df = pd.merge(professionals_df, jobs_df, on='skill', how='inner', suffixes=('_pro', '_job'))


In [None]:
merged_df.head()

Unnamed: 0,id_pro,skill,location_pro,certification,education,skill_level,experience,work_preference_pro,id_job,location_job,certification_required,education_required,skill_level_required,experience_required,work_preference_job
0,1,nursing,Philadelphia,"['ACLS', 'BLS', 'NRP']",Bachelor,entry-level,11,day shift,2,New York,"['PALS', 'NRP']",l,intermediate,28,full-time
1,1,nursing,Philadelphia,"['ACLS', 'BLS', 'NRP']",Bachelor,entry-level,11,day shift,3,Los Angeles,['PALS'],l,intermediate,1,night shift
2,1,nursing,Philadelphia,"['ACLS', 'BLS', 'NRP']",Bachelor,entry-level,11,day shift,5,Philadelphia,"['PALS', 'CPR', 'ACLS']",l,intermediate,30,night shift
3,1,nursing,Philadelphia,"['ACLS', 'BLS', 'NRP']",Bachelor,entry-level,11,day shift,6,New York,"['CPR', 'BLS']",l,intermediate,5,full-time
4,1,nursing,Philadelphia,"['ACLS', 'BLS', 'NRP']",Bachelor,entry-level,11,day shift,9,Houston,['NRP'],l,intermediate,22,night shift


In [None]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100122 entries, 0 to 100121
Data columns (total 15 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   id_pro                  100122 non-null  int64 
 1   skill                   100122 non-null  object
 2   location_pro            100122 non-null  object
 3   certification           100122 non-null  object
 4   education               100122 non-null  object
 5   skill_level             100122 non-null  object
 6   experience              100122 non-null  int64 
 7   work_preference_pro     100122 non-null  object
 8   id_job                  100122 non-null  int64 
 9   location_job            100122 non-null  object
 10  certification_required  100122 non-null  object
 11  education_required      100122 non-null  object
 12  skill_level_required    100122 non-null  object
 13  experience_required     100122 non-null  int64 
 14  work_preference_job     100122 non-n

In [None]:
# Columns to encode
cols_to_encode = ['skill', 'location_pro', 'certification', 'education', 'skill_level', 'work_preference_pro']

# Create a LabelEncoder object
le = LabelEncoder()

# Encode the categorical columns
for col in cols_to_encode:
    merged_df[col] = le.fit_transform(merged_df[col])

In [None]:
merged_df.head()

Unnamed: 0,id_pro,skill,location_pro,certification,education,skill_level,experience,work_preference_pro,id_job,location_job,certification_required,education_required,skill_level_required,experience_required,work_preference_job
0,1,1,4,1,0,1,11,0,2,New York,"['PALS', 'NRP']",l,intermediate,28,full-time
1,1,1,4,1,0,1,11,0,3,Los Angeles,['PALS'],l,intermediate,1,night shift
2,1,1,4,1,0,1,11,0,5,Philadelphia,"['PALS', 'CPR', 'ACLS']",l,intermediate,30,night shift
3,1,1,4,1,0,1,11,0,6,New York,"['CPR', 'BLS']",l,intermediate,5,full-time
4,1,1,4,1,0,1,11,0,9,Houston,['NRP'],l,intermediate,22,night shift


In [None]:
# Create a user-item matrix
user_item_matrix = merged_df.pivot_table(index="id_pro", columns="id_job", values="experience_required", fill_value=0)

# Preview the user-item matrix
print(user_item_matrix.head())


id_job  1    2    3    4    5    6    7    8    9    10   ...  491  492  493  \
id_pro                                                    ...                  
1         0   28    1    0   30    5    0    0   22    0  ...    0    0    0   
2         0    0    0    0    0    0    0    0    0    0  ...   10   27    0   
3         0   28    1    0   30    5    0    0   22    0  ...    0    0    0   
4         9    0    0    0    0    0   18    0    0   22  ...    0    0    0   
5         0   28    1    0   30    5    0    0   22    0  ...    0    0    0   

id_job  494  495  496  497  498  499  500  
id_pro                                     
1         3   19    0    0    0    0    0  
2         0    0    0    0    0    0    0  
3         3   19    0    0    0    0    0  
4         0    0    9    0    0    0    0  
5         3   19    0    0    0    0    0  

[5 rows x 500 columns]


In [None]:
# Compute cosine similarity matrix
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim_matrix = cosine_similarity(user_item_matrix)

In [None]:
from sklearn.metrics.pairwise import pairwise_distances

# Compute the cosine similarity between the users based on their encoded features
user_similarity = 1 - pairwise_distances(user_item_matrix, metric='cosine')

In [None]:
# Transpose the user_item_matrix to create the item_user_matrix
item_user_matrix = user_item_matrix.T

In [None]:
def recommend_jobs_for_user(user_id, user_item_matrix, item_user_matrix, top_n=10):
    """Recommend top N jobs for a given user ID based on the collaborative filtering model"""
    # Get the row index for the user ID
    user_index = user_item_matrix.index.get_loc(user_id)
    # Get the user-item similarity scores for the user
    user_scores = user_item_matrix.iloc[user_index,:]
    # Sort the scores in descending order
    sorted_scores = user_scores.sort_values(ascending=False)
    # Get the top N job IDs
    top_job_ids = sorted_scores.index[:top_n]
    # Convert the job IDs to integers
    top_job_ids = [int(job_id) for job_id in top_job_ids]
    # Return the top job IDs
    return top_job_ids

In [None]:
# Get a list of recommended job IDs for user with ID
recommended_job_ids = recommend_jobs_for_user(25, user_item_matrix, item_user_matrix, top_n=5)
print(recommended_job_ids)


[62, 243, 427, 112, 280]


In [None]:
# Define a function to get job titles from IDs
def get_job_titles(job_ids):
    # Load the dataset
    jobs_df = pd.read_csv('jobs.csv')

    # Filter the dataset to only include the desired job IDs
    recommended_jobs = jobs_df[jobs_df['id'].isin(job_ids)]

    # Extract the job titles from the filtered dataset
    job_titles = recommended_jobs[['id', 'skill', 'location', 'certification_required',
       'education_required', 'skill_level_required', 'experience_required',
       'work_preference']].values.tolist()

    return job_titles

In [None]:
# Get the job titles for the recommended job IDs
recommended_job_titles = get_job_titles(job_ids=recommended_job_ids)
recommendations = pd.DataFrame(recommended_job_titles)
recommendations.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7
0,62,radiology,Los Angeles,"['ACLS', 'BLS']",l,advanced,30,day shift
1,112,radiology,New York,['ACLS'],l,advanced,29,part-time
2,243,radiology,Los Angeles,"['PALS', 'ACLS']",l,entry-level,29,day shift
3,280,radiology,Chicago,['BLS'],l,intermediate,29,part-time
4,427,radiology,New York,"['NRP', 'CPR']",l,advanced,29,part-time


In [None]:
# create flask, https://github.com/abh2050/Codes/tree/main/Recommender_system
from flask import Flask, request
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///database.db'
db = SQLAlchemy(app)

class Job(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    skill = db.Column(db.String(50), nullable=False)
    location = db.Column(db.String(50), nullable=False)
    certification_required = db.Column(db.String(50), nullable=False)
    education_required = db.Column(db.String(50), nullable=False)
    skill_level_required = db.Column(db.String(50), nullable=False)
    experience_required = db.Column(db.Integer, nullable=False)
    work_preference = db.Column(db.String(50), nullable=False)

@app.route('/jobs', methods=['POST'])
def create_job():
    data = request.form
    new_job = Job(skill=data['skill'], location=data['location'], certification_required=data['certification_required'], 
                  education_required=data['education_required'], skill_level_required=data['skill_level_required'], 
                  experience_required=data['experience_required'], work_preference=data['work_preference'])
    db.session.add(new_job)
    db.session.commit()
    return 'Job created successfully'

if __name__ == '__main__':
    app.run(debug=True)

In [None]:
<!DOCTYPE html>
<html>
  <head>
    <title>Job Form</title>
    <style>
      label, input {
        display: block;
        margin-bottom: 10px;
      }
    </style>
  </head>
  <body>
    <h1>Job Form</h1>
    <form id="job-form">
      <label for="skill">Skill:</label>
      <input type="text" id="skill" name="skill" required>

      <label for="location">Location:</label>
      <input type="text" id="location" name="location" required>

      <label for="certification_required">Certification Required:</label>
      <input type="text" id="certification_required" name="certification_required" required>

      <label for="education_required">Education Required:</label>
      <input type="text" id="education_required" name="education_required" required>

      <label for="skill_level_required">Skill Level Required:</label>
      <input type="text" id="skill_level_required" name="skill_level_required" required>

      <label for="experience_required">Experience Required:</label>
      <input type="number" id="experience_required" name="experience_required" required>

      <label for="work_preference">Work Preference:</label>
      <select id="work_preference" name="work_preference" required>
        <option value="Full-time">Full-time</option>
        <option value="Part-time">Part-time</option>
        <option value="Contract">Contract</option>
        <option value="Temporary">Temporary</option>
      </select>

      <button type="submit">Submit</button>
    </form>
    <script>
      const form = document.getElementById('job-form');
      form.addEventListener('submit', (event) => {
        event.preventDefault();
        const formData = new FormData(form);
        fetch('/jobs', {
          method: 'POST',
          body: formData
        }).then((response) => {
          if (response.ok) {
            alert('Job created successfully');
            form.reset();
          } else {
            alert('Error creating job');
          }
        }).catch((error) => {
          console.error(error);
          alert('Error creating job');
        });
      });
    </script>
  </body>
</html>