In [140]:
#Import the necessary libraries
import pandas as pd
import torch
import random
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader


In [141]:
#Load the data
candidates_df = pd.read_csv("candidates_data.csv")
job_orders_df = pd.read_csv("job_orders_data.csv")

In [142]:
print(candidates_df.head())


  CandidateID                                             Skills  \
0          C1  ['Java', 'Project Management', 'Python', 'Mach...   
1          C2  ['Machine Learning', 'SQL', 'C++', 'Deep Learn...   
2          C3        ['Leadership', 'Project Management', 'C++']   
3          C4  ['C++', 'Python', 'Excel', 'Leadership', 'Proj...   
4          C5  ['Machine Learning', 'Leadership', 'Data Analy...   

   ExperienceYears   Education CurrentLocation  RelocationWillingness  \
0               13  Bachelor's         Seattle                  False   
1               15    Master's         Chicago                   True   
2               12  Bachelor's         Chicago                   True   
3                4    Master's         Chicago                  False   
4                7    Master's         Seattle                   True   

   SalaryExpectation PersonalityAssessment  
0              42849             Extrovert  
1              41451              Ambivert  
2              51

In [143]:

print(job_orders_df.head())

  JobID                                     RequiredSkills        JobTitle  \
0    J1  ['Project Management', 'Python', 'Leadership',...         Analyst   
1    J2  ['Project Management', 'Excel', 'SQL', 'Data A...  Data Scientist   
2    J3   ['Excel', 'Project Management', 'Deep Learning']         Analyst   
3    J4               ['Excel', 'Data Analysis', 'Python']  Data Scientist   
4    J5                  ['Data Analysis', 'C++', 'Excel']         Analyst   

                                      JobDescription       Location  \
0  We are looking for a Analyst to join our Finan...        Chicago   
1  We are looking for a Data Scientist to join ou...        Chicago   
2  We are looking for a Analyst to join our Retai...        Chicago   
3  We are looking for a Data Scientist to join ou...  San Francisco   
4  We are looking for a Analyst to join our Retai...        Seattle   

       SalaryRange CompanyInfo    JobType  
0   $71765-$146458     Finance  Full-time  
1   $146688-$988

In [144]:
#Convert string lists to Python lists
mlb = MultiLabelBinarizer()
candidates_df['Skills'] = candidates_df['Skills'].apply(eval)  # Convert string lists to Python lists
job_orders_df['RequiredSkills'] = job_orders_df['RequiredSkills'].apply(eval)

candidate_skills = mlb.fit_transform(candidates_df['Skills'])
job_skills = mlb.transform(job_orders_df['RequiredSkills'])

In [145]:
#Combine relevant features
candidates_df['FeatureVector'] = list(candidate_skills)
job_orders_df['FeatureVector'] = list(job_skills)

In [146]:
X_candidates = torch.tensor(candidate_skills, dtype=torch.float32)
X_jobs = torch.tensor(job_skills, dtype=torch.float32)
y = torch.tensor([random.randint(0, 1) for _ in range(len(candidates_df))], dtype=torch.float32)

X_train, X_test, y_train, y_test = train_test_split(X_candidates, y, test_size=0.2, random_state=42)

In [147]:
#Create PyTorch Dataset and DataLoader
class CandidateJobDataset(Dataset):
    def __init__(self, candidates, labels):
        self.candidates = candidates
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.candidates[idx], self.labels[idx]


In [148]:
train_dataset = CandidateJobDataset(X_train, y_train)
test_dataset = CandidateJobDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [149]:
#Build a Neural Network Model
import torch.nn as nn

class MatchingModel(nn.Module):
    def __init__(self, input_size):
        super(MatchingModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x


In [150]:
model = MatchingModel(input_size=X_candidates.shape[1])

In [151]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [152]:
#Train the model
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

train_model(model, train_loader, criterion, optimizer)


Epoch 1/10, Loss: 0.6724362969398499
Epoch 2/10, Loss: 0.6818081140518188
Epoch 3/10, Loss: 0.6649078130722046
Epoch 4/10, Loss: 0.6563688516616821
Epoch 5/10, Loss: 0.6912306547164917
Epoch 6/10, Loss: 0.7135773301124573
Epoch 7/10, Loss: 0.6712399125099182
Epoch 8/10, Loss: 0.6921951770782471
Epoch 9/10, Loss: 0.703930675983429
Epoch 10/10, Loss: 0.6987135410308838


In [155]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

In [156]:
#Evaluate the Model
def evaluate_model(model, test_loader):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch).squeeze()
            predictions = (outputs > 0.5).float()  # Threshold at 0.5
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    # Calculate metrics
    precision = precision_score(all_labels, all_predictions)
    recall = recall_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)

    print("Classification Report:")
    print(classification_report(all_labels, all_predictions))
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")




In [157]:
evaluate_model(model, test_loader)

Classification Report:
              precision    recall  f1-score   support

         0.0       0.47      0.98      0.64        47
         1.0       0.67      0.04      0.07        53

    accuracy                           0.48       100
   macro avg       0.57      0.51      0.36       100
weighted avg       0.58      0.48      0.34       100

Precision: 0.67
Recall: 0.04
F1 Score: 0.07
