In [52]:
#Import the necessary libraries
import pandas as pd
import torch
import random
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader


In [53]:
#Load the data
candidates_df = pd.read_csv("candidates_data.csv")
job_orders_df = pd.read_csv("job_orders_data.csv")

In [54]:
print(candidates_df.head())


  CandidateID                                             Skills  \
0          C1  ['Java', 'Project Management', 'Python', 'Mach...   
1          C2  ['Machine Learning', 'SQL', 'C++', 'Deep Learn...   
2          C3        ['Leadership', 'Project Management', 'C++']   
3          C4  ['C++', 'Python', 'Excel', 'Leadership', 'Proj...   
4          C5  ['Machine Learning', 'Leadership', 'Data Analy...   

   ExperienceYears   Education CurrentLocation  RelocationWillingness  \
0               13  Bachelor's         Seattle                  False   
1               15    Master's         Chicago                   True   
2               12  Bachelor's         Chicago                   True   
3                4    Master's         Chicago                  False   
4                7    Master's         Seattle                   True   

   SalaryExpectation PersonalityAssessment  
0              42849             Extrovert  
1              41451              Ambivert  
2              51

In [55]:

print(job_orders_df.head())

  JobID                                     RequiredSkills        JobTitle  \
0    J1  ['Project Management', 'Python', 'Leadership',...         Analyst   
1    J2  ['Project Management', 'Excel', 'SQL', 'Data A...  Data Scientist   
2    J3   ['Excel', 'Project Management', 'Deep Learning']         Analyst   
3    J4               ['Excel', 'Data Analysis', 'Python']  Data Scientist   
4    J5                  ['Data Analysis', 'C++', 'Excel']         Analyst   

                                      JobDescription       Location  \
0  We are looking for a Analyst to join our Finan...        Chicago   
1  We are looking for a Data Scientist to join ou...        Chicago   
2  We are looking for a Analyst to join our Retai...        Chicago   
3  We are looking for a Data Scientist to join ou...  San Francisco   
4  We are looking for a Analyst to join our Retai...        Seattle   

       SalaryRange CompanyInfo    JobType  
0   $71765-$146458     Finance  Full-time  
1   $146688-$988

In [56]:
#Convert string lists to Python lists
mlb = MultiLabelBinarizer()
candidates_df['Skills'] = candidates_df['Skills'].apply(eval)  # Convert string lists to Python lists
job_orders_df['RequiredSkills'] = job_orders_df['RequiredSkills'].apply(eval)

candidate_skills = mlb.fit_transform(candidates_df['Skills'])
job_skills = mlb.transform(job_orders_df['RequiredSkills'])

In [57]:
#Combine relevant features
candidates_df['FeatureVector'] = list(candidate_skills)
job_orders_df['FeatureVector'] = list(job_skills)

In [58]:
X_candidates = torch.tensor(candidate_skills, dtype=torch.float32)
X_jobs = torch.tensor(job_skills, dtype=torch.float32)
y = torch.tensor([random.randint(0, 1) for _ in range(len(candidates_df))], dtype=torch.float32)

X_train, X_test, y_train, y_test = train_test_split(X_candidates, y, test_size=0.2, random_state=42)

In [59]:
#Create PyTorch Dataset and DataLoader
class CandidateJobDataset(Dataset):
    def __init__(self, candidates, labels):
        self.candidates = candidates
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.candidates[idx], self.labels[idx]


In [60]:
train_dataset = CandidateJobDataset(X_train, y_train)
test_dataset = CandidateJobDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [61]:
#Build a Neural Network Model
import torch.nn as nn

class MatchingModel(nn.Module):
    def __init__(self, input_size):
        super(MatchingModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x


In [62]:
model = MatchingModel(input_size=X_candidates.shape[1])

In [63]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [64]:
#Train the model
def train_model(model, train_loader, criterion, optimizer, epochs=25):
    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

train_model(model, train_loader, criterion, optimizer)


Epoch 1/25, Loss: 0.689159095287323
Epoch 2/25, Loss: 0.6837843656539917
Epoch 3/25, Loss: 0.691313624382019
Epoch 4/25, Loss: 0.6791688799858093
Epoch 5/25, Loss: 0.6912940740585327
Epoch 6/25, Loss: 0.6869218349456787
Epoch 7/25, Loss: 0.7159609794616699
Epoch 8/25, Loss: 0.6904095411300659
Epoch 9/25, Loss: 0.6927465200424194
Epoch 10/25, Loss: 0.6913606524467468
Epoch 11/25, Loss: 0.7072659134864807
Epoch 12/25, Loss: 0.6690250635147095
Epoch 13/25, Loss: 0.6794211268424988
Epoch 14/25, Loss: 0.6688342094421387
Epoch 15/25, Loss: 0.6454399228096008
Epoch 16/25, Loss: 0.6777408123016357
Epoch 17/25, Loss: 0.6434713006019592
Epoch 18/25, Loss: 0.6544849276542664
Epoch 19/25, Loss: 0.610523521900177
Epoch 20/25, Loss: 0.647114098072052
Epoch 21/25, Loss: 0.669386088848114
Epoch 22/25, Loss: 0.6953056454658508
Epoch 23/25, Loss: 0.6307263970375061
Epoch 24/25, Loss: 0.6451723575592041
Epoch 25/25, Loss: 0.620597243309021


In [65]:
#Evaluate the Model
def evaluate_model(model, test_loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch).squeeze()
            predictions = (outputs > 0.5).float()
            correct += (predictions == y_batch).sum().item()
            total += y_batch.size(0)
    print(f"Accuracy: {correct / total * 100:.2f}%")

evaluate_model(model, test_loader)


Accuracy: 56.00%
