In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's global random generator so the np.random draws in this notebook
# are repeatable from a fresh kernel
np.random.seed(1693)


In [None]:
# Size of the synthetic applicant pool
n_samples = 1000

# Work experience in years: normal draws around 5 (sd 2), floored at zero and
# kept to one decimal place
work_exp_draws = np.random.normal(loc=5, scale=2, size=n_samples)
work_exp = work_exp_draws.clip(min=0).round(1)

# Highest education level, coded 1=High School, 2=Bachelor's, 3=Master's, 4=PhD
education_levels = [1, 2, 3, 4]
education = np.random.choice(education_levels, size=n_samples, p=[0.1, 0.5, 0.35, 0.05])

# Leadership experience in years: normal draws around 2 (sd 1), floored at zero
leadership_draws = np.random.normal(loc=2, scale=1, size=n_samples)
leadership_exp = leadership_draws.clip(min=0).round(1)

# Relevant field experience: centred on 80% of each applicant's work experience,
# floored at zero and then capped so it never exceeds total work experience
relevant_draws = np.random.normal(loc=0.8 * work_exp, scale=1)
relevant_exp = np.minimum(relevant_draws.clip(min=0).round(1), work_exp)

# Demographic attributes
age_draws = np.random.normal(loc=30, scale=5, size=n_samples)
age = age_draws.clip(min=18).round(1)
gender_labels = ['M', 'F', 'O']
gender = np.random.choice(gender_labels, size=n_samples, p=[0.4, 0.55, 0.05])
ethnicity_labels = ['Asian', 'White', 'Black', 'Hispanic', 'Other']
ethnicity = np.random.choice(ethnicity_labels, size=n_samples, p=[0.3, 0.3, 0.2, 0.1, 0.1])

# How the applicant was sourced: 1=Online job board, 2=Referral, 3=Company website
source_codes = [1, 2, 3]
recruitment_source = np.random.choice(source_codes, size=n_samples, p=[0.6, 0.25, 0.15])

# Binary skill indicators, one column per skill; each column uses its own
# success probability from skill_probs
skill_probs = [0.7, 0.8, 0.6]
skills = np.random.binomial(n=1, p=skill_probs, size=(n_samples, len(skill_probs)))

In [None]:
# Assemble the generated arrays into one table, one row per applicant
applicant_columns = {
    'Work Experience (Years)': work_exp,
    'Education': education,
    'Leadership Experience (Years)': leadership_exp,
    'Relevant Experience (Years)': relevant_exp,
    'Age': age,
    'Gender': gender,
    'Ethnicity': ethnicity,
    'Recruitment Source': recruitment_source,
    'SQL': skills[:, 0],
    'Python': skills[:, 1],
    'R': skills[:, 2],
}
data = pd.DataFrame(applicant_columns)

# Deterministic hiring label: an applicant is hired (1) only when all three
# conditions hold -- at least 3 years of work experience, education level of
# Bachelor's or above, and the Python skill flag set
has_enough_experience = data['Work Experience (Years)'].ge(3)
has_degree = data['Education'].ge(2)
knows_python = data['Python'].eq(1)
data['Hired'] = (has_enough_experience & has_degree & knows_python).astype(int)

In [None]:
# Preview the first 10 rows (generated features plus the derived 'Hired' label)
data.head(10)

Unnamed: 0,Work Experience (Years),Education,Leadership Experience (Years),Relevant Experience (Years),Age,Gender,Ethnicity,Recruitment Source,SQL,Python,R,Hired
0,2.8,2,1.6,2.1,25.6,M,White,2,0,1,1,0
1,6.0,3,1.0,4.5,27.3,F,Black,1,1,1,1,1
2,5.7,3,2.8,3.3,34.0,M,Black,2,0,1,1,1
3,6.9,3,2.1,5.0,26.3,F,Asian,3,1,1,1,1
4,6.7,2,2.6,4.8,23.9,M,White,1,1,1,0,1
5,5.3,2,2.6,4.6,36.4,F,Hispanic,3,0,1,1,1
6,6.4,1,1.4,6.0,27.2,M,Asian,1,1,1,1,0
7,6.9,3,2.2,4.0,34.0,M,White,1,0,0,1,0
8,5.9,2,1.8,5.6,29.1,F,Asian,3,1,1,1,1
9,6.7,3,1.3,6.4,44.1,O,Black,2,1,1,1,1


In [None]:
# Save the dataset to a CSV file in the current working directory; index=False
# leaves the row index out of the file
data.to_csv('synthetic_applicant_data.csv', index=False)

Train The Model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



In [None]:
# Load the dataset written by the generation step. A relative path is used so the
# notebook reads the file from wherever it was saved (the working directory),
# rather than only working when that directory happens to be /content.
data = pd.read_csv('synthetic_applicant_data.csv')

# One-hot encode the categorical variables. dtype=float keeps every dummy column
# numeric: newer pandas versions emit bool dummies by default, and a frame mixing
# bool and float columns converts to an object array that Keras cannot ingest.
data_encoded = pd.get_dummies(
    data,
    columns=['Gender', 'Ethnicity', 'Recruitment Source'],
    dtype=float,
)

# Split into training (64%), validation (16%) and test (20%) sets BEFORE scaling,
# so that no statistics from held-out rows influence the training features.
X_train, X_test, y_train, y_test = train_test_split(
    data_encoded.drop('Hired', axis=1),
    data_encoded['Hired'],
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Standardise the continuous variables. The scaler is fit on the training set only
# and then applied unchanged to validation and test data, which avoids leaking
# held-out means/variances into training.
continuous_cols = [
    'Work Experience (Years)',
    'Education',
    'Leadership Experience (Years)',
    'Relevant Experience (Years)',
    'Age',
]
scaler = StandardScaler()

# Work on explicit copies so the column assignments below do not write into
# (or warn about) slices of data_encoded.
X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()
X_train[continuous_cols] = scaler.fit_transform(X_train[continuous_cols])
X_val[continuous_cols] = scaler.transform(X_val[continuous_cols])
X_test[continuous_cols] = scaler.transform(X_test[continuous_cols])


In [None]:
# Report (rows, columns) for each feature matrix to confirm the split sizes
for shape_label, feature_frame in (
    ('Training data shape:', X_train),
    ('Validation data shape:', X_val),
    ('Test data shape:', X_test),
):
    print(shape_label, feature_frame.shape)

Training data shape: (640, 19)
Validation data shape: (160, 19)
Test data shape: (200, 19)


In [None]:
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam

In [None]:
# Feed-forward binary classifier: two ReLU hidden layers (64 then 32 units), each
# followed by dropout at rate 0.5, ending in a single sigmoid output unit.
# The input width is taken from the number of feature columns in X_train.
n_features = X_train.shape[1]
model = Sequential([
    Dense(64, activation='relu', input_shape=(n_features,)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Minimise binary cross-entropy with Adam (learning rate 0.001), tracking accuracy
adam_optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=adam_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model with early stopping, monitoring the dedicated validation set.
# validation_data=(X_val, y_val) is used instead of validation_split so that all
# of X_train is used for fitting and the already-held-out X_val drives monitoring.
# Training halts once validation loss has not improved for 10 consecutive epochs,
# and the weights from the best epoch are restored.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,  # upper bound; early stopping may end training sooner
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Evaluate the model on the validation set; score is [loss, accuracy], matching
# the loss and metrics the model was compiled with
score = model.evaluate(X_val, y_val, verbose=0)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Final check on the untouched test split; evaluate returns [loss, accuracy]
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Test loss: 0.04796288162469864
Test accuracy: 0.9800000190734863


In [None]:
# Persist the trained model to 'my_model.h5' in the current working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 format -- confirm
# this is the format downstream consumers expect.
model.save('my_model.h5')