In [1]:
#@title Load the data
# importing necessary modules
import tensorflow as tf
import numpy as np
import pandas as pd

# Download (or reuse the cached copy of) the recruitment tables and parse them.
train_csv = tf.keras.utils.get_file(
    'df_train.csv',
    'https://raw.githubusercontent.com/just-a-Programmer1/Recruitment_rep/main/Datasets/df_train.csv')
test_csv = tf.keras.utils.get_file(
    'df_test.csv',
    'https://raw.githubusercontent.com/just-a-Programmer1/Recruitment_rep/main/Datasets/df_test.csv')
dftrain = pd.read_csv(train_csv)
dftest = pd.read_csv(test_csv)

# Detach the target column from each frame and turn it into a float32
# column vector of shape (n_rows, 1).
y_train = np.asarray(dftrain.pop('Was_hired')).astype('float32').reshape((-1, 1))
y_test = np.asarray(dftest.pop('Was_hired')).astype('float32').reshape((-1, 1))

# Remove, in place, the columns that are not fed to the model.
for frame in (dftrain, dftest):
    for unused_column in ('Name', 'Gender', 'Application_year'):
        frame.pop(unused_column)

# Feature tables are independent copies of what is left in the frames;
# only the test table has its GPA column cast to float64.
x_train = dftrain.copy()
x_test = dftest.copy().astype({"Cumulative_GPA": "float64"})
def stack_dict(inputs, fun=tf.stack):
    """Cast every value in ``inputs`` to float32 and combine them in sorted-key order.

    Args:
        inputs: Mapping whose values are accepted by ``tf.cast``.
        fun: Callable invoked as ``fun(values, axis=-1)``; defaults to ``tf.stack``.

    Returns:
        Whatever ``fun`` returns for the list of cast values.
    """
    # Sorting the keys makes the resulting column order independent of the
    # mapping's insertion order.
    as_float = [tf.cast(inputs[key], tf.float32) for key in sorted(inputs.keys())]
    return fun(as_float, axis=-1)


categorical_feature_names = ['Education', 'Employee_eval']


def _input_dtype(name, column):
    """Pick the Keras input dtype for one feature column.

    Columns whose first value is a ``str`` are read as ``tf.string``; other
    columns listed in ``categorical_feature_names`` as ``tf.int64``; everything
    else as ``tf.float32``.
    """
    if type(column[0]) is str:
        return tf.string
    return tf.int64 if name in categorical_feature_names else tf.float32


# One scalar symbolic input per feature column, keyed by column name.
inputs = {
    name: tf.keras.Input(shape=(), name=name, dtype=_input_dtype(name, column))
    for name, column in x_train.items()
}

numeric_feature_names = ['Age', 'Experience', 'Test_result', 'Cumulative_GPA']
numeric_features = x_train[numeric_feature_names]

# Fit the normalization layer on the training values of the numeric columns.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(stack_dict(dict(numeric_features)))

# Stack the matching symbolic inputs the same way (stack_dict orders by key,
# so the columns line up with the adapted statistics) and normalize them.
numeric_inputs = stack_dict({name: inputs[name] for name in numeric_feature_names})
numeric_normalized = normalizer(numeric_inputs)

# List of preprocessed feature tensors; the normalized numerics come first.
preprocessed = [numeric_normalized]

# One-hot encode every categorical feature using the vocabulary observed in
# the training data. (A leftover demo lookup over a hard-coded vocabulary used
# to sit here; its result was discarded, so it has been removed.)
for name in categorical_feature_names:
    vocab = sorted(set(x_train[name]))
    print(f'name: {name}')
    print(f'vocab: {vocab}\n')

    # String vocabularies need a StringLookup, anything else an IntegerLookup.
    if type(vocab[0]) is str:
        lookup = tf.keras.layers.StringLookup(vocabulary=vocab, output_mode='one_hot')
    else:
        lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')

    # Add a trailing axis to the scalar input before the lookup.
    x = inputs[name][:, tf.newaxis]
    x = lookup(x)
    preprocessed.append(x)

# Join all preprocessed features along the last axis.
preprocessed_result = tf.concat(preprocessed, axis=-1)
# Alias for the earlier misspelled name, kept for any code that still uses it.
preprocesssed_result = preprocessed_result

# Model mapping the dict of raw feature inputs to the concatenated features.
preprocessor = tf.keras.Model(inputs, preprocessed_result)

# Run a single training row through the preprocessor (result is not stored).
preprocessor(dict(x_train.iloc[:1]))

# Classification head: two hidden ReLU layers followed by a 2-unit softmax.
body = tf.keras.Sequential()
body.add(tf.keras.layers.Dense(16, activation='relu'))
body.add(tf.keras.layers.Dense(32, activation='relu'))
body.add(tf.keras.layers.Dense(2, activation='softmax'))

# Full model: raw feature inputs -> preprocessor -> classification head.
result = body(preprocessor(inputs))
model = tf.keras.Model(inputs, result)

# Adam optimizer with learning rate 0.0001; the sparse categorical
# cross-entropy loss takes integer class labels against the softmax outputs.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train silently, one sample per batch, for 25 epochs.
history = model.fit(dict(x_train), y_train, epochs=25, batch_size=1, verbose=0)

In [None]:
#@title Start neural network training
# Download (or reuse cached copies of) the feedback-text datasets.
dftrain = pd.read_csv(tf.keras.utils.get_file('train.csv', 'https://raw.githubusercontent.com/just-a-Programmer1/Recruitment_rep/main/Datasets/train.csv'))
dftest = pd.read_csv(tf.keras.utils.get_file('test.csv', 'https://raw.githubusercontent.com/just-a-Programmer1/Recruitment_rep/main/Datasets/test.csv'))
dfval = pd.read_csv(tf.keras.utils.get_file('val.csv', 'https://raw.githubusercontent.com/just-a-Programmer1/Recruitment_rep/main/Datasets/val.csv'))

# Text ('feedback') and target ('label') columns of each split.
x = dftrain['feedback']
y = dftrain['label']
x1 = dftest['feedback']
y1 = dftest['label']
x2 = dfval['feedback']
y2 = dfval['label']

# Wrap each split as a tf.data pipeline of (text, label) pairs.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
test_dataset = tf.data.Dataset.from_tensor_slices((x1, y1))
val_dataset = tf.data.Dataset.from_tensor_slices((x2, y2))

BUFFER_SIZE = 665  # shuffle buffer; presumably the number of training rows - TODO confirm
BATCH_SIZE = 1
# Only the training split is shuffled; all splits are batched and prefetched.
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Learn a vocabulary of at most VOCAB_SIZE tokens from the training texts.
VOCAB_SIZE = 5000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

# Text -> token ids -> embeddings -> two stacked bidirectional LSTMs -> dense
# head. The final Dense(1) has no activation, so the model emits a raw logit.
model_new = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(len(encoder.get_vocabulary()), 64, mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])

# Because the output is a logit, the positive class corresponds to values
# above 0 (the same rule the prediction cell applies). The plain 'accuracy'
# string would be resolved to binary accuracy with its default 0.5 threshold
# and miscount logits between 0 and 0.5, so the threshold is set explicitly.
# The metric keeps the name 'accuracy' so the history/log keys are unchanged.
model_new.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(0.0001),
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])
history = model_new.fit(train_dataset, epochs=3,
                    validation_data=val_dataset)

In [None]:
#@title Enter your data and pass the test
# Colab form cell: collects the applicant's data, scores a short quiz and asks
# the tabular model (`model`) how well the applicant fits.
name = '' #@param {type:"string"}
#@markdown Enter your age
age = 18 #@param {type:"slider", min:18, max:65, step:1}
age = np.float64(age)
#@markdown How well are you recommended by other employers?
grade = "Null" #@param ["Null", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
#@markdown Education
education = "High_school" #@param ["High_school", "Bachelor", "Master", "PhD"]
#@markdown Enter your work experience in the IT field
experience = 0 #@param {type:"slider", min:0, max:50, step:1}
experience = np.int64(experience)
#@markdown Your cumulative GPA
rating = 2 #@param {type:"slider", min:2, max:5, step:0.01}
rating = np.float64(rating)
#@markdown test part
# Raw quiz score; the maximum is 3 + 1 + 1 + 3 + 3 + 3 = 14 points.
test = 0
#@markdown Choose the skill that suits you best
question_1 = 'efficiency' #@param ["efficiency", "industriousness", "purposefulness", "creativity"]
if question_1 == 'creativity':
  test += 3
#@markdown Do you consider yourself communicative?
question_2 = False #@param {type:"boolean"}
if question_2:
  test += 1
#@markdown How do you see the future?
question_3 = 'I live in the present' #@param ["I live in the present", "The future is unpredictable", "A philosophical question that requires an ambiguous answer"] {allow-input: true}
# Free text is allowed here, so only award the point for a non-blank answer
# (the previous comparison with ' ' also accepted an empty string).
if question_3.strip():
  test += 1
#@markdown Which is greater: the sum of all the digits or their product? (use decimal notation)
question_4 = 'Sum of all digits' #@param ["Sum of all digits", "Product of all digits", "There is no correct answer", "It all depends on the numbers"]
if question_4 == 'Sum of all digits':
  test += 3
else:
  print("You gave the wrong answer to question №4!\nCorrect answer: the product of all digits is 0 and the sum is 45,\nbecause 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 = 45\n")
#@markdown 1.5 squirrels eat 1.5 nuts in 1.5 minutes. How many nuts will 9 squirrels eat in 9 minutes?
question_5 =  0 #@param {type:"integer"}
if question_5 == 54:
  test += 3
else:
  print("You gave the wrong answer to question №5!\nCorrect answer: 9 / 1.5 = 6 - so many more times we are given time;\n9 * 6 = 54 - so many acorns will be eaten by 9 squirrels in 9 minutes\n")
#@markdown You have 32 motorcycles with a full tank (which is enough for a 100 km ride) connected to each other. using all motorcycles, how many km can you drive? All motorcycles are in the same line
question_6 = 0#@param {type:"integer"}
# NOTE(review): halving 32 -> 16 -> 8 -> 4 -> 2 -> 1 takes five 50 km legs
# (250 km) and the last bike then still has a full 100 km tank, which suggests
# 350 km rather than 300 km - confirm the intended answer before changing it.
if question_6 == 300:
  test += 3
else:
  print("You gave the wrong answer to question №6!\nCorrect answer: initially drive 50 km. All bikes will be\nhalf full. Dump the fuel from one half of the bikes\nto the other half. You now have 16 motorcycles with a full tank.\nDrive another 50 km and repeat the operation. In this way, you can drive 300 km\n")
# Rescale the raw score from 0..14 to 2..100.
test = test * 7 + 2
test = np.float64(test)
print(f'You passed the test with {int(test)} points out of 100. Congratulations {name}!')

# Single-row frame whose column names match the model's input names.
df = pd.DataFrame({'Age': [age],
                   'Cumulative_GPA': [rating],
                   'Education': [education],
                   'Employee_eval': [grade],
                   'Experience': [experience],
                   'Test_result': [test]})

# The model outputs two softmax values per row; index 1 is reported as the fit.
final = model.predict(dict(df), verbose=0)
print(f'You fit us {int(final[0][1] * 100)}%') # Output on display

In [None]:
#@title Enter your letter of recommendation
review = '' #@param {type: "string"}
# Score the single review text with the text model; a positive output is
# treated as a favourable verdict.
rev_prediction = model_new.predict(np.array([review]), verbose=0)
verdict = (
  'Based on your characteristics, we can conclude that you are a good employee and will suit us'
  if rev_prediction > 0 else
  'Based on your characteristics, we can conclude that you are a bad employee and will not suit us'
)
print(verdict)

In [None]:
#@title Prediction rate
#@markdown A rate higher than 0 is considered good
# Show the scalar score of the single review that was predicted.
rev_prediction[0, 0]

In [None]:
#@title The accuracy of neural networks
# evaluate() returns [loss, accuracy] for both models. Index the accuracy
# directly instead of unpacking the loss into `_`: assigning to `_` in a
# notebook rebinds IPython's last-output variable.
linear = model.evaluate(dict(x_test), y_test, verbose=0)[1]
lstm = model_new.evaluate(test_dataset, verbose=0)[1]
print(f'The accuracy of linear model: {round(linear*100)} %\nThe accuracy of LSTM model:   {round(lstm*100)} %')