In [33]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, StandardScaler
import os

# Read the numeric resume dataset from disk
csv_path = os.path.join('..', 'Data', 'Diagested_data', 'Numeric_resume_data.csv')
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)

# Columns used as model inputs (one score per trait)
trait_columns = [
    'Leadership',
    'Communication',
    'Teamwork',
    'Problem Solving',
    'Creativity',
    'Adaptability',
    'Work Ethic',
    'Time Management',
    'Interpersonal Skills',
    'Attention to Detail',
    'Initiative',
    'Analytical Thinking',
    'Emotional Intelligence',
    'Integrity',
    'Resilience',
    'Cultural Awareness',
    'Programming Languages',
    'Technical Skills',
    'Office Tools',
]

# Distinct job titles, in order of first appearance
job_titles = df['Job Title'].unique()

# Lookup from job title to its domain, built from the distinct (title, domain) pairs
title_domain_pairs = df[['Job Title', 'Domain']].drop_duplicates()
job_domains = title_domain_pairs.set_index('Job Title')['Domain'].to_dict()

# Two-way mapping between job titles and integer class labels
index_to_job_title = dict(enumerate(job_titles))
job_title_to_index = {title: idx for idx, title in index_to_job_title.items()}

# Attach the integer class label to every row
df['Job Label'] = df['Job Title'].map(job_title_to_index)

# Prepare feature and target arrays
X = df[trait_columns].values
y = df['Job Label'].values

# Split data into training and test sets BEFORE scaling, so the held-out rows
# never contribute to the scaler's mean/variance (fitting the scaler on the
# full matrix would leak test-set statistics into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: statistics are learned from the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix scaled with the training-set statistics; kept so that any code
# relying on the X_scaled name keeps working.
X_scaled = scaler.transform(X)

# Convert labels to one-hot encoding
num_classes = len(job_titles)
lb = LabelBinarizer()
lb.fit(y)  # Fit on the entire set of labels to ensure it has all classes
y_train_onehot = lb.transform(y_train)
y_test_onehot = lb.transform(y_test)

# Build the classifier layer by layer: two dense hidden layers, each followed
# by a LeakyReLU activation and dropout, then a softmax output with one unit
# per job class
n_features = X_train.shape[1]
model = Sequential()
model.add(Dense(512, input_shape=(n_features,)))
model.add(LeakyReLU(alpha=0.01))
model.add(Dropout(0.5))
model.add(Dense(256))
model.add(LeakyReLU(alpha=0.01))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Configure training: Adam optimizer, categorical cross-entropy on the
# one-hot targets, accuracy reported as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split; 20% of it is held out for validation
model.fit(
    X_train,
    y_train_onehot,
    validation_split=0.2,
    batch_size=64,
    epochs=100,
)

# Score the trained model on the test split
test_metrics = model.evaluate(X_test, y_test_onehot)
loss, accuracy = test_metrics
print(f'Test Accuracy: {accuracy:.2f}')

# Function to recommend top 5 jobs with their domains
def recommend_jobs(trait_scores, top_n=5):
    """Return the most probable jobs for one set of trait scores.

    The scores are reshaped into a single-row matrix, scaled with the
    module-level ``scaler`` and passed to the module-level ``model``; the
    resulting class probabilities are ranked from highest to lowest.

    Args:
        trait_scores: Sequence of numeric trait scores for one candidate.
            It must have as many entries as the ``scaler`` was fitted on.
        top_n: Maximum number of recommendations to return. ``0`` yields an
            empty list; values larger than the number of classes return
            every class.

    Returns:
        A list of ``(job_title, domain)`` tuples ordered from most to least
        probable, containing at most ``top_n`` entries.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f'top_n must be non-negative, got {top_n}')
    if top_n == 0:
        # Nothing was requested, so skip the model call entirely.
        return []

    # Predict job probabilities for the given trait scores
    features = np.array(trait_scores).reshape(1, -1)
    features_scaled = scaler.transform(features)  # Scale the input trait scores
    predictions = model.predict(features_scaled)

    # Rank classes by descending probability, then truncate. Slicing with
    # [-top_n:] would return *all* classes for top_n == 0 because -0 == 0.
    ranked_indices = np.argsort(predictions[0])[::-1][:top_n]

    # Map indices back to job titles and domains
    top_jobs = []
    for idx in ranked_indices:
        title = index_to_job_title[idx]
        top_jobs.append((title, job_domains[title]))
    return top_jobs

# Sample input: one score per entry of trait_columns
trait_scores = [
    2, 1, 3, 3, 2, 1, 1, 0, 1, 2,
    1, 1, 0, 0, 2, 1, 2, 1, 0,
]

# Ask for recommendations using the default top_n and list them
recommended_jobs = recommend_jobs(trait_scores)
print('Recommended Jobs:')
for recommendation in recommended_jobs:
    job, domain = recommendation
    print(f'- {job} in the field of {domain}')

# Write the trained model to disk and report where it went
model_save_path = os.path.join('..', 'Code', 'Pickles', 'job_recommendation_algorith.h5')
model.save(model_save_path)
print(f'Model saved to {model_save_path}')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78