In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib

# Load the trained model
model_filename = 'trained_model.joblib'
model = joblib.load(model_filename)

# Load and preprocess new data
new_data = pd.read_csv('user_data.csv')  # Replace with the path to your new data CSV file
names = new_data['Full Name']  # Store the names for printing later
polling_stations = new_data['Polling Station']  # Store the polling stations for printing
new_data = new_data.drop(["Full Name", "Email", "DOB", "Position Type"], axis=1)  # Remove 'Position Type'
new_data['Experience'] = new_data['Experience'].str.replace(' years', '').astype(int)
new_data['Current Salary'] = new_data['Current Salary'].str.replace('[$,]', '', regex=True).astype(float)

label_encoder = LabelEncoder()
categorical_columns = ["Education Level", "Gender", "Current City", "Province", "District", "Polling Division", "Polling Station", "Current Position of the Job", "Name of the Institute","FamName","FamPollingStation"]

for column in categorical_columns:
    new_data[column] = label_encoder.fit_transform(new_data[column])

# Load the trained scaler
scaler = joblib.load('scaler.joblib')

# Scale new data
new_data_scaled = scaler.transform(new_data)

label_encoder_filename = 'label_encoder.joblib'
joblib.dump(label_encoder, label_encoder_filename)

# Make predictions
new_predictions = model.predict(new_data_scaled)

# Define a dictionary to map position codes to position names
position_names = {
    0: 'Clerk',
    1: 'Junior Polling Officer',
    2: 'Senior Polling Officer'
}

# Keep track of the best predictions
best_predictions = {position: [] for position in position_names.values()}

# Determine the best predictions for each position
for name, prediction, gender, polling_station in zip(names, new_predictions, new_data['Gender'], polling_stations):
    position_name = position_names[prediction]
    
    # Exclude females from being predicted as Senior Polling Officers
    if position_name == 'Senior Polling Officer' and gender == 'Female':
        position_name = 'Junior Polling Officer'
        
    if position_name == 'Clerk':
        best_predictions[position_name].append((name, polling_station))
    elif not best_predictions[position_name]:
        best_predictions[position_name].append((name, polling_station))

# Print the best predicted job positions and polling stations
print("Best Predicted Job Positions:")
for position_name, name_polling_list in best_predictions.items():
    if name_polling_list:
        print(f"{position_name}:")
        for name, polling_station in name_polling_list:
            print(f"  {name} (Polling Station: {polling_station})")


ValueError: could not convert string to float: 'Fed'