In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Read the raw records from the CSV file into a DataFrame
file_path = 'user_data.csv'
df = pd.read_csv(file_path)

# Discard the 'Full Name', 'Email' and 'DOB' columns; they are not used below
df = df.drop(columns=["Full Name", "Email", "DOB"])

# 'Experience': remove the ' years' suffix, then store the values as integers
df['Experience'] = (
    df['Experience']
    .str.replace(' years', '')
    .astype(int)
)

# 'Current Salary': remove '$' and ',' characters, then store the values as floats
df['Current Salary'] = (
    df['Current Salary']
    .str.replace('[$,]', '', regex=True)
    .astype(float)
)

# Columns whose values are replaced by integer codes
categorical_columns = [
    "Education Level",
    "Gender",
    "Current City",
    "Province",
    "District",
    "Polling Division",
    "Polling Station",
    "Current Position of the Job",
    "Name of the Institute",
    "Position Type",
    "FamName",
    "FamPollingStation",
]

# Fit a separate LabelEncoder per column and keep each one. Refitting a single
# shared encoder would overwrite the previous column's mapping, leaving no way
# to encode new records consistently or to map predicted "Position Type" codes
# back to their labels.
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

# Persist the fitted encoders (a dict keyed by column name) to disk so the
# same value-to-code mappings can be reloaded later.
encoders_filename = 'label_encoders.joblib'
joblib.dump(label_encoders, encoders_filename)

# Target is the "Position Type" column; the features are every other column
y = df["Position Type"]
X = df.drop(columns=["Position Type"])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Write the fitted scaler to disk
scaler_filename = 'scaler.joblib'
joblib.dump(scaler, scaler_filename)

# Train a random forest on the scaled training data; fit() returns the
# estimator itself, so the fitted classifier is bound to `model`
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Write the fitted classifier to disk
model_filename = 'trained_model.joblib'
joblib.dump(model, model_filename)

print("Model trained and saved successfully.")


Model trained and saved successfully.
