In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [19]:
# Load the raw dataset
data = pd.read_csv('user_data.csv')

# Drop identifying / irrelevant columns, then discard rows with missing values
data = data.drop(columns=['Full Name', 'DOB', 'Email', 'Current City', 'Name of the Institute'])
data = data.dropna()

# Encode categorical features.
# Each column gets its OWN fitted LabelEncoder, kept in `label_encoders`, so the
# string -> integer mapping of every column stays available afterwards, e.g.
#   label_encoders['Gender'].transform(['Male'])
#   label_encoders['Gender'].inverse_transform([1])
# Re-fitting one shared encoder would overwrite its classes_ on every call and
# leave only the mapping of the last column.
label_encoders = {}
for column in ['Education Level', 'Gender', 'Current Position of the Job', 'Polling Station']:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder
# After the loop `label_encoder` is the encoder fitted on 'Polling Station',
# the same column the previously shared encoder was last fitted on.


In [20]:
# Convert 'Experience' from text to a number by keeping the first run of digits.
# - raw string: '\d' in a normal string literal is an invalid escape sequence
# - expand=False: return a Series (not a one-column DataFrame) for the single group
data['Experience'] = data['Experience'].str.extract(r'(\d+)', expand=False).astype(float)

In [21]:
# Preprocess 'Current Salary' column: strip '$' and ',' and convert to float.
# Raw string avoids the invalid '\$' escape; inside a character class '$' is
# already literal, so r'[$,]' matches exactly the same characters as before.
data['Current Salary'] = data['Current Salary'].str.replace(r'[$,]', '', regex=True).astype(float)

In [22]:
# Target labels (y) are the 'Position Type' column; every other column is a feature (X)
y = data['Position Type']
X = data.drop(columns=['Position Type'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [23]:
# (Empty cell) 'Experience' is already converted to a numeric column in an earlier cell, before the train/test split.


In [24]:
# Initialize the Random Forest classifier.
# A fixed random_state makes the forest's internal randomness repeatable, so a
# fresh "Restart & Run All" trains the same model; 42 matches the seed used for
# the train/test split.
clf = RandomForestClassifier(random_state=42)

# Train the model on the training split
clf.fit(X_train, y_train)


RandomForestClassifier()

In [25]:
# Predict a label for every row of the held-out test features
y_pred = clf.predict(X_test)

# Print the classification report comparing predictions with the true test labels
print(classification_report(y_true=y_test, y_pred=y_pred))


                        precision    recall  f1-score   support

                 Clerk       1.00      0.95      0.97        40
Junior Polling Officer       1.00      1.00      1.00        24
Senior Polling Officer       0.95      1.00      0.97        36

              accuracy                           0.98       100
             macro avg       0.98      0.98      0.98       100
          weighted avg       0.98      0.98      0.98       100



In [46]:
# Prepare a new data instance for prediction (replace with your own values).
# The categorical fields ('Education Level', 'Gender', 'Polling Station',
# 'Current Position of the Job') must hold the integer codes produced by the
# label-encoding step applied to the training data, not the raw strings.
new_data = pd.DataFrame({
    'Age': [25],
    'Education Level': [3],
    'Gender': [1],
    'Polling Station': [2],
    'Current Position of the Job': [1],
    'Experience': [10],
    'Current Salary': [50000]
})

# Put the columns in the order the model was trained on: scikit-learn checks
# DataFrame feature names (including their order) at predict time. Selecting by
# the training columns also raises a KeyError early if a feature is missing.
new_data = new_data[list(X_train.columns)]

# Make a prediction
prediction = clf.predict(new_data)
print("Predicted Position:", prediction)


Predicted Position: ['Senior Polling Officer']
