# In [None]:
# recruitment_prediction.ipynb

## 1. Import Library
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## 2. Load Data
# The path is relative to the current working directory.
df = pd.read_csv("dataset/aug_train.csv")
# Printed explicitly: a bare expression is only rendered when it is the last
# statement of a notebook cell, and never when this code runs as a script.
print(df.head())

## 3. Check Missing Values
# Number of missing entries per column.
print(df.isnull().sum())

## 4. Target Distribution
# Bar chart of the row count for each value of the "target" column.
sns.countplot(x="target", data=df)
plt.title("Distribusi Kandidat Diterima vs Tidak")
plt.show()

## 5. Preprocessing
# Drop the row identifier so it is not used as a feature.
df = df.drop("enrollee_id", axis=1)
# Explicit placeholders for the columns listed here; the numeric-looking
# fields ("experience", "last_new_job") are filled with the string "0"
# because they are still text at this point.
df.fillna({
    "education_level": "Unknown",
    "major_discipline": "Unknown",
    "experience": "0",
    "company_type": "Unknown",
    "company_size": "Unknown",
    "last_new_job": "0"
}, inplace=True)

## 6. Encoding
from sklearn.preprocessing import LabelEncoder
# Fitted encoder per column, kept so the exact same string -> integer mapping
# can be re-applied (or inverted) later, e.g. when scoring new data.
encoders = {}
le = LabelEncoder()  # keeps `le` bound even when there is no text column
for col in df.select_dtypes(include='object').columns:
    # One encoder per column: re-fitting a single shared instance would
    # discard the mapping of every column except the last one.
    le = LabelEncoder()
    # Text columns not covered by the fillna above may still contain NaN,
    # and LabelEncoder cannot fit a column that mixes strings with NaN, so
    # the remaining gaps get the same "Unknown" placeholder first.
    df[col] = le.fit_transform(df[col].fillna("Unknown"))
    encoders[col] = le

## 7. Train-Test Split
from sklearn.model_selection import train_test_split
# Features are every column except the label.
X = df.drop("target", axis=1)
y = df["target"]
# 80/20 split with a fixed seed for reproducibility. stratify=y keeps the
# class proportions of the target the same in the training and test sets, so
# the evaluation below is not skewed by an unlucky draw of the rarer class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 8. Train Model
from sklearn.ensemble import RandomForestClassifier
# Default hyper-parameters; only the seed is fixed so runs are repeatable.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

## 9. Evaluate
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 on the held-out 20%.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

## 10. Save Model
import os
import joblib
# The path is relative to the current working directory.
model_path = "../model/recruitment_model.pkl"
# joblib.dump does not create missing directories, so make sure the target
# folder exists instead of failing after the model has been trained.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)
