In [None]:
# Impute a binary gender for rows labelled 'Other': train a CatBoost
# classifier on the rows whose Gender is 'Male'/'Female', report its hold-out
# performance, then write its predictions for the 'Other' rows into a new
# 'Predicted Gender' column of `df` (left missing for every other row).
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Single source of truth for the target encoding; the inverse is derived from
# it so encoding and decoding can never drift apart.
gender_to_label = {'Male': 0, 'Female': 1}
label_to_gender = {label: gender for gender, label in gender_to_label.items()}

# 1. Split the data
# Rows whose Gender is neither a known class nor 'Other' (e.g. missing values)
# end up in neither subset.
is_known = df['Gender'].isin(list(gender_to_label))
is_other = df['Gender'] == 'Other'
df_known = df[is_known].copy()
df_unknown = df[is_other].copy()

# 2. Encode target
df_known['Gender_Label'] = df_known['Gender'].map(gender_to_label)

# 3. Prepare data
X_known = df_known.drop(['Gender', 'Gender_Label'], axis=1)
y_known = df_known['Gender_Label']
X_unknown = df_unknown.drop(['Gender'], axis=1)

# Columns handed to CatBoost as categorical features ('Gender' is excluded
# because it is the target).
# NOTE(review): assumes every other feature column in df is numeric - confirm.
cat_features = [
    'Workout Type',
    'Workout Intensity',
    'Mood Before Workout',
    'Mood After Workout',
]

# 4. Train-test split for model validation
# Stratified so both classes keep their proportions in the 20% hold-out set.
X_train, X_test, y_train, y_test = train_test_split(
    X_known,
    y_known,
    test_size=0.2,
    random_state=42,
    stratify=y_known,
)

# 5. Train model
gender_model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    cat_features=cat_features,
    verbose=0,
    random_seed=42,
)
gender_model.fit(X_train, y_train)

# 6. Evaluate
y_pred_test = gender_model.predict(X_test)
print(classification_report(y_test, y_pred_test))

# 7. Predict Gender for 'Other'
if X_unknown.empty:
    # Nothing to impute: skip the predict call (it has no rows to score) but
    # still make sure the output column exists so the summary below works.
    predicted_labels = []
    predicted_gender = []
    if 'Predicted Gender' not in df.columns:
        df['Predicted Gender'] = None
else:
    predicted_labels = gender_model.predict(X_unknown)

    # Map predictions back to labels. ravel() tolerates a column-shaped
    # (n, 1) result and int() tolerates float-typed class labels.
    predicted_gender = [
        label_to_gender[int(label)] for label in predicted_labels.ravel()
    ]

    # Add predictions to the original df; df_unknown keeps df's row order, so
    # the predictions line up with the masked rows.
    df.loc[is_other, 'Predicted Gender'] = predicted_gender

# Show results
df.loc[is_other, ['Gender', 'Predicted Gender']]
