# Amazon.com - Employee Access Challenge

This notebook trains a CatBoost classifier on historical data to predict Amazon employee access permissions, a binary classification task. It uses a streamlined, CPU-only training setup and saves the fitted model for deployment.

Dataset: https://www.kaggle.com/competitions/amazon-employee-access-challenge/data

Hugging Face: https://huggingface.co/spaces/alperugurcan/access-decision

In [3]:
import pandas as pd
from catboost import CatBoostClassifier
import joblib

# End-to-end script: load the competition data, fit a CatBoost classifier,
# persist it with joblib, and write predictions for the test set.

# Load data
train_df = pd.read_csv('/kaggle/input/amazon-employee-access-challenge/train.csv')
test_df = pd.read_csv('/kaggle/input/amazon-employee-access-challenge/test.csv')

# Prepare features and target
X = train_df.drop('ACTION', axis=1)
y = train_df['ACTION']
test_ids = test_df['id']

# Score the test set on exactly the columns (and column order) the model was
# trained on. The raw test frame also carries the 'id' column, which is a row
# identifier rather than a feature; indexing by X.columns drops it and raises
# a KeyError immediately if any training feature is missing from the test set.
X_test = test_df[X.columns]

# NOTE(review): the features are fed to CatBoost as plain numeric columns.
# If they are categorical identifiers (presumably they are -- confirm against
# the dataset description), passing cat_features may improve the model.

# Initialize model with CPU parameters from the start
model = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    verbose=0,
    task_type='CPU',  # Set CPU from the start
    bootstrap_type='Bernoulli',
    subsample=0.8,
    eval_metric='Accuracy',  # Changed from AUC to Accuracy
    # Early stopping only takes effect when fit() receives an eval_set; none
    # is passed below, so all 100 iterations are always trained.
    early_stopping_rounds=20
)

# Train model
model.fit(X, y)

# Save model
joblib.dump(model, 'amazon_access_model.joblib', compress=3)

# Verify the save by loading the artifact back. If the round trip fails,
# report it and fall back to the in-memory model so the prediction step below
# does not crash with an unrelated NameError on `loaded_model`.
try:
    loaded_model = joblib.load('amazon_access_model.joblib')
except Exception as e:
    print(f"Error saving model: {e}")
    loaded_model = model
else:
    print("Model saved successfully!")

# Make predictions with loaded model
# NOTE(review): predict() returns hard class labels. If the submission is
# scored on a ranking metric such as AUC, predict_proba(X_test)[:, 1] would
# be the better output -- confirm against the competition's evaluation rules.
predictions = loaded_model.predict(X_test)

# Create submission
pd.DataFrame({
    "Id": test_ids,
    "Action": predictions
}).to_csv("submission.csv", index=False)

Model saved successfully!
