In [24]:
!pip install catboost



In [25]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [26]:
# Toy dataset: two numeric columns, one categorical column, and a binary label.
data = dict(
    num_feature_1=[1.2, 2.4, 3.1, 0.5, 4.8],   # numeric feature
    num_feature_2=[0.3, 1.1, 0.7, 1.3, 2.2],   # numeric feature
    cat_feature_1=['A', 'B', 'A', 'B', 'A'],   # categorical feature ('A' / 'B')
    target=[0, 1, 0, 1, 0],                    # binary class label
)

In [27]:
# Turn the column-oriented dict into a DataFrame (one column per dict key).
df = pd.DataFrame(data=data)

In [28]:
# Show the whole frame (it has only five rows) to check the constructed columns.
df

Unnamed: 0,num_feature_1,num_feature_2,cat_feature_1,target
0,1.2,0.3,A,0
1,2.4,1.1,B,1
2,3.1,0.7,A,0
3,0.5,1.3,B,1
4,4.8,2.2,A,0


In [29]:
# Cast the string-valued column to pandas' `category` dtype; the numeric
# columns and the target are left as they are.
df = df.astype({'cat_feature_1': 'category'})

In [30]:
# Re-display after the cast. The rendered values look the same as before;
# inspect `df.dtypes` to confirm that the column's dtype actually changed.
df

Unnamed: 0,num_feature_1,num_feature_2,cat_feature_1,target
0,1.2,0.3,A,0
1,2.4,1.1,B,1
2,3.1,0.7,A,0
3,0.5,1.3,B,1
4,4.8,2.2,A,0


In [31]:
# Separate the label column from the features, then hold out 20% of the rows
# for evaluation. `random_state` fixes the shuffle so the split is repeatable.
y = df['target']
X = df.drop('target', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [32]:
# Build a small CatBoost binary classifier and fit it on the training split.
# `cat_features` names the column CatBoost should treat as categorical;
# `verbose=0` keeps training output quiet.
model = CatBoostClassifier(
    loss_function='Logloss',
    iterations=100,
    learning_rate=0.1,
    depth=3,
    cat_features=['cat_feature_1'],
    verbose=0,
)
model.fit(X_train, y_train)

<catboost.core.CatBoostClassifier at 0x7e6ad94dc610>

In [33]:
# Run the fitted model on the held-out feature rows; the result is compared
# against `y_test` in the evaluation cell.
y_pred = model.predict(X_test)

In [34]:
# Fraction of held-out rows whose predicted label matches the true label.
# NOTE: the dataset has five rows and test_size=0.2, so the held-out split is
# tiny; treat this number as a smoke check rather than a quality estimate.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.0000


In [35]:
# Build a one-row frame with the same feature columns as the training data,
# and give the categorical column the same `category` dtype used for training.
new_sample = pd.DataFrame(
    data=[[3.5, 1.0, 'B']],
    columns=X.columns,
).astype({'cat_feature_1': 'category'})

In [36]:
# Score the single hand-built sample with the fitted model.
new_prediction = model.predict(new_sample)

In [37]:
# Show the prediction array returned for the new sample.
new_prediction

array([0])