In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from catboost import CatBoostClassifier, Pool

In [2]:
df = pd.read_csv("training_data.csv") 
df.head()

Unnamed: 0,hour_of_day,day_of_week,month,holiday,weekday,summertime,temp,dew,humidity,precip,snow,snowdepth,windspeed,cloudcover,visibility,increase_stock
0,5,5,1,0,0,0,-7.2,-15.0,53.68,0.0,0,0.0,16.3,31.6,16.0,low_bike_demand
1,21,4,1,0,1,0,-1.3,-12.8,40.97,0.0,0,0.0,23.9,85.7,16.0,low_bike_demand
2,21,3,8,0,1,1,26.9,21.8,73.39,0.0,0,0.0,0.0,81.1,16.0,low_bike_demand
3,1,6,1,0,0,0,3.1,-4.0,59.74,0.0,0,0.0,19.2,0.0,16.0,low_bike_demand
4,17,0,3,0,1,0,11.7,-11.4,18.71,0.0,0,0.0,10.5,44.6,16.0,low_bike_demand


In [3]:
# Convert high/low to numeric target
df['target_num'] = df['increase_stock'].map({'high_bike_demand': 1, 'low_bike_demand': 0})

# Add weekend feature (5 = Saturday, 6 = Sunday)
df['is_weekend'] = df['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)

df[['increase_stock', 'target_num', 'day_of_week', 'is_weekend']].head()


Unnamed: 0,increase_stock,target_num,day_of_week,is_weekend
0,low_bike_demand,0,5,1
1,low_bike_demand,0,4,0
2,low_bike_demand,0,3,0
3,low_bike_demand,0,6,1
4,low_bike_demand,0,0,0


In [4]:
X = df.drop(columns=['increase_stock', 'target_num'])
y = df['target_num']


In [5]:
categorical_features = [
    'hour_of_day',
    'day_of_week',
    'month',
    'holiday',
    'weekday',
    'summertime',
    'is_weekend'
]

# Make sure they exist
categorical_features = [c for c in categorical_features if c in X.columns]

# Convert to string (CatBoost handles better)
for col in categorical_features:
    X[col] = X[col].astype(str)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [7]:
train_pool = Pool(X_train, y_train, cat_features=categorical_features)
test_pool = Pool(X_test, y_test, cat_features=categorical_features)


In [17]:
class_weights = [1, 1.5]
model = CatBoostClassifier(
    iterations=600,
    learning_rate=0.03,
    depth=6,
    loss_function='Logloss',
    eval_metric='F1',
    class_weights=class_weights,
    random_seed=42,
    verbose=100
)

model.fit(train_pool, eval_set=test_pool, use_best_model=True)


0:	learn: 0.6646295	test: 0.7368421	best: 0.7368421 (0)	total: 3.54ms	remaining: 2.12s
100:	learn: 0.7865169	test: 0.7951807	best: 0.8087774 (49)	total: 128ms	remaining: 634ms
200:	learn: 0.8482143	test: 0.8318584	best: 0.8318584 (194)	total: 234ms	remaining: 464ms
300:	learn: 0.9077491	test: 0.8520710	best: 0.8520710 (241)	total: 339ms	remaining: 337ms
400:	learn: 0.9524506	test: 0.8157100	best: 0.8571429 (320)	total: 450ms	remaining: 223ms
500:	learn: 0.9702683	test: 0.7904192	best: 0.8571429 (320)	total: 555ms	remaining: 110ms
599:	learn: 0.9840348	test: 0.8165680	best: 0.8571429 (320)	total: 664ms	remaining: 0us

bestTest = 0.8571428571
bestIteration = 320

Shrink model to first 321 iterations.


<catboost.core.CatBoostClassifier at 0x1371a2e20>

In [18]:
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.940625
F1 Score: 0.8347826086956521

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.97      0.96       262
           1       0.84      0.83      0.83        58

    accuracy                           0.94       320
   macro avg       0.90      0.90      0.90       320
weighted avg       0.94      0.94      0.94       320


Confusion Matrix:
 [[253   9]
 [ 10  48]]


In [10]:
model.get_feature_importance(prettified=True)


Unnamed: 0,Feature Id,Importances
0,hour_of_day,42.202682
1,temp,17.26309
2,month,7.570137
3,humidity,7.426419
4,summertime,5.618877
5,day_of_week,4.207414
6,dew,3.452961
7,weekday,3.06228
8,cloudcover,2.40999
9,is_weekend,1.898395
