In [1]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.dummy import DummyClassifier

import xgboost as xgb


In [None]:
'''
conda activate catboost_env
conda install scikit-learn
jupyter notebook
'''

In [2]:
import catboost
from catboost import CatBoostClassifier

In [None]:
# Import Dataframes

In [3]:
checkout_df = pd.read_csv('checkout_df.csv', index_col = 0)
return_df = pd.read_csv('return_df.csv', index_col = 0)

In [4]:
checkout_df = checkout_df.drop('outgoing_bikes_count', axis=1)
return_df = return_df.drop('incoming_bikes_count', axis=1)

In [None]:
# Variable preparation for modeling

In [5]:
# season variable encoding
def map_seasons(df, column_name):
    season_mapping = {
        'winter': 0,
        'spring': 1,
        'summer': 2,
        'fall': 3
    }
    
    df[column_name] = df[column_name].map(season_mapping)
    return df

map_seasons(return_df, 'season')
map_seasons(checkout_df, 'season')

# time variable splitting
def time_preprocess(df, column_name):
    df['hour'] = df[column_name].apply(lambda x: int(x.split(':')[0]))
    df['minute'] = df[column_name].apply(lambda x: int(x.split(':')[1]))
    df.drop(columns=[column_name], inplace=True)
    return df

time_preprocess(return_df, 'stoptime')
time_preprocess(checkout_df, 'starttime')

Unnamed: 0,Checkout Kiosk ID,Year,season,day,demand,hour,minute
0,0.0,2022,3,0,Low,8,15
1,0.0,2022,3,0,Medium,9,45
2,0.0,2022,3,0,Medium,10,0
3,0.0,2022,3,0,Low,10,30
4,0.0,2022,3,0,Medium,11,0
...,...,...,...,...,...,...,...
491657,4879.0,2023,0,6,Low,19,45
491658,4879.0,2023,0,6,Low,20,0
491659,4879.0,2023,0,6,Medium,20,30
491660,4879.0,2023,0,6,Low,20,45


In [6]:
return_df

Unnamed: 0,Return Kiosk ID,Year,season,day,demand,hour,minute
0,0.0,2022,3,0,Low,4,0
1,0.0,2022,3,0,Low,8,45
2,0.0,2022,3,0,Low,9,45
3,0.0,2022,3,0,Medium,11,45
4,0.0,2022,3,0,Medium,12,0
...,...,...,...,...,...,...,...
532836,4879.0,2023,0,6,Low,20,15
532837,4879.0,2023,0,6,Low,20,30
532838,4879.0,2023,0,6,Low,20,45
532839,4879.0,2023,0,6,Low,21,30


In [None]:
# Checkout modeling

In [8]:
# Get X and Y
c_x = checkout_df.drop(columns=['demand']) # features
c_y = checkout_df['demand'] # target 


In [None]:
# Random Forest Classifier 

In [31]:
from joblib import dump
# set up and train model
x_train, x_test, y_train, y_test = train_test_split(c_x, c_y, test_size=0.2, random_state=42)
checkout_rfc = RandomForestClassifier(n_estimators=50, max_depth=10, random_state = 0, verbose=True, n_jobs=-1)
checkout_rfc.fit(x_train, y_train)
dump(rfc, 'Models/rfc_checkout.joblib') 

# test model
y_prediction = checkout_rfc.predict(x_test)

# assess model performance
checkout_performance = classification_report(y_test, y_prediction)
print(checkout_performance)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    4.3s finished


FileNotFoundError: [Errno 2] No such file or directory: 'Models/rfc_checkout.joblib'

In [14]:
from sklearn.model_selection import cross_val_score

# Create a RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=50, max_depth=10, random_state = 0, verbose=True, n_jobs=-1)

# Perform 5-fold cross-validation and compute accuracy scores
accuracy_scores = cross_val_score(rfc, c_x, c_y, cv=5, scoring='accuracy')

# Calculate average accuracy
average_accuracy = accuracy_scores.mean()

print("Average Accuracy:", average_accuracy)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:    8.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.3s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:    8.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 184 tasks      | e

Average Accuracy: 0.36959128287806814


[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.3s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:    0.3s finished


In [None]:
# XGBoost Classifier

In [19]:
# Get X and Y
c_x = checkout_df.drop(columns=['demand'])  # features
c_y = checkout_df['demand']  # target 

# Create a label-to-number mapping
label_mapping = {'High': 0, 'Low': 1, 'Medium': 2}

# Convert labels to numerical format
c_y_encoded = c_y.map(label_mapping)

# set up and train model
x_train, x_test, y_train, y_test = train_test_split(c_x, c_y_encoded, test_size=0.2, random_state=42)
xgb_model = xgb.XGBClassifier(random_state=0, verbosity=1, n_jobs=-1, max_depth=3, n_estimators=50)
xgb_model.fit(x_train, y_train)

# test model
y_prediction = xgb_model.predict(x_test)

# assess model performance
checkout_performance = classification_report(y_test, y_prediction)
print(checkout_performance)

              precision    recall  f1-score   support

           0       0.60      0.59      0.60     29596
           1       0.54      0.60      0.57     35307
           2       0.41      0.36      0.39     33430

    accuracy                           0.52     98333
   macro avg       0.52      0.52      0.52     98333
weighted avg       0.51      0.52      0.51     98333



In [20]:
# Create a RandomForestClassifier
xgb_model = xgb.XGBClassifier(random_state=0, verbosity=1, n_jobs=-1, max_depth=3, n_estimators=50)
# Perform 5-fold cross-validation and compute accuracy scores
accuracy_scores = cross_val_score(xgb_model, c_x, c_y_encoded, cv=5, scoring='accuracy')

# Calculate average accuracy
average_accuracy = accuracy_scores.mean()

print("Average Accuracy:", average_accuracy)

Average Accuracy: 0.3439395958923089


In [None]:
# Catboost Classifier

In [21]:
# Get X and Y
c_x = checkout_df.drop(columns=['demand']) # features
c_y = checkout_df['demand'] # target 

# set up and train model
x_train, x_test, y_train, y_test = train_test_split(c_x, c_y, test_size=0.2, random_state=42)
catboost_model = CatBoostClassifier(random_state=0, verbose=1, thread_count=-1, max_depth=3, n_estimators=50)
catboost_model.fit(x_train, y_train)

# test model
y_prediction = catboost_model.predict(x_test)

# assess model performance
checkout_performance = classification_report(y_test, y_prediction)
print(checkout_performance)


Learning rate set to 0.5
0:	learn: 1.0652174	total: 123ms	remaining: 6.01s
1:	learn: 1.0468776	total: 152ms	remaining: 3.66s
2:	learn: 1.0351549	total: 181ms	remaining: 2.84s
3:	learn: 1.0277240	total: 216ms	remaining: 2.49s
4:	learn: 1.0213377	total: 248ms	remaining: 2.23s
5:	learn: 1.0164405	total: 277ms	remaining: 2.03s
6:	learn: 1.0101832	total: 307ms	remaining: 1.89s
7:	learn: 1.0066263	total: 351ms	remaining: 1.84s
8:	learn: 1.0015692	total: 388ms	remaining: 1.77s
9:	learn: 0.9989580	total: 420ms	remaining: 1.68s
10:	learn: 0.9950316	total: 449ms	remaining: 1.59s
11:	learn: 0.9898437	total: 480ms	remaining: 1.52s
12:	learn: 0.9862472	total: 509ms	remaining: 1.45s
13:	learn: 0.9845440	total: 542ms	remaining: 1.39s
14:	learn: 0.9822549	total: 580ms	remaining: 1.35s
15:	learn: 0.9782428	total: 612ms	remaining: 1.3s
16:	learn: 0.9762763	total: 643ms	remaining: 1.25s
17:	learn: 0.9742198	total: 679ms	remaining: 1.21s
18:	learn: 0.9728064	total: 710ms	remaining: 1.16s
19:	learn: 0.9718

In [22]:
# Create a RandomForestClassifier
catboost_model = CatBoostClassifier(random_state=0, verbose=1, thread_count=-1, max_depth=3, n_estimators=50)
# Perform 5-fold cross-validation and compute accuracy scores
accuracy_scores = cross_val_score(catboost_model, c_x, c_y, cv=5, scoring='accuracy')

# Calculate average accuracy
average_accuracy = accuracy_scores.mean()

print("Average Accuracy:", average_accuracy)

Learning rate set to 0.5
0:	learn: 1.0668163	total: 50.6ms	remaining: 2.48s
1:	learn: 1.0478620	total: 80.9ms	remaining: 1.94s
2:	learn: 1.0368131	total: 112ms	remaining: 1.75s
3:	learn: 1.0281977	total: 154ms	remaining: 1.77s
4:	learn: 1.0187529	total: 202ms	remaining: 1.82s
5:	learn: 1.0113421	total: 242ms	remaining: 1.78s
6:	learn: 1.0019025	total: 278ms	remaining: 1.71s
7:	learn: 0.9973519	total: 309ms	remaining: 1.62s
8:	learn: 0.9936338	total: 341ms	remaining: 1.55s
9:	learn: 0.9898003	total: 385ms	remaining: 1.54s
10:	learn: 0.9854947	total: 426ms	remaining: 1.51s
11:	learn: 0.9831001	total: 462ms	remaining: 1.46s
12:	learn: 0.9799690	total: 506ms	remaining: 1.44s
13:	learn: 0.9782551	total: 543ms	remaining: 1.4s
14:	learn: 0.9761079	total: 576ms	remaining: 1.34s
15:	learn: 0.9734047	total: 613ms	remaining: 1.3s
16:	learn: 0.9715636	total: 651ms	remaining: 1.26s
17:	learn: 0.9669742	total: 695ms	remaining: 1.23s
18:	learn: 0.9634812	total: 732ms	remaining: 1.2s
19:	learn: 0.9621

13:	learn: 0.9695992	total: 453ms	remaining: 1.17s
14:	learn: 0.9656060	total: 487ms	remaining: 1.14s
15:	learn: 0.9619486	total: 520ms	remaining: 1.1s
16:	learn: 0.9599657	total: 552ms	remaining: 1.07s
17:	learn: 0.9569380	total: 590ms	remaining: 1.05s
18:	learn: 0.9548481	total: 623ms	remaining: 1.02s
19:	learn: 0.9532569	total: 657ms	remaining: 985ms
20:	learn: 0.9501368	total: 693ms	remaining: 957ms
21:	learn: 0.9480600	total: 728ms	remaining: 926ms
22:	learn: 0.9467456	total: 757ms	remaining: 888ms
23:	learn: 0.9459675	total: 789ms	remaining: 855ms
24:	learn: 0.9449802	total: 821ms	remaining: 821ms
25:	learn: 0.9432776	total: 854ms	remaining: 788ms
26:	learn: 0.9403184	total: 890ms	remaining: 758ms
27:	learn: 0.9390804	total: 929ms	remaining: 730ms
28:	learn: 0.9375509	total: 966ms	remaining: 700ms
29:	learn: 0.9367836	total: 999ms	remaining: 666ms
30:	learn: 0.9358614	total: 1.03s	remaining: 634ms
31:	learn: 0.9352591	total: 1.07s	remaining: 601ms
32:	learn: 0.9338574	total: 1.1s

In [39]:
# Calculate the most frequent class
most_frequent_class = checkout_df['demand'].value_counts().idxmax()

count = checkout_df['demand'].value_counts()[most_frequent_class]
total = len(checkout_df)

guess_percent = count/total

guess_percent



0.35875255765139463

In [43]:
# Calculate the most frequent class
most_frequent_class = checkout_df['demand'].value_counts().idxmax()

# Create a majority class baseline classifier
baseline_classifier = DummyClassifier(strategy="constant", constant=most_frequent_class)

# Perform 5-fold cross-validation
cross_val_predictions = cross_val_predict(baseline_classifier, checkout_df.drop(columns=['demand']), checkout_df['demand'], cv=5)

# Calculate classification report for cross-validation predictions
cross_val_performance = classification_report(checkout_df['demand'], cross_val_predictions)

# Calculate cross-validation accuracy scores
accuracy_scores = cross_val_score(baseline_classifier, checkout_df.drop(columns=['demand']), checkout_df['demand'], cv=5, scoring='accuracy')

# Calculate average accuracy
average_accuracy = accuracy_scores.mean()

# Print average accuracy and classification report
print("Average Accuracy:", average_accuracy)
print("Classification Report:")
print(cross_val_performance)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Baseline Majority Class Average Accuracy: 0.3587525576602992

Classification Report for Cross-Validation:
              precision    recall  f1-score   support

        High       0.00      0.00      0.00    147268
         Low       0.36      1.00      0.53    176385
      Medium       0.00      0.00      0.00    168009

    accuracy                           0.36    491662
   macro avg       0.12      0.33      0.18    491662
weighted avg       0.13      0.36      0.19    491662



In [9]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'max_depth': [None, 10, 20, 30],  # Define the range of tree depths to search
    'n_estimators': [50, 100, 200],  # Define the range of number of trees
    # Add other hyperparameters to tune if needed
}

# Create a Random Forest Classifier
rfc = RandomForestClassifier(random_state=0, verbose=True, n_jobs=-1)

# Create a GridSearchCV object
grid_search = GridSearchCV(rfc, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the grid search to the data
grid_search.fit(c_x, c_y)

# Get the best hyperparameters and the associated performance
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Accuracy:", best_score)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   38.9s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   40.5s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   40.8s
[Parallel(n_jobs=-1)]: Done  34 tasks     

[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    5.0s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   51.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.6s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    2.1s finished
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   53.0s finished
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   53.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  37 tasks      | elapsed:    0.8s
[Parallel(n_jobs=8)]: Done  35 tasks      | elapsed:    1.1s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    2.9s finished
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    3.1s finished
[Parallel(n_jobs=8)]: Done 184 t

[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:   36.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   29.6s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.4s
[Parallel(n_jobs=8)]: Done  50 out of  50 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   29.5s
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  1.2min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   32.2s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   32.1s
[Parallel(n_job

[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  1.7min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   11.7s finished
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  1.8min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   10.6s finished
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   48.1s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    6.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  1.8min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n

Best Parameters: {'max_depth': 10, 'n_estimators': 50}
Best Cross-Validation Accuracy: 0.3727092868728216


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    4.6s finished


In [17]:
from sklearn.model_selection import RandomizedSearchCV

# Define the parameter distribution
param_dist = {
    'max_depth': [3, 5, 7, 10],  # Define the range of tree depths to search
    'n_estimators': [50, 100, 150, 200],  # Define the range of number of trees
    # Add other hyperparameters to tune if needed
}

# Create an XGBoost Classifier
xgb_model = xgb.XGBClassifier(random_state=0, verbosity=1, n_jobs=-1)

# Create a RandomizedSearchCV object
random_search = RandomizedSearchCV(xgb_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the random search to the data
random_search.fit(c_x, c_y_encoded)

# Get the best hyperparameters and the associated performance
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Accuracy:", best_score)


Best Parameters: {'n_estimators': 50, 'max_depth': 3}
Best Cross-Validation Accuracy: 0.3439395958923089


In [18]:
# Define the parameter distribution
param_dist = {
    'max_depth': [3, 5, 7, 10],  # Define the range of tree depths to search
    'iterations': [50, 100, 150, 200],  # Define the range of number of trees
    # Add other hyperparameters to tune if needed
}

# Create a CatBoost Classifier
catboost_model = CatBoostClassifier(random_state=0, verbose=0, thread_count=-1)

# Create a RandomizedSearchCV object
random_search = RandomizedSearchCV(catboost_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the random search to the data
random_search.fit(c_x, c_y)

# Get the best hyperparameters and the associated performance
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Accuracy:", best_score)


Best Parameters: {'max_depth': 3, 'iterations': 50}
Best Cross-Validation Accuracy: 0.33149598959676724


In [None]:
# Return modeling

In [23]:
# Get X and Y
r_x = return_df.drop(columns=['demand']) # features
r_y = return_df['demand'] # target 

In [29]:
# set up and train model
x_train, x_test, y_train, y_test = train_test_split(r_x, r_y, test_size=0.2, random_state=42)
return_rfc = RandomForestClassifier(n_estimators=50, max_depth=10, random_state = 0, verbose=True, n_jobs=-1)
return_rfc.fit(x_train, y_train)

# test model
y_prediction = return_rfc.predict(x_test)

# assess model performance
checkout_performance = classification_report(y_test, y_prediction)
print(checkout_performance)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    4.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done  50 out of  50 | elapsed:    0.1s finished


              precision    recall  f1-score   support

        High       0.58      0.51      0.55     30120
         Low       0.54      0.61      0.58     39376
      Medium       0.40      0.39      0.40     37073

    accuracy                           0.51    106569
   macro avg       0.51      0.51      0.51    106569
weighted avg       0.51      0.51      0.51    106569



In [30]:
# Export RFC Models
import joblib

# Export Checkout model
checkout_rfc_filename = 'checkout_rfc.pkl'
joblib.dump(checkout_rfc, checkout_rfc_filename)

# Export Return model
return_rfc_filename = 'return_rfc.pkl'
joblib.dump(return_rfc, return_rfc_filename)

['return_rfc.pkl']