# Dry Beans Dataset

Fit a decision tree model to the dry beans dataset for demonstration purposes.

In [None]:
import sklearn.model_selection
import pandas as pd
import sklearn.tree
import pickle
import pprint
import yaml

In [6]:
# Notebook configuration: input / output file names and the seed used for the
# train / test split.
CONFIG_FILENAME = "dry-beans-config.yaml"
MODEL_ARTIFACT_FILENAME = "dt_model.pkl"
FEATURES_FILENAME = "Dry_Beans_Dataset.csv"
# Fixed seed so the train / test split (and therefore the reported accuracy)
# is the same on every Restart Kernel -> Run All.
SPLIT_RANDOM_STATE = 42

# Load our config file and echo it for the reader. safe_load is used so the
# YAML cannot construct arbitrary Python objects.
with open(CONFIG_FILENAME, "r") as fin:
    config = yaml.safe_load(fin)
print(f"\n{pprint.pformat(config)}\n")

# Get our dataset:
df = pd.read_csv(FEATURES_FILENAME)
print("Dataframe loaded.", flush=True)

# Create our train / test split.
# `pop` removes the "Class" column from `df` in place, so after this line `df`
# holds only the remaining (feature) columns. Re-running the whole cell is safe
# because `df` is re-read from the CSV above.
labels = df.pop("Class").tolist()
data = df.values.tolist()
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    data, labels, random_state=SPLIT_RANDOM_STATE
)


{'bucket': 'bushong-exp-2025-data',
 'csv': 'Dry_Beans_Dataset.csv',
 'dataset_id': 'dry_beans',
 'key_file': 'svc-acct-key.json',
 'project_id': 'bushong-exp-2025',
 'region': 'us-central1',
 'svc_acct': 'dry-beans',
 'table': 'features'}

Dataframe loaded.


In [7]:
# Seed for the decision tree's internal randomness. scikit-learn randomly
# permutes the features at each split, so without a fixed seed the fitted
# tree (and the saved artifact) can differ between runs on identical data.
MODEL_RANDOM_STATE = 42

# Fit a decision tree with otherwise default hyperparameters on the training split.
print("Fitting model...", flush=True)
dt_model = sklearn.tree.DecisionTreeClassifier(random_state=MODEL_RANDOM_STATE)
dt_model.fit(x_train, y_train)

# Evaluate on the held-out split; for classifiers `score` reports mean accuracy.
print("Scoring model...", flush=True)
accuracy = dt_model.score(x_test, y_test)
print(f"Accuracy: {accuracy:.3}")

# Persist the fitted model. NOTE: unpickling can execute arbitrary code, so
# this artifact should only ever be loaded from a trusted location.
print(f"Writing model: {MODEL_ARTIFACT_FILENAME}", flush=True)
with open(MODEL_ARTIFACT_FILENAME, "wb") as fout:
    pickle.dump(dt_model, fout)

print("Fit complete.", flush=True)

Fitting model...
Scoring model...
Accuracy: 0.892
Writing model: dt_model.pkl
Fit complete.
