In [39]:
!pip install wandb
!pip install keras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [48]:
import logging
import wandb
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
from joblib import load
from joblib import dump
import tensorflow as tf
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
from sklearn.metrics import fbeta_score, precision_score, recall_score, accuracy_score

In [41]:
# Login to Weights & Biases
# --relogin asks for the API key again instead of reusing a stored login.
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [42]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    """Turn a DataFrame with a ``target`` column into a batched ``tf.data.Dataset``.

    Args:
        dataframe: pandas DataFrame holding the feature columns plus a
            ``target`` column. A copy is taken first, so the caller's frame
            is not modified.
        shuffle: when True, shuffle with a buffer as large as the frame.
        batch_size: number of rows per batch; the same value is used as the
            prefetch buffer size.

    Returns:
        A dataset yielding ``(features, labels)`` batches, where ``features``
        is a dict keyed by column name.

    Raises:
        KeyError: if ``dataframe`` has no ``target`` column.
    """
    features = dataframe.copy()
    labels = features.pop('target')

    # Columns are handed over as plain 1-D slices. No ``[:, tf.newaxis]``
    # expansion is applied: the dataset was never built from such an expanded
    # dict, and indexing a Series that way is rejected by pandas >= 2.0.
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    if shuffle:
        ds = ds.shuffle(buffer_size=len(features))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    return ds

def get_normalization_layer(name, dataset):
    """Build a Normalization layer adapted to one feature of ``dataset``.

    Args:
        name: key of the feature inside each element's feature dict.
        dataset: dataset yielding ``(features, label)`` pairs, where
            ``features`` can be indexed by ``name``.

    Returns:
        A ``tf.keras.layers.Normalization(axis=None)`` layer on which
        ``adapt`` has been called with the selected feature values.
    """
    def _pick_feature(features, _label):
        # Discard the label; only the named feature is needed for the statistics.
        return features[name]

    layer = tf.keras.layers.Normalization(axis=None)
    layer.adapt(dataset.map(_pick_feature))
    return layer

In [43]:
# global variables

# name of the artifact related to test dataset
# (passed to run.use_artifact() when the test CSV is downloaded)
artifact_test_name = "decision_tree/test.csv:latest"

# name of the model artifact
# (passed to run.use_artifact() when the exported model is downloaded)
artifact_model_name = "decision_tree/model_export:latest"

In [44]:
# Root-logger setup: INFO and above, each record printed as
# "<timestamp> <message>" with a second-resolution timestamp.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(message)s",
    level=logging.INFO,
)

# Handle on the root logger, used by the cells below.
logger = logging.getLogger()

In [45]:
# Open a W&B run in the "decision_tree" project, tagged with job type "test".
run = wandb.init(
    job_type="test",
    project="decision_tree",
)

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [46]:
# Fetch the test CSV through the run and load it into a DataFrame.
logger.info("Downloading and reading test artifact")
test_data_path = run.use_artifact(artifact_test_name).file()
df_test = pd.read_csv(test_data_path)

# Binary target: 0 when the assessment is "unacc", 1 for any other value.
logger.info("Extracting target from dataframe")
df_test["target"] = np.where(df_test["assessment"].eq("unacc"), 0, 1)

# No shuffling, so dataset order follows the row order of df_test.
test_ds = df_to_dataset(df_test, shuffle=False)

2022-07-25 21:03:18 Downloading and reading test artifact
2022-07-25 21:03:19 Extracting target from dataframe
  after removing the cwd from sys.path.


In [19]:
# Preview the first rows of the test set
df_test.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,assessment,target
0,high,vhigh,4,more,big,low,unacc,0
1,high,vhigh,4,4,big,low,unacc,0
2,low,low,2,more,med,low,unacc,0
3,med,high,2,more,small,low,unacc,0
4,high,vhigh,5more,2,big,high,unacc,0


In [47]:
# Download inference artifact
# NOTE(review): the output recorded for this cell is an AttributeError, and the
# later cells that use `model` have NameError recorded as their output.
# Those cells call model.evaluate() and model.predict() on a tf.data dataset,
# so this is presumably a Keras model. Confirm how the training step exported
# it and whether joblib is the right loader for that format.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load artifacts from a trusted project.
logger.info("Downloading and load the exported model")
model_export_path = run.use_artifact(artifact_model_name).file()
model = joblib.load(model_export_path)

2022-07-25 21:03:22 Downloading and load the exported model


AttributeError: ignored

In [11]:
# Evaluate the loaded model on the test dataset and log both returned values.
logger.info("Infering")
loss, accuracy = model.evaluate(test_ds)

logger.info(f"Accuracy: {accuracy}")
logger.info(f"Loss: {loss}")

2022-07-25 20:16:53 Infering


NameError: ignored

In [21]:
# Score every row of the test dataset.
predict = model.predict(test_ds)

# Flatten to one score per row and fail loudly if the model returned
# something other than a single value per test row.
scores = np.asarray(predict).ravel()
if len(scores) != len(df_test):
    raise ValueError(
        "Expected one prediction per test row, got {} predictions for {} rows".format(
            len(scores), len(df_test)
        )
    )

# NOTE(review): 0.5 is only a meaningful cut-off if the model outputs
# probabilities; confirm it does not emit raw logits.
pred = (scores >= 0.5).astype(int)
targ = df_test['target'].to_numpy()

# Class ids and their display names, index-aligned: target 0 was built from
# assessment == "unacc", target 1 from every other assessment.
class_ids = [0, 1]
labels = ["unacc", "acc"]

fig_confusion_matrix, ax = plt.subplots(1, 1, figsize=(7, 4))
# Transposed so that rows are predictions and columns are true labels,
# matching the axis titles set below.
ConfusionMatrixDisplay(confusion_matrix(targ, pred, labels=class_ids).T,
                       display_labels=labels).plot(values_format=".0f", ax=ax)

ax.set_xlabel("True Label")
ax.set_ylabel("Predicted Label")
plt.show()

# Evaluation Metrics
logger.info("Test Evaluation metrics")
fbeta = fbeta_score(targ, pred, beta=1, zero_division=1)
precision = precision_score(targ, pred, zero_division=1)
recall = recall_score(targ, pred, zero_division=1)
acc = accuracy_score(targ, pred)

logger.info("Test Accuracy: {}".format(acc))
logger.info("Test Precision: {}".format(precision))
logger.info("Test Recall: {}".format(recall))
logger.info("Test F1: {}".format(fbeta))

# Store the headline metrics on the W&B run summary.
run.summary["Acc"] = acc
run.summary["Precision"] = precision
run.summary["Recall"] = recall
run.summary["F1"] = fbeta

NameError: ignored

In [None]:
# Send the confusion-matrix figure to the W&B run; further figures can be
# added as extra keys of the logged dict.
logger.info("Uploading figures")
run.log({"confusion_matrix": wandb.Image(fig_confusion_matrix)})

In [None]:
# Finish the W&B run opened by wandb.init() above.
run.finish()