In [1]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, so changes to local project code do not require a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import ray
import warnings
# Suppress every Python warning raised in this kernel process
# (presumably to keep cell output readable — confirm this blanket filter is intended).
warnings.filterwarnings("ignore")

### Evaluation

<p style="font-weight: bold; background-color: yellow; padding: 10px; display: inline;">TODO</p>

- add an overall diagram for this section (more detailed than the overall end-to-end diagram)
- mention that evaluation is a batch inference workload

In [3]:
import mlflow
from sklearn.metrics import precision_recall_fscore_support
from urllib.parse import urlparse

In [4]:
from doggos.infer import TorchPredictor
from doggos.utils import add_class, batch_metric

In [5]:
# Get best run
# Point MLflow at the file-based tracking store and rank every run of the
# experiment by validation loss, lowest first.
model_registry = "/mnt/user_storage/mlflow"
experiment_name = "doggos"
mlflow.set_tracking_uri(f"file:{model_registry}")
sorted_runs = mlflow.search_runs(
    experiment_names=[experiment_name],
    order_by=["metrics.val_loss ASC"],
)
# Fail with an actionable message instead of the bare IndexError that
# `.iloc[0]` raises when the search returns no runs.
if sorted_runs.empty:
    raise ValueError(
        f"No MLflow runs found for experiment '{experiment_name}' in {model_registry}"
    )
best_run = sorted_runs.iloc[0]

In [6]:
# Load and preprocess the eval dataset
# Build the predictor from the best run's artifacts directory, i.e. the path
# component of the run's artifact URI.
artifacts_dir = urlparse(best_run.artifact_uri).path
predictor = TorchPredictor.from_artifacts_dir(artifacts_dir=artifacts_dir)

# Read the test images (keeping their file paths), map add_class over the rows,
# then run the result through the predictor's own preprocessor.
test_ds = predictor.preprocessor.transform(
    ds=ray.data.read_images(
        "s3://doggos-dataset/test",
        include_paths=True,
    ).map(add_class)
)

2025-02-19 21:47:26,528	INFO worker.py:1636 -- Connecting to existing Ray cluster at address: 10.0.55.44:6379...
2025-02-19 21:47:26,537	INFO worker.py:1812 -- Connected to Ray cluster. View the dashboard at [1m[32mhttps://session-3apyclh1wkjd883fht6vsy8xjh.i.anyscaleuserdata.com [39m[22m
2025-02-19 21:47:26,544	INFO packaging.py:393 -- Pushing file package 'gcs://_ray_pkg_7d4fc9210c75f6811c389bb991914b6b7f18e4e5.zip' (2.86MiB) to Ray cluster...
2025-02-19 21:47:26,571	INFO packaging.py:406 -- Successfully pushed file package 'gcs://_ray_pkg_7d4fc9210c75f6811c389bb991914b6b7f18e4e5.zip'.


In [7]:
# y_pred (batch inference)
# Apply the predictor to the preprocessed test set in batches of 64, forwarding
# device="cuda" to every call and requesting one GPU with a concurrency of 4.
pred_ds = test_ds.map_batches(
    predictor,
    batch_size=64,
    concurrency=4,
    num_gpus=1,
    fn_kwargs={"device": "cuda"},
)
# Pull a single row to preview what the prediction dataset looks like.
pred_ds.take(1)

2025-02-19 21:47:26,786	INFO dataset.py:2631 -- Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.
2025-02-19 21:47:26,792	INFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-02-19_20-21-51_646153_2288/logs/ray-data
2025-02-19 21:47:26,793	INFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ListFiles] -> TaskPoolMapOperator[PartitionFiles] -> TaskPoolMapOperator[ReadFiles] -> TaskPoolMapOperator[Map(add_class)->Map(Preprocessor.convert_to_label)] -> ActorPoolMapOperator[MapBatches(EmbeddingGenerator)] -> TaskPoolMapOperator[MapBatches(drop_columns)] -> TaskPoolMapOperator[MapBatches(TorchPredictor)] -> LimitOperator[limit=1]


Running 0: 0.00 row [00:00, ? row/s]

- ListFiles 1: 0.00 row [00:00, ? row/s]

- PartitionFiles 2: 0.00 row [00:00, ? row/s]

- ReadFiles 3: 0.00 row [00:00, ? row/s]

- Map(add_class)->Map(Preprocessor.convert_to_label) 4: 0.00 row [00:00, ? row/s]

- MapBatches(EmbeddingGenerator) 5: 0.00 row [00:00, ? row/s]

- MapBatches(drop_columns) 6: 0.00 row [00:00, ? row/s]

- MapBatches(TorchPredictor) 7: 0.00 row [00:00, ? row/s]

- limit=1 8: 0.00 row [00:00, ? row/s]

[36m(autoscaler +25s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.


[{'path': 'doggos-dataset/test/chihuahua/chihuahua_3006.jpg',
  'class': 'chihuahua',
  'label': 7,
  'embedding': array([ 1.55004308e-01,  3.50220859e-01,  6.03504479e-03,  6.07165873e-01,
          1.74657986e-01,  8.32328424e-02,  4.55964990e-02,  2.52718627e-01,
          1.10991307e-01,  6.14749454e-02,  1.04563266e-01, -1.77179605e-01,
          3.77644926e-01,  1.32574603e-01,  9.00012702e-02, -1.21757388e-04,
          1.80395916e-01, -2.37003922e-01,  3.23602378e-01,  6.06229901e-03,
         -7.01510489e-01, -2.49777824e-01,  4.55600441e-01,  1.16429061e-01,
          1.82448328e-03,  2.85436958e-03,  1.19094074e-01, -9.10363644e-02,
         -4.61813845e-02, -1.60100311e-02,  3.78849730e-02, -1.79648995e-02,
         -8.57185870e-02,  6.68228492e-02, -4.17238951e-01,  1.05522819e-01,
          2.25980356e-02,  1.94572985e-01, -2.90276945e-01,  4.56804574e-01,
         -9.43910182e-02,  4.28021342e-01,  3.46948087e-01,  2.31809869e-01,
          9.72746089e-02,  8.10200870e-0

<p style="font-weight: bold; background-color: yellow; padding: 10px; display: inline;">TODO</p>

- explain that Ray Data is heavily optimized for throughput, so preserving row order is not a priority. For evaluation, however, preserving order is crucial!
    - mention the execution option that preserves order (`preserve_order`), and that enabling it is less efficient
    - present our solution: preserve the entire row and calculate metrics per batch

In [8]:
# Calculate metrics
# Score each prediction batch, then sum the per-batch columns over the dataset.
metrics_ds = pred_ds.map_batches(batch_metric)
aggregate_metrics = metrics_ds.sum(["precision", "recall", "f1", "count"])

# Normalize every summed score by the summed count.
# NOTE(review): this is only a row-weighted average if batch_metric emits scores
# already multiplied by the batch's row count — confirm against doggos.utils.
precision, recall, f1 = (
    aggregate_metrics[column] / aggregate_metrics["sum(count)"]
    for column in ("sum(precision)", "sum(recall)", "sum(f1)")
)

2025-02-19 21:47:59,678	INFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-02-19_20-21-51_646153_2288/logs/ray-data
2025-02-19 21:47:59,679	INFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ListFiles] -> TaskPoolMapOperator[PartitionFiles] -> TaskPoolMapOperator[ReadFiles] -> TaskPoolMapOperator[Map(add_class)->Map(Preprocessor.convert_to_label)] -> ActorPoolMapOperator[MapBatches(EmbeddingGenerator)] -> TaskPoolMapOperator[MapBatches(drop_columns)] -> TaskPoolMapOperator[MapBatches(TorchPredictor)] -> TaskPoolMapOperator[MapBatches(batch_metric)] -> AllToAllOperator[Aggregate] -> LimitOperator[limit=1]


Running 0: 0.00 row [00:00, ? row/s]

- ListFiles 1: 0.00 row [00:00, ? row/s]

- PartitionFiles 2: 0.00 row [00:00, ? row/s]

- ReadFiles 3: 0.00 row [00:00, ? row/s]

- Map(add_class)->Map(Preprocessor.convert_to_label) 4: 0.00 row [00:00, ? row/s]

- MapBatches(EmbeddingGenerator) 5: 0.00 row [00:00, ? row/s]

- MapBatches(drop_columns) 6: 0.00 row [00:00, ? row/s]

- MapBatches(TorchPredictor) 7: 0.00 row [00:00, ? row/s]

- MapBatches(batch_metric) 8: 0.00 row [00:00, ? row/s]

- Aggregate 9: 0.00 row [00:00, ? row/s]

Sort Sample 10:   0%|          | 0.00/1.00 [00:00<?, ? row/s]

Shuffle Map 11:   0%|          | 0.00/1.00 [00:00<?, ? row/s]

Shuffle Reduce 12:   0%|          | 0.00/1.00 [00:00<?, ? row/s]

- limit=1 13: 0.00 row [00:00, ? row/s]

[36m(MapBatches(batch_metric) pid=17403, ip=10.0.103.180)[0m   _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# Report the aggregated metrics, one per line, rounded to two decimals.
print(
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1: {f1:.2f}",
    sep="\n",
)

Precision: 0.98
Recall: 0.85
F1: 0.90


In [10]:
# Restart the kernel at the end of the notebook; do_shutdown(restart=True)
# returns a status dict, shown as this cell's output.
# NOTE(review): presumably done to release resources held by this session
# (e.g. GPU memory, the Ray connection) — confirm.
import IPython
IPython.get_ipython().kernel.do_shutdown(restart=True)

{'status': 'ok', 'restart': True}

: 