In [1]:
%load_ext autoreload
%autoreload 2

import os 
os.environ["DCBENCH_CONFIG"] = "/home/sabri/code/dcbench/dcbench-config.yaml"

![Slice Discovery logo](https://datacentricai.cc/images/logos/slicediscovery_hu6687ef99f16931b2f0d65c72bfc1ddad_90737_250x250_fit_box_2.png "Slice Discovery Logo")

# `dcbench`: Slice Discovery Demo

This notebook showcases the interface for Slice Discovery in `dcbench`. You can read more about the task in our [documentation](https://dcbench.readthedocs.io/en/latest/tasks.html#slice-discovery) and on our [website](https://www.datacentricai.cc/benchmark/).

Let's start off by importing `dcbench` and listing the supported tasks.

In [2]:
# Import the benchmark package and display its collection of supported tasks.
# NOTE(review): the saved output of this cell is a ModuleNotFoundError, i.e. it
# was last run in an environment without `dcbench` installed — re-run after
# installing the package so the task listing is actually shown.
import dcbench
dcbench.tasks

ModuleNotFoundError: No module named 'dcbench'

## Exploring problems

In this notebook, we'll focus on the [Slice Discovery](https://dcbench.readthedocs.io/en/latest/tasks.html#slice-discovery) task. 

In the `dcbench` API, each task is represented by a `dcbench.Task` object that can be accessed by *task_id* (*e.g.* `dcbench.tasks["slice_discovery"]`).

In [41]:
# Look up the Slice Discovery task object by its task_id.
slice_discovery = dcbench.tasks["slice_discovery"]

Each task features a collection of *problems* (*i.e.* instances of the task). For example, the `slice_discovery` task includes dozens of problems across a number of different datasets.

We can list the problems with the task's `problems` attribute:

In [42]:
# Display the task's problems; the table below has one row per problem id.
slice_discovery.problems

Unnamed: 0,alpha,dataset,n_pred_slices,slice_category,slice_names,target_name
p_117306,0.0171,imagenet,5,rare,[craft.n.02],vehicle.n.01
p_117341,0.0171,imagenet,5,rare,[cart.n.01],vehicle.n.01
p_117406,0.0171,imagenet,5,rare,[rocket.n.01],vehicle.n.01
p_117634,0.0171,imagenet,5,rare,[barrow.n.03],vehicle.n.01
p_117980,0.0171,imagenet,5,rare,[bicycle.n.01],vehicle.n.01
p_118007,0.0171,imagenet,5,rare,[wagon.n.01],vehicle.n.01
p_118045,0.0171,imagenet,5,rare,[motorcycle.n.01],vehicle.n.01
p_118259,0.0171,imagenet,5,rare,[hat.n.01],clothing.n.01
p_118311,0.0171,imagenet,5,rare,[shirt.n.01],clothing.n.01
p_118660,0.0171,imagenet,5,rare,[menu.n.02],food.n.01


In [44]:
# Select one problem by id to use for the rest of the demo. In the table above,
# p_118045 is the imagenet problem with slice `motorcycle.n.01` and target
# `vehicle.n.01`.
problem = slice_discovery.problems["p_118045"]

In [45]:
# Show the artifacts attached to this problem: a dict mapping artifact names
# (e.g. 'model', 'base_dataset', 'test_slices') to artifact objects.
problem.artifacts

{'activations': <dcbench.common.artifact.DataPanelArtifact at 0x7f34da93ed30>,
 'base_dataset': <dcbench.common.artifact.VisionDatasetArtifact at 0x7f34da936fa0>,
 'clip': <dcbench.common.artifact.DataPanelArtifact at 0x7f34da936d00>,
 'model': <dcbench.common.artifact.ModelArtifact at 0x7f34da93edc0>,
 'test_predictions': <dcbench.common.artifact.DataPanelArtifact at 0x7f34da93ee20>,
 'test_slices': <dcbench.common.artifact.DataPanelArtifact at 0x7f34da93ee80>,
 'val_predictions': <dcbench.common.artifact.DataPanelArtifact at 0x7f34da93eee0>}

In [47]:
# Index the problem by artifact name to display that artifact — here the
# base dataset, shown below with id, image, name and synset columns.
problem["base_dataset"]

Unnamed: 0,id (PandasSeriesColumn),image (ImageColumn),name (PandasSeriesColumn),synset (PandasSeriesColumn)
0,n01440764_10026,,"tench, Tinca tinca",n01440764
1,n01440764_10027,,"tench, Tinca tinca",n01440764
2,n01440764_10029,,"tench, Tinca tinca",n01440764
3,n01440764_10040,,"tench, Tinca tinca",n01440764
4,n01440764_10042,,"tench, Tinca tinca",n01440764
...,...,...,...,...
1331162,ILSVRC2012_val_00005961,,fountain,n03388043
1331163,ILSVRC2012_val_00008801,,"confectionery, confectionary, candy store",n03089624
1331164,ILSVRC2012_val_00008176,,"ostrich, Struthio camelus",n01518878
1331165,ILSVRC2012_val_00004764,,"paddlewheel, paddle wheel",n03874293


In [48]:
from dcbench.tasks.slice_discovery import confusion_sdm, domino_sdm

In [49]:
# Apply `confusion_sdm` to the problem and keep the result as `solution` so it
# can be passed to `problem.evaluate` next.
solution = confusion_sdm(problem)

In [50]:
# Score the current `solution` against the problem; the result below is a dict
# with 'precision_at_10', 'precision_at_25' and 'auroc'.
problem.evaluate(solution)

  _warn_prf(average, modifier, msg_start, len(result))


{'precision_at_10': 0.0, 'precision_at_25': 0.0, 'auroc': 0.24456890699253225}

In [51]:
# Try a second method on the same problem. This rebinds `solution`, so the
# `confusion_sdm` result from above is no longer reachable under that name.
# NOTE(review): the saved progress bar stops at 34/100 — presumably early
# termination or truncated output; confirm.
solution = domino_sdm(problem)

 34%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                                                                                                                                                                                                                                                  | 34/100 [00:01<00:02, 25.03it/s]


In [52]:
# Evaluate the `domino_sdm` solution with the same call used for the
# `confusion_sdm` solution, so the two metric dicts can be compared.
problem.evaluate(solution)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'precision_at_10': 0.0, 'precision_at_25': 0.0, 'auroc': 0.29141208418194164}

In [33]:
import meerkat as mk

# Join the solution's `pred_slices` panel with the base dataset on the shared
# "id" column so each predicted-slice array sits next to its image and label.
dp = mk.merge(solution["pred_slices"], problem["base_dataset"], on="id")

# Take column 2 of the per-row `pred_slices` arrays, order rows by that value
# from largest to smallest (negate, then argsort), and show the first 10 via
# the `lz` indexer.
slice_column = dp["pred_slices"][:, 2]
top_rows = (-slice_column).argsort()[:10]
dp.lz[top_rows]

Unnamed: 0,id (PandasSeriesColumn),pred_slices (NumpyArrayColumn),index (PandasSeriesColumn),image (ImageColumn),name (PandasSeriesColumn),synset (PandasSeriesColumn)
0,n02782093_3990,"np.ndarray(shape=(5,))",4894,,balloon,n02782093
1,n04606251_35359,"np.ndarray(shape=(5,))",188,,wreck,n04606251
2,n04606251_4133,"np.ndarray(shape=(5,))",1432,,wreck,n04606251
3,n02797295_17818,"np.ndarray(shape=(5,))",5062,,"barrow, garden cart, lawn cart, wheelbarrow",n02797295
4,n04252225_11836,"np.ndarray(shape=(5,))",180,,"snowplow, snowplough",n04252225
5,n02951358_2013,"np.ndarray(shape=(5,))",5252,,canoe,n02951358
6,n02797295_7023,"np.ndarray(shape=(5,))",1807,,"barrow, garden cart, lawn cart, wheelbarrow",n02797295
7,n04037443_243,"np.ndarray(shape=(5,))",5634,,"racer, race car, racing car",n04037443
8,n03599486_18298,"np.ndarray(shape=(5,))",5418,,"jinrikisha, ricksha, rickshaw",n03599486
9,n03444034_9491,"np.ndarray(shape=(5,))",575,,go-kart,n03444034


In [37]:
# NOTE(review): the saved output below has id/target/probs/split columns and
# ~9k rows, unlike the id/image/name/synset table shown for this same
# expression earlier in the notebook. The output looks stale (execution counts
# are also out of order here) — confirm which artifact was meant to be
# displayed and re-run on a fresh kernel.
problem["base_dataset"]

Unnamed: 0,id (PandasSeriesColumn),target (NumpyArrayColumn),probs (ClassificationOutputColumn),split (PandasSeriesColumn)
0,n03788365_14724,0.0,torch.Tensor(shape=torch.Size([2])),test
1,n02692877_45880,1.0,torch.Tensor(shape=torch.Size([2])),valid
2,n02814533_96520,1.0,torch.Tensor(shape=torch.Size([2])),test
3,n03977966_37600,1.0,torch.Tensor(shape=torch.Size([2])),test
4,n03977966_33724,1.0,torch.Tensor(shape=torch.Size([2])),test
...,...,...,...,...
9050,n01828970_2030,0.0,torch.Tensor(shape=torch.Size([2])),test
9051,n02727426_15975,0.0,torch.Tensor(shape=torch.Size([2])),valid
9052,n07760859_9947,0.0,torch.Tensor(shape=torch.Size([2])),test
9053,n04552348_15078,1.0,torch.Tensor(shape=torch.Size([2])),valid
