After creating a visualization of a dataset, get a few samples from each cluster and display them.

Focus on:
client.get_job_samples()
client.get_thumbnail_images()

In [None]:
from typing import Optional

import matplotlib.pyplot as plt

from akride import AkriDEClient, JobContext

# Copy the API key from Data Explorer → Utilities → Get CLI/SDK config
# and use it in place of the "api_key" value below:
sdk_config_dict = {
    "saas_endpoint": "https://app.akridata.ai",
    "api_key": "akridata-apikey",
    "mode": "saas",
}
# Build the Data Explorer client from that configuration:
client = AkriDEClient(sdk_config_dict=sdk_config_dict)

In [None]:
# Utility:
def display_images(images: list,
                   n_rows: int,
                   n_cols: int,
                   figure_w: int,
                   figure_h: int,
                   save_file: Optional[str] = None):
    """Draw images on an n_rows x n_cols grid, then show the figure or save it.

    Args:
        images: Images to draw, one per subplot, in list order. Each item is
            passed to ``imshow`` unchanged.
        n_rows: Number of rows in the subplot grid.
        n_cols: Number of columns in the subplot grid.
        figure_w: Figure width, forwarded to ``figsize``.
        figure_h: Figure height, forwarded to ``figsize``.
        save_file: When None the figure is shown; otherwise the figure is
            written to this path and not shown.

    Raises:
        ValueError: If more images are given than the grid has subplots.
    """
    capacity = n_rows * n_cols
    if len(images) > capacity:
        raise ValueError(f"Provided {len(images)} images. Too much for a fig with {capacity} subplots")

    fig = plt.figure(figsize=(figure_w, figure_h))
    for i, img in enumerate(images):
        # Work on the returned Axes directly rather than pyplot's implicit
        # "current axes" state.
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        ax.axis('off')
        ax.imshow(img)
    # Compute the layout once, after every subplot exists, not once per image.
    fig.tight_layout()

    if save_file is None:
        plt.show()
    else:
        fig.savefig(save_file)
        # A saved figure is never displayed, so release it; otherwise every
        # call made in a loop leaves another open figure registered in pyplot.
        plt.close(fig)

In [None]:
# Look the job up by its upper-cased name and echo the name it reports:
job = client.get_job_by_name("data-explore".upper())
print(f"Got job - {job.get_name()}")

# Read the cluster count to sample from out of the job's default tunables:
job_info = job.info.to_dict()
num_clusters = job_info["tunables_default"]["max_clusters"]
print(f"Data has {num_clusters} clusters")

In [None]:
# Number of samples to get from each cluster:
max_count = 5

# Grid used to display one cluster's samples. The row count is derived from
# max_count (ceiling division) so that raising max_count cannot overflow the
# grid -- assuming at most max_count samples come back per cluster.
n_cols = 3
n_rows = -(-max_count // n_cols)

for cluster_id in range(1, num_clusters + 1):
    # Set the cluster ID and the per-cluster sample limit:
    spec = {"cluster_id": cluster_id, "max_count": max_count}
    # Get thumbnails of the chosen samples:
    samples = client.get_job_samples(job, JobContext.CLUSTER_RETRIEVAL, spec)  # type: ignore
    thumbnails = client.get_thumbnail_images(samples)
    # display grid:
    print(f"Examples for cluster {cluster_id}")
    display_images(thumbnails, n_rows=n_rows, n_cols=n_cols, figure_w=5, figure_h=5, save_file=None)
    # To write each grid to disk instead of showing it, pass a path instead of
    # None, e.g. save_file="./" + str(cluster_id) + ".jpg"

print("Provided examples for each cluster")