In [None]:
import fiftyone as fo
import fiftyone.utils.huggingface as fouh

# Load the dataset from Hugging Face if it's your first time using it

# dataset = fouh.load_from_hub(
# "Voxel51/Coursera_lecture_dataset_train", 
# dataset_name="lecture_dataset_train", 
# persistent=True)

In [None]:
# The dataset is already stored locally, so load it by name instead of
# downloading it from the Hugging Face Hub again
cloned_dataset = fo.load_dataset("lecture_dataset_train_clone")

In [None]:
#  #clone the dataset to avoid modifying the original dataset
# cloned_dataset = dataset.clone(name="lecture_dataset_train_clone")

In [None]:
import cv2
import numpy as np
from scipy.ndimage import label
from scipy.spatial.distance import pdist

### Image complexity

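We can use Canny edge detection to measure the ratio of edge pixels to total pixels. The function below then adds the summed per-channel color variance to this edge ratio to produce the final score.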

This metric can be useful because:

1. It provides a measure of the level of detail and intricacy in an image.

2. Higher complexity can indicate more challenging images for object detection.

3. It can help identify images that might require more processing power or sophisticated algorithms for accurate analysis.

4. Understanding image complexity can aid in balancing datasets and evaluating model performance across different complexity levels.


#### Limitations

- **Oversimplification:** Edge detection reduces an image to binary information (edge or non-edge), discarding valuable texture and color information that could be crucial for object detection.

- **Sensitivity to Noise:** Canny edge detection can be sensitive to image noise, potentially leading to inaccurate complexity assessments in noisy images.

- **Parameter Dependency:** The effectiveness of Canny edge detection heavily relies on the chosen threshold parameters (100 and 200 in this case), which may not be optimal for all images in a diverse dataset.

In [None]:
def calculate_image_complexity(dataset):
    """
    Score every image in a FiftyOne dataset by edge density plus color variance.

    For each sample, the image at ``sample.filepath`` is read with OpenCV and
    two quantities are added together:

    - edge density: the fraction of pixels that Canny edge detection
      (thresholds 100 and 200) marks as edges in the grayscale image
    - color variance: the variance of each color channel, computed on pixel
      values divided by 255, summed over the channels

    The sum is stored in the ``image_complexity_score`` field of the sample.
    Samples whose image cannot be read are skipped with a warning and are left
    without a score.

    Parameters:
    dataset (fiftyone.core.dataset.Dataset): FiftyOne dataset object.

    Returns:
    None. It just adds the field to the dataset
    """
    import warnings

    for sample in dataset.iter_samples():
        img = cv2.imread(sample.filepath)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # rather than raising, so guard before touching the array
            warnings.warn(
                f"Could not read image, skipping sample: {sample.filepath}"
            )
            continue

        # Per-channel variance of the pixel values rescaled by 1/255
        img_float = img.astype(np.float32) / 255.0
        color_variance = np.var(img_float, axis=(0, 1)).sum()

        # Canny operates on a single-channel image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        edge_complexity = np.sum(edges > 0) / (img.shape[0] * img.shape[1])

        # Store a plain Python float rather than a NumPy scalar
        sample["image_complexity_score"] = float(
            edge_complexity + color_variance
        )
        sample.save()

In [None]:
# Compute and store `image_complexity_score` for every sample
calculate_image_complexity(cloned_dataset)

In [None]:
# Open the FiftyOne App to browse the dataset and its `image_complexity_score` field
fo.launch_app(cloned_dataset)

### Visual clutter 

Calculates the variance of the grayscale pixel intensities in the image and adds the summed per-channel color variance.

This metric is useful because:

1. It measures the level of disorder or chaos in an image, which can impact object detection.

2. High visual clutter can make it more difficult to isolate and identify individual objects.

3. It provides insight into the visual complexity of scenes beyond just object count or density.

4. Understanding visual clutter can help in developing strategies to improve model performance on visually complex images.

#### Limitations

- **Oversimplification:** Reducing visual clutter to a single variance value may oversimplify the concept, missing important spatial relationships.

- **Grayscale Conversion:** Converting to grayscale loses color information, which can be a significant factor in visual clutter and object detection.

- **Global Measure:** The global variance doesn't capture local variations in clutter, which might be more relevant for object detection.

- **Insensitivity to Structure:** High variance doesn't necessarily correlate with difficulty in object detection. A highly structured image could have high variance but be relatively easy for object detection.

In [None]:
def calculate_visual_clutter(dataset):
    """
    Score every image in a FiftyOne dataset by grayscale plus color variance.

    For each sample, the image at ``sample.filepath`` is read with OpenCV and
    two quantities are added together:

    - grayscale variance: the variance of the grayscale image's pixel values
    - color variance: the variance of each color channel, computed on pixel
      values divided by 255, summed over the channels

    The sum is stored in the ``visual_clutter_score`` field of the sample.
    Samples whose image cannot be read are skipped with a warning and are left
    without a score.

    Parameters:
    dataset (fiftyone.core.dataset.Dataset): FiftyOne dataset object.

    Returns:
    None. It just adds the field to the dataset.
    """
    import warnings

    for sample in dataset.iter_samples():
        img = cv2.imread(sample.filepath)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # rather than raising, so guard before touching the array
            warnings.warn(
                f"Could not read image, skipping sample: {sample.filepath}"
            )
            continue

        # Variance of the grayscale intensities
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_clutter = np.var(gray)

        # Per-channel variance of the pixel values rescaled by 1/255
        img_float = img.astype(np.float32) / 255.0
        color_variance = np.var(img_float, axis=(0, 1)).sum()

        # NOTE(review): the grayscale term is computed on the unscaled pixel
        # values while the color term is computed after dividing by 255, so
        # the two terms are on different scales; kept as-is to preserve the
        # existing scores - confirm whether this weighting is intended.
        sample["visual_clutter_score"] = float(gray_clutter + color_variance)
        sample.save()

In [None]:
# Compute and store `visual_clutter_score` for every sample
calculate_visual_clutter(cloned_dataset)

In [None]:
# Open the FiftyOne App to browse the dataset and its `visual_clutter_score` field
fo.launch_app(cloned_dataset)

### Object Clutter

An object clutter score counts the number of detections per image. This is a simple and useful metric: it provides a quick measure of how busy or crowded an image is in terms of objects.

This metric is useful because:

1. It provides a simple measure of scene complexity in terms of object count.

2. Higher clutter scores can indicate more challenging images for object detection.

3. It helps identify images that may require more processing time or have higher chances of false positives/negatives.

4. Understanding clutter can aid in balancing datasets and evaluating model performance across different complexity levels.

**Pros:**

- Easy to calculate and interpret

- Gives a clear indication of image complexity

**Cons:**

- Doesn't account for object size or distribution

- May not distinguish between genuinely cluttered scenes and scenes with many small objects

**Usefulness:** High, especially as a basic measure of image complexity.

In [None]:
from fiftyone import ViewField as F

# Expression for the number of ground truth detections on each sample
num_detections_expr = F("ground_truth.detections").length()

# Evaluate it across the dataset, then write the counts back as a sample field
detection_counts = cloned_dataset.values(num_detections_expr)
cloned_dataset.set_values("object_clutter_score", detection_counts)

In [None]:
# Open the FiftyOne App to browse the dataset and its `object_clutter_score` field
fo.launch_app(cloned_dataset)

### Object diversity

This will measure the number of distinct classes per image. This is an excellent metric for measuring the semantic diversity of an image.

This metric is useful because:

1. It quantifies the variety of object types present in an image.

2. Higher diversity can indicate more complex scenes that require broader object recognition capabilities.

3. It helps in assessing the range of objects a model needs to handle within a single image.

4. Understanding instance diversity can guide dataset curation to ensure a wide range of object combinations are represented.

**Pros:**

- Directly measures the variety of object types in an image

- Easy to calculate and interpret

**Cons:**

- Doesn't account for the number of instances of each class

- Treats all classes equally, regardless of their visual or semantic similarity

**Usefulness:** High, particularly for understanding the range of objects a model needs to handle.

---
Note: Below we're using `$` in the ViewField. The "$" in FiftyOne is like an "absolute path" in file systems.

- Without "$": Relative path. Depends on where you are in the data structure.

- With "$": Absolute path. Always starts from the root of each sample.

Use "$" when:

1. In complex queries where context might be ambiguous
2. Inside operations like map() or reduce()
3. You want to be explicitly clear you're referring to a top-level field

You often don't need "$" for simple, top-level queries.

In this case, `F("$ground_truth.detections.label")` ensures you're accessing the correct data structure (the top-level `ground_truth` field), and prevents FiftyOne from mistakenly operating on a string instead of an array.

In [None]:
from fiftyone import ViewField as F

# Labels of all ground truth detections; "$" anchors the path at the sample root
labels_per_sample = F("$ground_truth.detections.label")

# Number of distinct labels on each sample
object_diversity_expression = labels_per_sample.unique().length()

# Evaluate across the dataset and write the result back as a sample field
object_diversity_scores = cloned_dataset.values(object_diversity_expression)
cloned_dataset.set_values("object_diversity_score", object_diversity_scores)

In [None]:
# Display the first sample, e.g. to inspect the fields added so far
cloned_dataset.first()

In [None]:
# Open the FiftyOne App to browse the dataset and its `object_diversity_score` field
fo.launch_app(cloned_dataset)

###  Diversity Ratio

This metric considers the number of detections and number of classes per image. This is a more nuanced approach to measuring diversity that takes into account both the number of objects and the variety of classes.

This metric is useful because:

1. It balances the number of objects with the variety of object types, providing a more nuanced view of image complexity.

2. It can distinguish between images with many objects of few classes and those with fewer objects but more diverse classes.

3. Higher ratios might indicate images that could present challenges to an object detection system.

4. This metric can help in creating balanced datasets that challenge models in different ways.

I'm using `log10(object_clutter) + 1` in the denominator to put more emphasis on the number of unique classes relative to the total number of detections. The aim is to highlight images with a diverse range of object classes even if they don't have an extremely high number of total detections.

**Pros:**

- Combines quantity and variety of objects

- Can distinguish between images with many objects of few classes and those with fewer objects but more classes

**Cons:**

- May require careful design to balance the influence of object count and class count

- Interpretation might be less intuitive than simpler metrics

**Usefulness:** High, as it provides a more comprehensive view of image complexity.

In [None]:
from fiftyone import ViewField as F

# Numerator: number of distinct classes on the sample
num_classes = F("object_diversity_score")

# Denominator: log-damped detection count, shifted by one
# NOTE(review): samples with zero detections take log10(0) here - confirm how
# the resulting value should be treated downstream.
damped_detection_count = F("object_clutter_score").log10() + 1

diversity_expression = num_classes / damped_detection_count

# Evaluate across the dataset and write the result back as a sample field
diversity_ratios = cloned_dataset.values(diversity_expression)
cloned_dataset.set_values("diversity_ratio", diversity_ratios)

In [None]:
# Open the FiftyOne App to browse the dataset and its `diversity_ratio` field
fo.launch_app(cloned_dataset)

### Objectness score 

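This metric approximates the fraction of the image that is covered by annotated objects. It is computed as the sum of the relative bounding-box areas of all detections in the image, so overlapping boxes are counted more than once and the score can exceed 1.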

1. It provides insight into how much of the image is occupied by objects of interest.

2. Lower scores might indicate images with large background areas or small objects, which can be challenging for detection.

3. It can help identify images where objects occupy a significant portion of the scene, potentially affecting detection strategies.

4. Understanding objectness can aid in analyzing model performance relative to object size and prominence in the image.

This is a valuable metric for understanding how much of the image is occupied by objects of interest.

**Pros:**
- Provides insight into the density of annotated objects
- Can help identify images with large background areas

**Cons:**
- Doesn't account for the number or diversity of objects
- May be biased towards images with large objects

**Usefulness:** High, especially when combined with other metrics.

In [None]:
from fiftyone import ViewField as F

# The absolute area below reads `metadata.width` / `metadata.height`; make sure
# metadata is populated, otherwise the computed area is silently null.
# Samples that already have metadata are left untouched.
cloned_dataset.compute_metadata()

# FiftyOne stores boxes as [top-left-x, top-left-y, width, height] in relative
# coordinates, so elements 2 and 3 are the relative width and height
rel_bbox_area = F("bounding_box")[2] * F("bounding_box")[3]

# "$" anchors these paths at the sample root, since the expressions are
# evaluated per detection
im_width, im_height = F("$metadata.width"), F("$metadata.height")

# Relative area scaled by the image size gives the area in pixels
abs_area = rel_bbox_area * im_width * im_height

# Persist both areas as attributes on every ground truth detection
cloned_dataset.set_field("ground_truth.detections.relative_bbox_area", rel_bbox_area).save()

cloned_dataset.set_field("ground_truth.detections.absolute_bbox_area", abs_area).save()

In [None]:
# Sum of the relative bbox areas of all ground truth detections on a sample
total_relative_area = F("$ground_truth.detections.relative_bbox_area").sum()

# Evaluate across the dataset and write the result back as a sample field
objectness_scores = cloned_dataset.values(total_relative_area)
cloned_dataset.set_values("objectness_score", objectness_scores)

In [None]:
# Open the FiftyOne App to browse the dataset and its `objectness_score` field
fo.launch_app(cloned_dataset)

### Spatial Distribution Score
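This metric is the average pairwise Euclidean distance between the centroids of the bounding boxes in an image, measured in relative image coordinates.

This metric is useful because: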
1. It quantifies how spread out or clustered objects are within an image.
2. Images with more evenly distributed objects might present different challenges than those with clustered objects.
3. It can help identify images where objects are tightly grouped, which might be challenging for object separation.
4. Understanding spatial distribution can aid in developing models that perform well across various object arrangements.


In [None]:
# Center of each box: top-left corner plus half of the width / height
bounding_box_centroid = (
    F("bounding_box")[0] + F("bounding_box")[2] / 2,
    F("bounding_box")[1] + F("bounding_box")[3] / 2,
)

# Persist the centroid as an attribute on every ground truth detection
cloned_dataset.set_field("ground_truth.detections.bbox_centroid", bounding_box_centroid).save()

# One list of centroids per sample (None when a sample has no detections field)
centroids = cloned_dataset.values("ground_truth.detections.bbox_centroid")

# A pairwise distance needs at least two points: pdist() of a single point is
# an empty array whose mean is NaN, and len() fails on None. Score both cases
# as 0.0, and always store plain floats so the field has a single type.
average_pairwise_distances = [
    float(np.mean(pdist(c))) if c is not None and len(c) > 1 else 0.0
    for c in centroids
]

cloned_dataset.set_values("spatial_distribution_score", average_pairwise_distances)

In [None]:
# Open the FiftyOne App to browse the dataset and its `spatial_distribution_score` field
fo.launch_app(cloned_dataset)

# Using zero shot models to get scene info

In [None]:
import fiftyone.zoo as foz

# Make zero-shot predictions with custom classes

# NOTE(review): `text_prompt` and `classes` are empty here - presumably
# placeholders to be filled in with a prompt prefix and the candidate scene
# labels before running this cell; confirm.
model = foz.load_zoo_model(
    "clip-vit-base32-torch",
    text_prompt="",
    classes=[],
)

# Apply the model to the dataset, storing its output in the `scene` field
cloned_dataset.apply_model(model, label_field="scene")

Required reading:

https://docs.voxel51.com/recipes/creating_views.html