# Image Analysis for datasets

This example notebook shows how to use `datasetinsights` to perform image analysis on datasets.

## Variance of Laplacian

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import os
from pycocotools.coco import COCO
from datasetinsights.stats.image_analysis import (
    get_bbox_var_laplacian,
    get_bbox_fg_bg_var_laplacian,
    laplacian_img,
    get_final_mask,
    get_seg_fg_bg_var_laplacian
)

In [None]:
# User configuration: replace both placeholders before running the notebook.
# Path to the COCO-format annotation JSON file.
annotation_file_path = "<file-path>"
# Directory that holds the image files referenced by the annotations.
img_dir_path = "<img-dir-path>"

In [None]:
# Load the annotation index, then keep only the ids of images that contain
# at least one annotation of category id 1.
coco = COCO(
    annotation_file=annotation_file_path,
)
img_ids = coco.getImgIds(
    catIds=[1],
)

### Calculating variance according to bbox

In [None]:
# Accumulators filled by the bbox loop below:
# background variances and foreground (bounding-box) variances.
bg_vars = []
fg_vars = []

In [None]:
# For each image (at most the first 10,000 ids), compute its Laplacian and
# collect variance-of-Laplacian values for the bounding-box regions
# (foreground) and for the image as returned by the helper (background).
for img_id in tqdm(img_ids[:10000]):
    # loadImgs returns a list; a single id was requested, so take element 0.
    img_meta_data = coco.loadImgs(ids=[img_id])
    file_name = img_meta_data[0]['file_name']

    img_path = os.path.join(img_dir_path, file_name)
    laplacian = laplacian_img(img_path)

    ann_ids = coco.getAnnIds(imgIds=img_id)
    annotations = coco.loadAnns(ann_ids)
    # Fixed: the previous call used `get_fg_bg_var_laplacian`, a name that is
    # never imported or defined, which raised NameError on a fresh kernel.
    # `get_bbox_fg_bg_var_laplacian` is the helper brought in by the import cell.
    # NOTE(review): presumably returns (per-bbox variances, background
    # variance) in that order -- confirm against datasetinsights.
    bbox_var_lap, img_var_laplacian = get_bbox_fg_bg_var_laplacian(
        laplacian, annotations
    )

    # One background value is appended per image; the foreground result is an
    # iterable, so its items are added individually.
    bg_vars.append(img_var_laplacian)
    fg_vars.extend(bbox_var_lap)



In [None]:
# Histogram (counts only, no KDE curve) of the background variance values
# gathered by the bbox loop.
sns.distplot(
    bg_vars,
    hist=True,
    kde=False,
    kde_kws={"linewidth": 3},
    label="Background",
)
plt.legend(title="", prop={"size": 16})
plt.xlabel("Var of Laplacian")
plt.ylabel("Counts")
plt.show()

In [None]:
# Histogram (counts only, no KDE curve) of the foreground (bounding-box)
# variance values gathered by the bbox loop.
sns.distplot(
    fg_vars,
    hist=True,
    kde=False,
    kde_kws={"linewidth": 3},
    label="Foreground",
)
plt.legend(title="", prop={"size": 16})
plt.xlabel("Var of Laplacian")
plt.ylabel("Counts")
plt.show()

### Calculating variance according to segmentation

In [None]:
# Accumulators filled by the segmentation loop below:
# background variances and foreground (mask) variances.
seg_bg = []
seg_fg = []

In [None]:
# For each image (at most the first 10,000 ids), merge all annotation masks
# into one mask and record the foreground / background variance of the
# Laplacian computed against that mask.
for img_id in tqdm(img_ids[:10000]):
    # A single id is requested, so the metadata record is element 0.
    file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(img_dir_path, file_name)
    laplacian = laplacian_img(img_path)

    ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
    annotations = coco.loadAnns(ann_ids)
    if len(annotations) == 0:
        # No annotations for this image, so there is no mask to build; skip it.
        continue

    # One mask per annotation, combined by the library helper.
    masks = [coco.annToMask(annotation) for annotation in annotations]
    final_mask = get_final_mask(masks)

    fg_var_lap, bg_var_lap = get_seg_fg_bg_var_laplacian(
        laplacian=laplacian,
        final_mask=final_mask,
    )

    seg_fg.append(fg_var_lap)
    seg_bg.append(bg_var_lap)
        
    

In [None]:
# Histogram (counts only, no KDE curve) of the background variance values
# gathered by the segmentation loop.
sns.distplot(
    seg_bg,
    hist=True,
    kde=False,
    kde_kws={"linewidth": 3},
    label="Background",
)
plt.legend(title="", prop={"size": 16})
plt.xlabel("Var of Laplacian")
plt.ylabel("Counts")
plt.show()

In [None]:
# Histogram (counts only, no KDE curve) of the foreground variance values
# gathered by the segmentation loop.
sns.distplot(
    seg_fg,
    hist=True,
    kde=False,
    kde_kws={"linewidth": 3},
    label="Foreground",
)
plt.legend(title="", prop={"size": 16})
plt.xlabel("Var of Laplacian")
plt.ylabel("Counts")
plt.show()

## Spectral Analysis

In [None]:
from datasetinsights.stats.image_analysis import get_average_psd_1d

In [None]:
# Image directory used by the spectral analysis below. This is a placeholder:
# set it to your own image directory before running. A machine-specific
# absolute path was removed here because it is not portable and exposes a
# local username.
img_dir_path = "<img-dir-path>"

In [None]:
# Run the library's averaged 1-D PSD helper over the "jpg" images found in
# img_dir_path; it yields two results, unpacked as average and std.
psd_stats = get_average_psd_1d(
    img_dir=img_dir_path,
    img_type="jpg",
)
avg_psd_1d, std_psd_1d = psd_stats

In [None]:
# Plot the averaged 1-D PSD on log-log axes, starting the x-axis at 1.
fig, ax = plt.subplots(dpi=300)
ax.plot(
    avg_psd_1d,
    color="red",
    label="PSD",
)
ax.set_xscale("log")
ax.set_yscale("log")
# Lower x-limit fixed at 1; passing None leaves the upper limit unchanged.
plt.xlim([1, None])