# Image Analysis for datasets

This example notebook shows how to use datasetinsights to do image analysis on datasets. Please change the code as per your visualization requirements.

## Variance of Laplacian

In [15]:
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import os
from pycocotools.coco import COCO
from datasetinsights.stats.image_analysis import (
    get_bbox_var_laplacian,
    get_bbox_fg_bg_var_laplacian,
    laplacian_img,
    get_final_mask,
    get_seg_fg_bg_var_laplacian
)

In [None]:
# Path of the annotation file handed to pycocotools' COCO loader below;
# replace the placeholder before running.
annotation_file_path = "<file-path>"
# Directory that the image file names from the annotation file are joined onto;
# replace the placeholder before running.
img_dir_path = "<img-dir-path>"

In [None]:
# Load the annotation file into a pycocotools COCO object.
coco = COCO(annotation_file=annotation_file_path)
# Collect the image ids filtered by category id 1; change catIds to analyse
# other categories.
img_ids = coco.getImgIds(catIds=[1])

### Calculating variance according to bbox

In [None]:
def get_bg_fg_vars(img_ids):
    """Collect bbox-based Laplacian statistics for every image in ``img_ids``.

    Relies on the notebook-level ``coco`` object and ``img_dir_path`` string.
    For each image the Laplacian is computed with ``laplacian_img`` and then
    split into per-bbox (foreground) and background values by
    ``get_bbox_fg_bg_var_laplacian``.

    Args:
        img_ids: Iterable of COCO image ids to process.

    Returns:
        A tuple ``(bg_vars, fg_vars, full_img_var)``:
        ``bg_vars`` holds one background value per image, ``fg_vars`` is the
        flattened list of per-bbox values across all images, and
        ``full_img_var`` holds the value returned by ``laplacian_img`` for
        each image.
    """
    bg_vars, fg_vars, full_img_var = [], [], []
    for img_id in tqdm(img_ids):
        img_meta_data = coco.loadImgs(ids=[img_id])
        file_name = img_meta_data[0]['file_name']

        img_path = os.path.join(img_dir_path, file_name)
        laplacian = laplacian_img(img_path)
        # NOTE(review): this stores the raw laplacian_img() result, not a
        # variance, despite the list's name -- confirm this is intended.
        full_img_var.append(laplacian)

        ann_ids = coco.getAnnIds(imgIds=img_id)
        annotations = coco.loadAnns(ann_ids)
        bbox_var_lap, img_var_laplacian = get_bbox_fg_bg_var_laplacian(
            laplacian, annotations
        )

        bg_vars.append(img_var_laplacian)
        fg_vars.extend(bbox_var_lap)

    # Return only after the loop has finished; returning inside the loop body
    # would stop after the first image.
    return bg_vars, fg_vars, full_img_var



In [None]:
# Run the bbox-based analysis over the selected image ids.
bg_vars, fg_vars, full_img_var = get_bg_fg_vars(img_ids)
# Check the results for NaNs or 0s; if present, remove them before plotting.

#### Box Plot Code

In [None]:
# Box plot comparing the background values of two datasets.
# NOTE(review): bg_vars_a / bg_vars_b are not defined in the visible cells;
# presumably they are the bg_vars results of two separate dataset runs -- confirm.
box_plot_data=[bg_vars_a, bg_vars_b]
dataset_name=['PSP-A','PSP-B']

fig, ax = plt.subplots(dpi=100, figsize=(18,10))

# vert=0 lays the boxes out horizontally; patch_artist=True makes the boxes
# fillable patches; the mean is drawn as a line and outlier points are hidden.
box = ax.boxplot(box_plot_data,vert=0,patch_artist=True,labels=dataset_name, 
          showmeans=True, meanline=True, showfliers=False,)

# One semi-transparent fill colour per dataset, in the same order as box_plot_data.
colors = ['red', 'blue',]
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.55)

for median in box['medians']:
    median.set_color('black')
for i, means in enumerate(box['means']):
    means.set_color('black')
    # Anchor the text at the second point of the mean line, nudged by 0.05 in y.
    x, y = means.get_xydata()[1]
    mean = np.array(box_plot_data[i]).mean()
    text = ' μ={:.2f}'.format(mean)
    ax.annotate(text, xy=(x, y+0.05), fontsize=15)


# Empty line plots exist only to give the legend a 'Mean' and a 'Median' entry.
plt.plot([], [], '--', linewidth=1, color='black', label='Mean')
plt.plot([], [], '-', linewidth=1, color='black', label='Median')

plt.legend(fontsize=15)


plt.xticks(fontsize=15 )
plt.yticks(fontsize=15 )
# The value axis is logarithmic, hence the advice to drop 0s/NaNs beforehand.
plt.xscale('log')
plt.minorticks_off()
plt.xlabel("Var of Laplacian of background in log scale", fontsize=18)
plt.ylabel("Dataset", fontsize=18)
plt.show()

In [None]:
# Histogram (counts only: hist=True, kde=False) of the bbox-based background values.
# NOTE(review): kde_kws presumably has no effect while kde=False, and
# sns.distplot is deprecated in newer seaborn releases -- confirm against the
# installed seaborn version.
sns.distplot(bg_vars, hist = True, kde = False,
             kde_kws = {'linewidth': 3},
             label = "Background")
plt.legend(prop={'size': 16}, title = '')
plt.xlabel('Var of Laplacian')
plt.ylabel('Counts')
plt.show()

In [None]:
# Histogram (counts only: hist=True, kde=False) of the per-bbox foreground values.
# NOTE(review): kde_kws presumably has no effect while kde=False, and
# sns.distplot is deprecated in newer seaborn releases -- confirm against the
# installed seaborn version.
sns.distplot(fg_vars, hist = True, kde = False,
             kde_kws = {'linewidth': 3},
             label = "Foreground")
plt.legend(prop={'size': 16}, title = '')
plt.xlabel('Var of Laplacian')
plt.ylabel('Counts')
plt.show()

### Calculating variance according to segmentation

In [None]:
# Accumulators for the segmentation-based background / foreground values.
# Re-run this cell before re-running the loop that appends to them, otherwise
# the lists keep growing.
seg_bg, seg_fg = [], []

In [None]:
# Append one segmentation-based foreground and background value per annotated
# image to seg_fg / seg_bg. Only the first 10000 image ids are processed.
for img_id in tqdm(img_ids[:10000]):
    ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
    annotations = coco.loadAnns(ann_ids)
    # Skip images without annotations before reading the image, so no
    # Laplacian is computed for images that contribute nothing.
    if not annotations:
        continue

    img_meta_data = coco.loadImgs(ids=[img_id])
    file_name = img_meta_data[0]['file_name']

    img_path = os.path.join(img_dir_path, file_name)
    laplacian = laplacian_img(img_path)

    # One mask per annotation, combined into a single mask by get_final_mask.
    masks = [coco.annToMask(annotation) for annotation in annotations]
    final_mask = get_final_mask(masks)

    fg_var_lap, bg_var_lap = get_seg_fg_bg_var_laplacian(
        laplacian=laplacian, final_mask=final_mask
    )

    seg_fg.append(fg_var_lap)
    seg_bg.append(bg_var_lap)
        
    

In [None]:
# Histogram (counts only: hist=True, kde=False) of the segmentation-based background values.
# NOTE(review): kde_kws presumably has no effect while kde=False, and
# sns.distplot is deprecated in newer seaborn releases -- confirm against the
# installed seaborn version.
sns.distplot(seg_bg, hist = True, kde = False,
             kde_kws = {'linewidth': 3},
             label = "Background")
plt.legend(prop={'size': 16}, title = '')
plt.xlabel('Var of Laplacian')
plt.ylabel('Counts')
plt.show()

In [None]:
# Histogram (counts only: hist=True, kde=False) of the segmentation-based foreground values.
# NOTE(review): kde_kws presumably has no effect while kde=False, and
# sns.distplot is deprecated in newer seaborn releases -- confirm against the
# installed seaborn version.
sns.distplot(seg_fg, hist = True, kde = False,
             kde_kws = {'linewidth': 3},
             label = "Foreground")
plt.legend(prop={'size': 16}, title = '')
plt.xlabel('Var of Laplacian')
plt.ylabel('Counts')
plt.show()

## Spectral Analysis

In [None]:
from datasetinsights.stats.image_analysis import get_average_psd_1d

In [None]:
# Image directory used for the spectral analysis; replace the placeholder
# before running. This overwrites any earlier img_dir_path value.
img_dir_path = "<img-dir-path>"

In [None]:
# Run get_average_psd_1d over the "jpg" images in img_dir_path. Going by the
# names, the two results are presumably the average 1-D power spectral density
# and its standard deviation -- confirm against the datasetinsights docs.
avg_psd_1d, std_psd_1d = get_average_psd_1d(img_dir=img_dir_path, img_type="jpg")

In [None]:
# Log-log comparison of the average 1-D PSD curves of two datasets.
# NOTE(review): avg_psd_1d_a / avg_psd_1d_b are not defined in the visible
# cells; presumably they are avg_psd_1d results of two separate dataset runs
# -- confirm.
fig, ax = plt.subplots(dpi=300)
ax.plot(avg_psd_1d_a, color="red", label="PSD-A")
ax.plot(avg_psd_1d_b, color="blue", label="PSD-B")
ax.set_xscale('log')
ax.set_yscale('log')
# Start the x-axis at 1 and leave the upper limit automatic.
plt.xlim([1, None])
# The curves carry labels, so draw the legend that displays them.
ax.legend()
plt.show()

## Wavelet Transform

In [None]:
from datasetinsights.stats.image_analysis import get_wt_coeffs_var

In [None]:
# Image directory used for the wavelet analysis; replace the placeholder
# before running. This overwrites any earlier img_dir_path value.
img_dir_path = "<img-dir-path>"

In [None]:
# Run get_wt_coeffs_var over img_dir_path. Going by the names, the three
# results are presumably the variances of the horizontal, vertical and
# diagonal wavelet coefficients -- confirm against the datasetinsights docs.
horizontal_coeff, vertical_coeff, diagonal_coeff = get_wt_coeffs_var(img_dir_path)
# Check the results for NaNs or 0s; if present, remove them before plotting.

In [None]:
# Histogram of horizontal_coeff on log-spaced bins with log-scaled axes.
# Clamp the lower bin edge to machine epsilon so np.log10 never receives 0,
# which would give -inf and invalid bin edges.
min_coeff = max(min(horizontal_coeff), np.finfo(float).eps)
log_bins = np.logspace(np.log10(min_coeff), np.log10(max(horizontal_coeff)))
# NOTE(review): the label is empty, so the legend below has nothing to show;
# fill in a dataset name here.
plt.hist(horizontal_coeff, bins=log_bins, label='', alpha=0.5)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("Var of cH in Log Scale")
plt.ylabel("Count in Log Scale")
plt.legend(loc='best')
plt.show()

### Line Plot

In [None]:
# Histogram both datasets on their own log-spaced bins and keep the plt.hist
# return values for later use.
# NOTE(review): horizontal_coeff_a / horizontal_coeff_b are not defined in the
# visible cells; presumably they are horizontal_coeff results of two separate
# dataset runs -- confirm.

# Machine epsilon acts as a floor so log10 never receives 0.
float_eps = np.finfo(float).eps

lowest_a = max(min(horizontal_coeff_a), float_eps)
bins_a = np.logspace(np.log10(lowest_a), np.log10(max(horizontal_coeff_a)))
hist_a = plt.hist(horizontal_coeff_a, bins=bins_a, label='PSP-A', alpha=0.5)

lowest_b = max(min(horizontal_coeff_b), float_eps)
bins_b = np.logspace(np.log10(lowest_b), np.log10(max(horizontal_coeff_b)))
hist_b = plt.hist(horizontal_coeff_b, bins=bins_b, label='PSP-B', alpha=0.5)


In [None]:
# Line-plot version of the two histograms stored in hist_a / hist_b.
fig, ax = plt.subplots(dpi=100, figsize=(16,9))
# hist_x[1][1:] drops the first entry of the second returned array so that it
# lines up with hist_x[0]; per matplotlib's hist return value these are the
# bin edges and the counts, so each count is plotted at its bin's right edge.
ax.plot(hist_a[1][1:],hist_a[0], linestyle='-',  label="PSP-A", linewidth=3, color="red")
ax.plot(hist_b[1][1:],hist_b[0], linestyle='-',  label="PSP-B", linewidth=3, color="blue")

ax.set_xscale('log')
ax.minorticks_off()

plt.legend(loc="upper left", fontsize=28)
plt.xlabel("Var of cH in Log Scale", fontsize=30)
plt.ylabel("Count", fontsize=30)
plt.xticks(fontsize=25 )
plt.yticks(fontsize=25 )

# Optional: uncomment to pin the x-axis range.
#plt.xlim(1/100000, 100000)
# Thicken the frame (all axes spines) of the plot.
plt.setp(ax.spines.values(), linewidth=2.5)

plt.show()