# Dataset analysis

In [None]:
from segmentizer import Segmentizer
from segmentizer.data_loader import LASIESTADataLoader
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
from tqdm import tqdm_notebook, tnrange
from IPython.display import clear_output
import seaborn as sns
import pandas as pd
import numpy as np
from multiprocessing import Pool

In [4]:
def load_data(db_name):
    
    total_red = []
    total_green = []
    total_blue = []
    
    background_red = []
    background_green = []
    background_blue = []
    
    foreground_red = []
    foreground_green = []
    foreground_blue = []
    
    stats_total = dict()
    stats_background = dict()
    
    data_loader = LASIESTADataLoader('../datasets/'+db_name, '../datasets/'+db_name+'-GT', db_name)
    
    print("Start loading dataset " + db_name)
    
    for original_frame, label_frame in tqdm_notebook(data_loader, total=data_loader.get_nr_of_frames()):
        for i in range(original_frame.shape[0]):
            for j in range(original_frame.shape[1]):
                total_red.append(original_frame[i,j,0])
                total_green.append(original_frame[i,j,1])
                total_blue.append(original_frame[i,j,2])
          
                # Is background pixel
                if np.array_equal(label_frame[i,j], [0,0,0]):
                    background_red.append(original_frame[i,j,0])
                    background_green.append(original_frame[i,j,1])
                    background_blue.append(original_frame[i,j,2])  
                    
                # Is foreground pixel
                if np.array_equal(label_frame[i,j], [0,0,0]) is False and np.array_equal(label_frame[i,j], [128,128,128]) is False:
                    foreground_red.append(original_frame[i,j,0])
                    foreground_green.append(original_frame[i,j,1])
                    foreground_blue.append(original_frame[i,j,2])    
                
    stats['total_mean_red'] = np.mean(total_red)
    stats['total_mean_green'] = np.mean(total_green)
    stats['total_mean_blue'] = np.mean(total_blue)
    
    stats['total_std_red'] = np.std(total_red)
    stats['total_std_green'] = np.std(total_green)
    stats['total_std_blue'] = np.std(total_blue)
    
    stats['total_min_red'] = np.min(total_red)
    stats['total_min_green'] = np.min(total_green)
    stats['total_min_blue'] = np.min(total_blue)
    
    stats['total_max_red'] = np.max(total_red)
    stats['total_max_green'] = np.max(total_green)
    stats['total_max_blue'] = np.max(total_blue)
    
    stats['total_percentile_25_red'] = np.percentile(total_red, 25)
    stats['total_percentile_25_green'] = np.percentile(total_green, 25)
    stats['total_percentile_25_blue'] = np.percentile(total_blue, 25)
    
    stats['total_percentile_50_red'] = np.percentile(total_red, 50)
    stats['total_percentile_50_green'] = np.percentile(total_green, 50)
    stats['total_percentile_50_blue'] = np.percentile(total_blue, 50)
    
    stats['total_percentile_75_red'] = np.percentile(total_red, 75)
    stats['total_percentile_75_green'] = np.percentile(total_green, 75)
    stats['total_percentile_75_blue'] = np.percentile(total_blue, 75)
    
    stats['background_mean_red'] = np.mean(background_red)
    stats['background_mean_green'] = np.mean(background_green)
    stats['background_mean_blue'] = np.mean(background_blue)
    
    stats['background_std_red'] = np.std(background_red)
    stats['background_std_green'] = np.std(background_green)
    stats['background_std_blue'] = np.std(background_blue)
    
    stats['background_min_red'] = np.min(background_red)
    stats['background_min_green'] = np.min(background_green)
    stats['background_min_blue'] = np.min(background_blue)
    
    stats['background_max_red'] = np.max(background_red)
    stats['background_max_green'] = np.max(background_green)
    stats['background_max_blue'] = np.max(background_blue)
    
    stats['background_percentile_25_red'] = np.percentile(background_red, 25)
    stats['background_percentile_25_green'] = np.percentile(background_green, 25)
    stats['background_percentile_25_blue'] = np.percentile(background_blue, 25)
    
    stats['background_percentile_50_red'] = np.percentile(background_red, 50)
    stats['background_percentile_50_green'] = np.percentile(background_green, 50)
    stats['background_percentile_50_blue'] = np.percentile(background_blue, 50)
    
    stats['background_percentile_75_red'] = np.percentile(background_red, 75)
    stats['background_percentile_75_green'] = np.percentile(background_green, 75)
    stats['background_percentile_75_blue'] = np.percentile(background_blue, 75)
    
    stats['foreground_mean_red'] = np.mean(foreground_red)
    stats['foreground_mean_green'] = np.mean(foreground_green)
    stats['foreground_mean_blue'] = np.mean(foreground_blue)
    
    stats['foreground_std_red'] = np.std(foreground_red)
    stats['foreground_std_green'] = np.std(foreground_green)
    stats['foreground_std_blue'] = np.std(foreground_blue)
    
    stats['foreground_min_red'] = np.min(foreground_red)
    stats['foreground_min_green'] = np.min(foreground_green)
    stats['foreground_min_blue'] = np.min(foreground_blue)
    
    stats['foreground_max_red'] = np.max(foreground_red)
    stats['foreground_max_green'] = np.max(foreground_green)
    stats['foreground_max_blue'] = np.max(foreground_blue)
    
    stats['foreground_percentile_25_red'] = np.percentile(foreground_red, 25)
    stats['foreground_percentile_25_green'] = np.percentile(foreground_green, 25)
    stats['foreground_percentile_25_blue'] = np.percentile(foreground_blue, 25)
    
    stats['foreground_percentile_50_red'] = np.percentile(foreground_red, 50)
    stats['foreground_percentile_50_green'] = np.percentile(foreground_green, 50)
    stats['foreground_percentile_50_blue'] = np.percentile(foreground_blue, 50)
    
    stats['foreground_percentile_75_red'] = np.percentile(foreground_red, 75)
    stats['foreground_percentile_75_green'] = np.percentile(foreground_green, 75)
    stats['foreground_percentile_75_blue'] = np.percentile(foreground_blue, 75)
    
    print("Dataset " + db_name + ' loaded successfully')
    
    return stats
        

In [5]:
datasets = [
    'I_BS_01',
    'I_CA_01',
    'I_IL_01',
    'I_MB_01',
    'I_MC_01',
    'I_OC_01',
    'I_SI_01',
    'I_SM_01'
]

stats = {
    
}

for dataset in datasets:
    stats[dataset] = load_data(dataset)

Start loading dataset I_BS_01


Dataset I_BS_01 loaded successfully
Start loading dataset I_CA_01


Dataset I_CA_01 loaded successfully
Start loading dataset I_IL_01


Dataset I_IL_01 loaded successfully
Start loading dataset I_MB_01


Dataset I_MB_01 loaded successfully
Start loading dataset I_MC_01


Dataset I_MC_01 loaded successfully
Start loading dataset I_OC_01


Dataset I_OC_01 loaded successfully
Start loading dataset I_SI_01


Dataset I_SI_01 loaded successfully
Start loading dataset I_SM_01


Dataset I_SM_01 loaded successfully


In [6]:
for dataset in datasets:
    print('########## ' + dataset + ' ########## \n')
    print('Mean Total (Red): ' + str(stats[dataset]['total_mean_red']))
    print('Std Total (Red): ' + str(stats[dataset]['total_std_red']))
    print('Min Total (Red): ' + str(stats[dataset]['total_min_red']))
    print('Max Total (Red): ' + str(stats[dataset]['total_max_red']))
    print('25% Total (Red): ' + str(stats[dataset]['total_percentile_25_red']))
    print('50% Total (Red): ' + str(stats[dataset]['total_percentile_50_red']))
    print('75% Total (Red): ' + str(stats[dataset]['total_percentile_75_red']))
    print()
    print('Mean Background (Red): ' + str(stats[dataset]['background_mean_red']))
    print('Std Background (Red): ' + str(stats[dataset]['background_std_red']))
    print('Min Background (Red): ' + str(stats[dataset]['background_min_red']))
    print('Max Background (Red): ' + str(stats[dataset]['background_max_red']))
    print('25% Background (Red): ' + str(stats[dataset]['background_percentile_25_red']))
    print('50% Background (Red): ' + str(stats[dataset]['background_percentile_50_red']))
    print('75% Background (Red): ' + str(stats[dataset]['background_percentile_75_red']))
    print()
    print('Mean Foreground (Red): ' + str(stats[dataset]['foreground_mean_red']))
    print('Std Foreground (Red): ' + str(stats[dataset]['foreground_std_red']))
    print('Min Foreground (Red): ' + str(stats[dataset]['foreground_min_red']))
    print('Max Foreground (Red): ' + str(stats[dataset]['foreground_max_red']))
    print('25% Foreground (Red): ' + str(stats[dataset]['foreground_percentile_25_red']))
    print('50% Foreground (Red): ' + str(stats[dataset]['foreground_percentile_50_red']))
    print('75% Foreground (Red): ' + str(stats[dataset]['foreground_percentile_75_red']))
    print()
    print('Mean Total (Green): ' + str(stats[dataset]['total_mean_green']))
    print('Std Total (Green): ' + str(stats[dataset]['total_std_green']))
    print('Min Total (Green): ' + str(stats[dataset]['total_min_green']))
    print('Max Total (Green): ' + str(stats[dataset]['total_max_green']))
    print('25% Total (Green): ' + str(stats[dataset]['total_percentile_25_green']))
    print('50% Total (Green): ' + str(stats[dataset]['total_percentile_50_green']))
    print('75% Total (Green): ' + str(stats[dataset]['total_percentile_75_green']))
    print()
    print('Mean Background (Green): ' + str(stats[dataset]['background_mean_green']))
    print('Std Background (Green): ' + str(stats[dataset]['background_std_green']))
    print('Min Background (Green): ' + str(stats[dataset]['background_min_green']))
    print('Max Background (Green): ' + str(stats[dataset]['background_max_green']))
    print('25% Background (Green): ' + str(stats[dataset]['background_percentile_25_green']))
    print('50% Background (Green): ' + str(stats[dataset]['background_percentile_50_green']))
    print('75% Background (Green): ' + str(stats[dataset]['background_percentile_75_green']))
    print()
    print('Mean Foreground (Blue): ' + str(stats[dataset]['foreground_mean_blue']))
    print('Std Foreground (Blue): ' + str(stats[dataset]['foreground_std_blue']))
    print('Min Foreground (Blue): ' + str(stats[dataset]['foreground_min_blue']))
    print('Max Foreground (Blue): ' + str(stats[dataset]['foreground_max_blue']))
    print('25% Foreground (Blue): ' + str(stats[dataset]['foreground_percentile_25_blue']))
    print('50% Foreground (Blue): ' + str(stats[dataset]['foreground_percentile_50_blue']))
    print('75% Foreground (Blue): ' + str(stats[dataset]['foreground_percentile_75_blue']))


########## I_BS_01 ########## 

Mean Total (Red): 98.4174992437
Std Total (Red): 65.3772621032
Min Total (Red): 6
Max Total (Red): 255
25% Total (Red): 35.0
50% Total (Red): 73.0
75% Total (Red): 165.0

Mean Background (Red): 99.6040885687
Std Background (Red): 65.8130519577
Min Background (Red): 6
Max Background (Red): 255
25% Background (Red): 35.0
50% Background (Red): 76.0
75% Background (Red): 166.0

Mean Foreground (Red): 83.1284600709
Std Foreground (Red): 58.7478999882
Min Foreground (Red): 6
Max Foreground (Red): 255
25% Foreground (Red): 34.0
50% Foreground (Red): 60.0
75% Foreground (Red): 134.0

Mean Total (Green): 97.3340688131
Std Total (Green): 63.165817767
Min Total (Green): 5
Max Total (Green): 255
25% Total (Green): 37.0
50% Total (Green): 74.0
75% Total (Green): 160.0

Mean Background (Green): 98.6301663595
Std Background (Green): 63.6847319651
Min Background (Green): 7
Max Background (Green): 255
25% Background (Green): 37.0
50% Background (Green): 77.0
75% Backgrou

In [None]:
def pixel_analysis(db_name):
    
    red_frame_statistics = [[[] for _ in range(353)]  for _ in range(288)] 
    green_frame_statistics = [[[] for _ in range(353)]  for _ in range(288)] 
    blue_frame_statistics = [[[] for _ in range(353)]  for _ in range(288)] 
    
    data_loader = LASIESTADataLoader('../datasets/'+db_name, '../datasets/'+db_name+'-GT', db_name)
    
    print("Start loading dataset " + db_name)
    
    data_loader.set_next_frame_idx(0)

    for original_frame in tqdm_notebook(data_loader, total=data_loader.get_nr_of_frames()):
        for i, row in enumerate(original_frame):
            for j, rgb in enumerate(row):
                red_frame_statistics[i][j].append(rgb[0])
                green_frame_statistics[i][j].append(rgb[1])
                blue_frame_statistics[i][j].append(rgb[2])
             
                
    print("Dataset " + db_name + ' loaded successfully')
    
    return stats
        