In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import glob
import sys
import cv2
import os

import altair as alt

In [2]:
# Root folder of the dataset; the train/test image folders globbed in the next
# cell (bact_phase_*, bact_fluor_*, worm_*) are looked up directly beneath it.
data_dir = "../data/bpcis/"

In [3]:
def _find_images(pattern):
    """Return the sorted paths under ``data_dir`` matching the glob ``pattern``."""
    return sorted(glob.glob(os.path.join(data_dir, pattern)))


# NOTE: despite the ``_dirs`` suffix, every entry below is the path of a single
# ``*_img.tif`` image file, not a directory.

# Bacterial phase contrast
bact_phase_train_dirs = _find_images("bact_phase_train/*_img.tif")
bact_phase_test_dirs  = _find_images("bact_phase_test/*_img.tif")

# Bacterial fluorescence
bact_fluor_train_dirs = _find_images("bact_fluor_train/*_img.tif")
bact_fluor_test_dirs  = _find_images("bact_fluor_test/*_img.tif")

# Worm
worm_train_dirs = _find_images("worm_train/*_img.tif")
worm_test_dirs  = _find_images("worm_test/*_img.tif")

# Per-subset path lists: training images first, followed by test images.
image_dirs_phase = [*bact_phase_train_dirs, *bact_phase_test_dirs]
image_dirs_fluor = [*bact_fluor_train_dirs, *bact_fluor_test_dirs]
image_dirs_worm  = [*worm_train_dirs, *worm_test_dirs]

In [4]:
def _read_image_shapes(image_paths):
    """Return an ``(n_images, 2)`` integer array holding each image's shape.

    Every file is loaded with ``cv2.IMREAD_GRAYSCALE``, so each image is a 2-D
    array and its shape is a ``(rows, columns)`` pair, i.e. (height, width).

    Args:
        image_paths: iterable of image file paths.

    Returns:
        ``np.ndarray`` of shape ``(len(image_paths), 2)``; ``(0, 2)`` when
        ``image_paths`` is empty.

    Raises:
        OSError: if OpenCV cannot read one of the files. ``cv2.imread`` signals
            failure by returning ``None`` rather than raising, so the check is
            explicit and the offending path is reported.
    """
    shapes = []
    for image_path in image_paths:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise OSError(f"cv2.imread could not read image: {image_path}")
        shapes.append(image.shape)
    # reshape keeps the result 2-D even for an empty list, so the column
    # slicing done when building the DataFrames still works.
    return np.array(shapes, dtype = int).reshape(-1, 2)


shapes_phase = _read_image_shapes(image_dirs_phase)
shapes_fluor = _read_image_shapes(image_dirs_fluor)
shapes_worm  = _read_image_shapes(image_dirs_worm)

In [5]:
# Report the shape of each per-subset array (phase, fluorescence, worm) on one line.
print(*(shapes.shape for shapes in (shapes_phase, shapes_fluor, shapes_worm)))

(397, 2) (218, 2) (120, 2)


In [6]:
# Column "y" takes shape axis 0 and column "x" takes shape axis 1; the scatter
# plots below label them as image height and image width respectively.
data_phase, data_fluor, data_worm = (
    {"y": shapes[:, 0], "x": shapes[:, 1]}
    for shapes in (shapes_phase, shapes_fluor, shapes_worm)
)

# One DataFrame per subset, one row per image.
source_phase, source_fluor, source_worm = map(
    pd.DataFrame, (data_phase, data_fluor, data_worm)
)

source_fluor

Unnamed: 0,y,x
0,515,516
1,515,516
2,515,516
3,511,516
4,515,516
...,...,...
213,350,350
214,345,348
215,350,350
216,350,347


In [7]:
# Scatter plot of image width vs. height for the bacterial phase-contrast images.
# Both axes use the fixed domain [0, 2200], the same as the fluorescence and
# worm charts, so the panels are directly comparable when concatenated.
chart_phase = (
    alt.Chart(
        source_phase,
        height = 250,
        width = 275,
    )
    .mark_circle(
        size = 250,
        stroke = "black",
        strokeWidth = 1.0,
        color = "teal"
    )
    .encode(
        x = alt.X(
            "x",
            axis = alt.Axis(titleFontSize = 15),
            title = "Image Width (pixels)",
            scale = alt.Scale(domain = [0, 2200])
        ),
        # The y channel is described with alt.Y (not alt.X).
        y = alt.Y(
            "y",
            axis = alt.Axis(titleFontSize = 15, titlePadding = 9),
            title = "Image Height (pixels)",
            scale = alt.Scale(domain = [0, 2200])
        ),
    )
    .properties(
        title = "Bacterial Phase Contrast"
    )
)

chart_phase

In [8]:
# Scatter plot of image width vs. height for the bacterial fluorescence images.
# Both axes use the fixed domain [0, 2200], the same as the phase-contrast and
# worm charts, so the panels are directly comparable when concatenated.
chart_fluor = (
    alt.Chart(
        source_fluor,
        height = 250,
        width = 275,
    )
    .mark_circle(
        size = 250,
        stroke = "black",
        strokeWidth = 1.0,
        color = "teal"
    )
    .encode(
        x = alt.X(
            "x",
            axis = alt.Axis(titleFontSize = 15),
            title = "Image Width (pixels)",
            scale = alt.Scale(domain = [0, 2200])
        ),
        # The y channel is described with alt.Y (not alt.X).
        y = alt.Y(
            "y",
            axis = alt.Axis(titleFontSize = 15, titlePadding = 9),
            # No y-axis title on this panel; chart_phase, the left-most panel
            # of the combined figure, carries the "Image Height (pixels)" label.
            title = None,
            scale = alt.Scale(domain = [0, 2200])
        ),
    )
    .properties(
        title = "Bacterial Fluorescence"
    )
)

chart_fluor

In [9]:
# Scatter plot of image width vs. height for the worm images.
# Both axes use the fixed domain [0, 2200], the same as the two bacterial
# charts, so the panels are directly comparable when concatenated.
chart_worm = (
    alt.Chart(
        source_worm,
        height = 250,
        width = 275,
    )
    .mark_circle(
        size = 250,
        stroke = "black",
        strokeWidth = 1.0,
        color = "teal"
    )
    .encode(
        x = alt.X(
            "x",
            axis = alt.Axis(titleFontSize = 15),
            title = "Image Width (pixels)",
            scale = alt.Scale(domain = [0, 2200])
        ),
        # The y channel is described with alt.Y (not alt.X).
        y = alt.Y(
            "y",
            axis = alt.Axis(titleFontSize = 15, titlePadding = 9),
            # No y-axis title on this panel; chart_phase, the left-most panel
            # of the combined figure, carries the "Image Height (pixels)" label.
            title = None,
            scale = alt.Scale(domain = [0, 2200]),
        ),
    )
    .properties(
        title = "Worm"
    )
)

chart_worm

In [10]:
# Lay the three image-size scatter plots side by side (phase, fluorescence,
# worm) with 25 px of padding on every side of the figure.
chart = alt.hconcat(
    chart_phase,
    chart_fluor,
    chart_worm,
    padding = dict(left = 25, right = 25, bottom = 25, top = 25),
)

# Centre the panel titles and apply the Lato typeface to titles and axes.
chart = chart.configure_title(
    anchor = alt.TitleAnchor("middle"),
    fontSize = 16,
    font = "Lato",
)
chart = chart.configure_axis(
    labelFontSize = 13,
    labelFont = "Lato",
    titleFont = "Lato",
    titleFontSize = 16,
)

chart

In [11]:
# One (subset label, split, number of images) row per subset/split pair.
# The abbreviated "Bacterial Phase" / "Bacterial Fluor" labels are used here
# rather than the full "Bacterial Phase Contrast" / "Bacterial Fluorescence".
_count_rows = [
    ("Bacterial Phase", "Train", len(bact_phase_train_dirs)),
    ("Bacterial Phase", "Test",  len(bact_phase_test_dirs)),
    ("Bacterial Fluor", "Train", len(bact_fluor_train_dirs)),
    ("Bacterial Fluor", "Test",  len(bact_fluor_test_dirs)),
    ("Worm",            "Train", len(worm_train_dirs)),
    ("Worm",            "Test",  len(worm_test_dirs)),
]

# Transpose the rows into the column-oriented dict that pandas expects.
_subsets, _splits, _counts = (list(column) for column in zip(*_count_rows))
image_count = {"Subset": _subsets, "Split": _splits, "Count": _counts}

In [12]:
# Long-format table: one row per (Subset, Split) pair with its image count.
count_source = pd.DataFrame(image_count)

In [13]:
# Grouped bar chart of image counts: one group per subset on the x axis, with
# the Train/Test bars placed side by side via xOffset and coloured by split.
bars = (
    alt.Chart(count_source, width = alt.Step(100))
    .mark_bar()
    .encode(
        x = alt.X("Subset", axis = alt.Axis(labelAngle = 0)),
        y = alt.Y("Count:Q", axis = alt.Axis(grid = False)),
        xOffset = "Split:O",
        color = alt.Color("Split", scale = alt.Scale(scheme = "category20")),
    )
    .properties(title = "Image Count")
    # Drop the border around the plotting area.
    .configure_view(stroke = None)
    .configure_scale(bandPaddingInner = 0.2)
)

bars

In [14]:
# Show the subset/split counts table that backs the bar chart.
count_source

Unnamed: 0,Subset,Split,Count
0,Bacterial Phase,Train,249
1,Bacterial Phase,Test,148
2,Bacterial Fluor,Train,143
3,Bacterial Fluor,Test,75
4,Worm,Train,60
5,Worm,Test,60
