# Plotting and Analysis

This notebook plots and analyzes the log results of one or more simulator runs, given a fixed timing configuration.
These logs (bboxes.csv) are obtained by running the simulator on some experiments. The goal of these plots is to analyze the worm's behavior,
and to analyze the system's error and how it is affected by the different behaviors the worm exhibits.

It's important to note that for proper analysis, all the experiments that are analyzed by this notebook *at once* must have the same timing configuration (TimingConfig) parameters.

In [1]:
# fix imports
import os
import sys
# Resolve the notebook's parent directory and register it on the import path
# at most once (presumably so the `wtracker` imports in later cells resolve).
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import matplotlib.pyplot as plt
from wtracker.eval import *
from wtracker.sim.config import TimingConfig
from wtracker.utils.gui_utils import UserPrompt

### Timing configuration and log files selection

In [None]:
from pprint import pprint

# Root folder of the evaluated experiment; the default paths below are built from it.
base_path = "D:\\Guy_Gilad\\FinalEvaluations\\Exp0_config1_Optimal\\"

################################ User Input ################################

# path to the timing config file. 
# If None, a file dialog will open to select a file
time_config_path = base_path + "time_config.json"

# list containing paths to simulation log files.
# All of these simulations must have been run with the above timing config.
# If empty, a file dialog will open to select files.
# (Must be a list, not a bare string: later cells index it as log_files[exp_number].)
log_files = [base_path + "bboxes.csv"]

# NOTE(review): this value is re-assigned in the "Save Data" cell before it is used.
data_save_path = base_path + "data.pkl"

############################################################################

# Honor the "If None, a file dialog will open" contract documented above.
if time_config_path is None:
    time_config_path = UserPrompt.open_file(title="Select timing config file")

timing_config = TimingConfig.load_json(time_config_path)

# `not log_files` covers both an empty list and None.
# NOTE(review): this call uses the keyword `filetypes`, while the background-selection
# cell uses `file_types` — confirm which one UserPrompt.open_file accepts.
if not log_files:
    log_files = UserPrompt.open_file(title="Select log files", filetypes=[("Log files", "*.csv")], multiple=True)


# Echo the effective configuration so it is recorded in the notebook output.
pprint(timing_config)
pprint(log_files)

### Plotting configuration

Notice that all of the plots below accept `condition` as a parameter.
`condition` is expected to be a function with the following signature:

```python
def cond_func1(input_df: pd.DataFrame) -> pd.Series:  # returns a boolean mask, one value per row
    return (input_df["wrm_speed"] > 5) &  (input_df["wrm_speed"] <= 30)
```

In Python, such functions can also be declared inline, without a `def` statement, using the following syntax:
(for more information, read about lambda functions)

```python
cond_func1 = lambda input_df: (input_df["wrm_speed"] > 5) & (input_df["wrm_speed"] <= 30)
cond_func2 = lambda input_df: input_df["phase"] == "imaging"
```

##### Optionally, Calculate precise error

To calculate the precise error of the system, run the following cell; otherwise skip it.
Note that running this cell might take a while.

For each frame, the exact pixels in which the worm's head is located are calculated. This requires access to the worm images that were extracted during the experiment initialization process.
Afterwards, the error is calculated as the proportion of worm pixels that are outside of the microscope view.
Because the images must be loaded from disk to calculate this error, the calculation is relatively slow.

In [None]:
import numpy as np
from wtracker.utils.frame_reader import FrameReader

# TODO: ADD DOCS FOR THIS SECTION
# Loads the inputs needed for the precise-error calculation: the background
# array (`background`) and a frame reader over the worm images (`worm_reader`).

################################ User Input ################################

# Path to the background array (.npy). If None, a file dialog opens.
background_path = "data\\Exp2_GuyGilad_logs_yolo\\background.npy"

# Folder containing the worm images. If None, a directory dialog opens.
worm_folder_path = "D:\\Guy_Gilad\\Exp2_GuyGilad\\logs_yolo\\worms"

# Passed as `diff_thresh` to analyzer.calc_precise_error in the "Calculate Precise error" cell.
diff_thresh = 20

############################################################################

# NOTE(review): this call uses the keyword `file_types`, while the log-file and
# save-file dialogs in this notebook use `filetypes` — confirm which one is correct.
if background_path is None:
    background_path = UserPrompt.open_file(title="Select background images", file_types=[("Numpy files", "*.npy")])

if worm_folder_path is None:
    worm_folder_path = UserPrompt.open_directory(title="Select worm image folders")

print("Background Files: ", background_path)
print("Worm Image Folders: ", worm_folder_path)

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code — only load background files from a trusted source.
background = np.load(background_path, allow_pickle=True)

worm_reader = FrameReader.create_from_directory(worm_folder_path)

##### Calibrate Threshold [Optional]

In [None]:
from wtracker.eval.vlc import StreamViewer
from wtracker.eval.error_calculator import ErrorCalculator
from wtracker.utils.frame_reader import FrameReader
import pandas as pd
import numpy as np

# Viewer window used by the threshold-calibration cell below (it calls viewer.open / imshow / waitKey).
viewer = StreamViewer(window_name="Threshold Calibration")

In [None]:

################################ User Input ################################
threshold = 20
exp_number = 0 # the number of the experiment in the list
delay = 0
############################################################################
def show_sementation(wrm_view:np.ndarray, wrm_mask:np.ndarray) -> None:
    """Display a worm view with every pixel outside `wrm_mask` blacked out.

    Installed below as `ErrorCalculator.probe_hook`.

    Args:
        wrm_view: Image of the worm; modified in place (pixels outside the mask are set to 0).
        wrm_mask: Boolean mask used to index `wrm_view`.
    """
    wrm_view[~wrm_mask] = 0
    viewer.imshow(wrm_view)
    # `delay` is read from the user-input section above at call time.
    viewer.waitKey(delay)


ErrorCalculator.probe_hook = show_sementation

reader = FrameReader.create_from_directory(worm_folder_path)
# `log_files` may be a single path or a list of paths; indexing a plain string
# would yield a single character instead of a file path.
log_path = log_files if isinstance(log_files, str) else log_files[exp_number]
log = pd.read_csv(log_path)

viewer.open()
# Target shape: the reader's frame shape with its first two dimensions replaced
# by those of the background.
shape = [*reader.frame_shape]
shape[:2] = background.shape[:2]
# ndarray.reshape returns a new array rather than reshaping in place, so the
# result must be kept. A separate name is used to leave the global `background`
# (also used by the precise-error cell) untouched.
calib_background = background.reshape(shape)

ErrorCalculator.calculate_precise(
    background=calib_background,
    worm_bboxes=log[["wrm_x", "wrm_y", "wrm_w", "wrm_h"]].to_numpy(),
    mic_bboxes=log[["mic_x", "mic_y", "mic_w", "mic_h"]].to_numpy(),
    frame_nums=log['frame'].astype(int).to_list(),
    worm_reader=reader,
    diff_thresh=threshold
)

##### Calculate Precise error

In [None]:
# Compute the precise error from the worm images and background loaded above.
# NOTE(review): `analyzer` is not defined in any cell visible before this one —
# confirm where it should be created (e.g. via DataAnalyzer.load, as done further below).
analyzer.calc_precise_error(
    worm_reader=worm_reader,
    background=background,
    diff_thresh=diff_thresh,
)

In [None]:
# Describe the `precise_error` column at the listed percentiles.
# NOTE(review): relies on `analyzer`, which no visible cell defines.
analyzer.describe(["precise_error"], percentiles=[0.25, 0.5, 0.75, 0.9, 0.95, 0.99])

##### Save Data

In [None]:
# Destination of the saved analysis. Set to None to pick the file through a dialog.
# NOTE(review): this overrides the "data.pkl" value assigned to `data_save_path`
# in the configuration cell — confirm which file name/format is intended.
data_save_path = base_path + "analysis.csv"

if data_save_path is None:
    data_save_path = UserPrompt.save_file(title="Save data", filetypes=[("Log file", "*.csv")])

# NOTE(review): `analyzer` is not defined in any cell visible before this one.
analyzer.save(data_save_path)

In [None]:
# NOTE(review): this unconditional raise looks like a deliberate stop that keeps
# "Run All" from continuing past this point — confirm, and consider a clearer mechanism.
raise KeyError()

### Plotting and analysis

In [6]:
from wtracker.utils.path_utils import Files
import pandas as pd

# NOTE(review): `exp_type` is not referenced by any other visible code.
exp_type = "Exp0"
# Substring that a directory path must contain to be selected (`config_type in dir`).
config_type = "config3"
# Experiment-name substrings; a directory is kept if it contains at least one of them.
exp_list = ["Exp0",'Exp2', "Exp1", 'Exp3', 'Exp4']
def filter(dir:str, t_list:list[str]):
    """Return True if at least one string of `t_list` is a substring of `dir`, else False."""
    return any(token in dir for token in t_list)
    

# Scan the evaluations root and keep only the directories that match both the
# selected config and one of the selected experiments.
all_dirs = Files("D:\\Guy_Gilad\\FinalEvaluations", scan_dirs=True)

experiment_dirs = [path for path in all_dirs if (config_type in path) and filter(path, exp_list)]

# One timing config and one analyzer per selected directory, in the same order.
time_configs = [TimingConfig.load_json(path + "//time_config.json") for path in experiment_dirs]

analyzer_list = [
    DataAnalyzer.load(timing, path + "//bboxes.csv")
    for timing, path in zip(time_configs, experiment_dirs)
]

# Per-experiment bounds, passed as `bounds` to `an.clean(...)` in the cleaning cell.
# NOTE(review): the meaning/order of the four numbers is not visible here — confirm.
exp_bounds = {
    "Exp0": (73,38,1551,1359),
    "Exp1": (39,32,1467,1301),
    "Exp2": (6,57,1495,1336),
    "Exp3": (10,60,1498,1322),
    "Exp4": (24,71,1496,1335),
}

for an, exp_dir in zip(analyzer_list, experiment_dirs):
    # The model label is the text after the last underscore of the directory path.
    model = exp_dir.split('_')[-1]
    # Both trailing fragments "1)" and "2)" are mapped to the same label.
    if model in ("1)", "2)"):
        model = "ResMLP(1)"

    print(model)
    # Tag the analyzer's original data with the model label, then initialize it.
    an._orig_data["model"] = model
    an.initialize(period=10)

print(len(analyzer_list))

CSV
Optimal
PolyFit(2)
ResMLP(1)
CSV
Optimal
PolyFit(2)
ResMLP(1)
CSV
Optimal
PolyFit(2)
ResMLP(1)
CSV
Optimal
PolyFit(2)
ResMLP(1)
CSV
Optimal
PolyFit(2)
ResMLP(1)
20


In [7]:
import numpy as np

# Frame ranges flagged as bad, keyed by experiment name. Each entry is a
# (start, end) pair; `remove_cycles` expands it with range(start, end), so the
# end frame itself is not included.
BAD_frames = {
    "Exp0": [(41109, 41113)],
    "Exp1": [(21930, 21942),
             (18741,18763)],
    "Exp2": [(22474, 22745), (32859,32860)],
    "Exp3": [(6843,6853), (6872,6915), (37750,37751), (37807, 37808), (47485, 47506), (53117,53122)],
    "Exp4": [(5700,5798),(14963,14986),(27816, 27817),(35785,35804),(38839,38864),(39250,39300),(43543, 43544),(43661,43674),(46848,46859),(64874,64905)],
}

def remove_cycles(analyzer:DataAnalyzer, frames:tuple[int,int]):
    """Remove every cycle that contains a frame in the half-open range [frames[0], frames[1]).

    Args:
        analyzer: Object exposing a `data` table with "frame" and "cycle" columns
            and a `remove_cycle` method.
        frames: (start, end) frame numbers; `end` itself is excluded.

    Returns:
        The same `analyzer` object, after `remove_cycle` was called on it.
    """
    first, last = frames[0], frames[1]
    bad_frames = np.arange(first, last, dtype=int)
    in_range = analyzer.data['frame'].isin(bad_frames)
    affected_cycles = analyzer.data.loc[in_range, "cycle"].unique()
    analyzer.remove_cycle(affected_cycles)
    return analyzer


for an, exp_dir in zip(analyzer_list, experiment_dirs):
    # The experiment name is the first BAD_frames key contained in the directory path.
    exp_name = [name for name in BAD_frames if name in exp_dir][0]

    # Removal of the BAD_frames ranges is currently disabled. To re-enable it, call
    #   remove_cycles(an, fr)  for every fr in BAD_frames[exp_name]
    # at this point, before cleaning.

    an.clean(bounds=exp_bounds[exp_name])
    # Anomaly calculation is likewise disabled:
    #an.calc_anomalies(no_preds=True, remove_anomalies=False)

    print(exp_name)
    an.print_stats()

    an.change_unit("sec")

# One plotter over the data tables of all analyzers.
all_data = [item.data for item in analyzer_list]
pltr = Plotter(all_data, plot_height=7, palette="bright")

Exp0
Total Count Removed Frames: 7151 (11.688%)
Total Count of No Pred Frames: 20 (0.037%)
Total Num of Cycles: 6038
Non Perfect Predictions: 29.293%
Exp0
Total Count Removed Frames: 7151 (11.688%)
Total Count of No Pred Frames: 20 (0.037%)
Total Num of Cycles: 6038
Non Perfect Predictions: 1.747%
Exp0
Total Count Removed Frames: 7151 (11.688%)
Total Count of No Pred Frames: 20 (0.037%)
Total Num of Cycles: 6038
Non Perfect Predictions: 10.939%
Exp0
Total Count Removed Frames: 7151 (11.688%)
Total Count of No Pred Frames: 20 (0.037%)
Total Num of Cycles: 6038
Non Perfect Predictions: 12.698%
Exp1
Total Count Removed Frames: 17874 (27.501%)
Total Count of No Pred Frames: 3 (0.006%)
Total Num of Cycles: 5290
Non Perfect Predictions: 86.685%
Exp1
Total Count Removed Frames: 17874 (27.501%)
Total Count of No Pred Frames: 3 (0.006%)
Total Num of Cycles: 5290
Non Perfect Predictions: 21.814%
Exp1
Total Count Removed Frames: 17578 (27.045%)
Total Count of No Pred Frames: 299 (0.631%)
Total Nu

In [None]:
# create the plotter for the first experiment only
# Plotter is given data tables (`.data`), matching the multi-experiment call
# `Plotter([an.data for an in analyzer_list], ...)` — not the analyzer objects themselves.
pltr = Plotter([analyzer_list[0].data], plot_height=7, palette="bright")

In [None]:
# print column names of the data, each prefixed by its index
# NOTE(review): `analyzer` is not defined in any visible cell (only `analyzer_list` is) — confirm.
pprint([f"{i}: {col}" for i, col in enumerate(analyzer.column_names())])

In [None]:
# NOTE(review): `analyzer` is not defined in any visible cell (only `analyzer_list` is) — confirm.
analyzer.print_stats()

In [None]:
# Trajectory plot of the data currently held by `pltr`.
pltr.plot_trajectory()
plt.show()

In [None]:
# Speed plot colored by log number; `condition` selects rows with wrm_speed <= 800.
pltr.plot_speed(
    log_wise=True,
    hue_col="log_num",
    condition=lambda x: x["wrm_speed"] <= 800,
    aspect=1.5,
)
plt.show()

In [None]:
# Distance-error plot colored by log number; `condition` selects rows whose
# worm_deviation lies strictly between 1e-5 and 300.
pltr.plot_error(
    log_wise=True,
    error_kind="dist",
    hue_col="log_num",
    condition=lambda df: (df["worm_deviation"] > 1e-5) & (df["worm_deviation"] <300),
    cycle_wise=True,
)
plt.show()

In [None]:
# Speed vs. distance error; `condition` selects rows with wrm_speed < 1000
# and worm_deviation < 300.
pltr.plot_speed_vs_error(
    error_kind="dist",
    condition=lambda x: (x["wrm_speed"] < 1000) & (x["worm_deviation"] < 300),
    cycle_wise=True,
)
plt.show()

In [None]:
def save_fig(fig:plt.Figure, path:str, dpi:int=100):
    """Write `fig` to disk as a PNG.

    Args:
        fig: Figure to save; its dpi is set to `dpi` first.
        path: Destination path without extension; ".png" is appended.
        dpi: Resolution applied to the figure before saving.
    """
    fig.set_dpi(dpi)
    # tight_layout is left disabled, as in the original helper:
    # fig.tight_layout()
    target = path + ".png"
    fig.savefig(target, format="png")

In [None]:
# Clear the plotter's palette (it was created with palette="bright"), then draw the
# deviation plot as a "boxen" plot colored by model. The commented-out keyword
# arguments are optional tweaks left disabled.
pltr.palette = None
plot = pltr.plot_deviation(
    # percentile=0.99,
    log_wise=False,
    kind="boxen",
    # k_depth="proportion",
    # outlier_prop=0.02,
    # saturation=0.5,
    hue_col='model',
    aspect=2
)
# Saving the figure to disk is disabled; uncomment to write it via save_fig.
# save_fig(plot.figure, f"C:\\Users\\slevylab\\Desktop\\plots\\Exp[0,1,2]_{config_type}")

plt.show()

In [None]:
# quantile(1) is the maximum of worm_deviation, so `cond` keeps every row strictly below the maximum.
q = pltr.data["worm_deviation"].quantile(1)
# NOTE(review): `cond` is currently unused — the `condition=cond` argument below is commented out.
cond = lambda d: d["worm_deviation"] < q
plot = pltr.create_distplot(
    x_col="cycle_step",
    y_col="worm_deviation",
    x_label="cycle step",
    y_label="distance",
    title="Distance between worm and microscope centers as function of cycle step",
    kind="hist",
    common_bins=True,
    common_norm=True,
    stat='proportion',
    hue_col='model'
    # NOTE(review): a comma must be added after hue_col='model' before re-enabling the next line.
    # condition=cond
)

plt.show()

In [None]:
# Head-size plot colored by log number, drawn with 50% opacity.
pltr.plot_head_size(hue_col="log_num", alpha=0.5)
plt.show()

In [None]:
# Show a descriptive table for every analyzer in `analyzer_list`.
# The loop variable itself is used, so each experiment gets its own table
# instead of one (undefined) global `analyzer` being described repeatedly.
for exp_analyzer in analyzer_list:
    display(exp_analyzer.describe(columns=["wrm_speed", "bbox_error", "worm_deviation"], num=19))

In [None]:
import numpy as np

# find anomalies in the data
# The thresholds set to np.inf (distance error, speed) can never be reached,
# which presumably disables those criteria — confirm against calc_anomalies.
# NOTE(review): `analyzer` is not defined in any visible cell (only `analyzer_list` is) — confirm.
analyzer.calc_anomalies(
    no_preds=True,
    min_bbox_error=1.0,
    min_dist_error=np.inf,
    min_speed=np.inf,
    min_size=300,
)

In [None]:
# Descriptive statistics at the 5%, 10%, ..., 95% percentiles
# (19 evenly spaced values from 0.05 up to, but excluding, 1).
percentiles = np.linspace(0.05, 1, 19, endpoint=False)
summary_columns = ["wrm_speed", "bbox_error", "worm_deviation"]
table = pltr.data[summary_columns].describe(percentiles)
display(table)

In [None]:
# NOTE(review): this cell repeats the preceding cell verbatim — consider removing one of them.
table = pltr.data[["wrm_speed", "bbox_error", "worm_deviation"]].describe(np.linspace(0.05, 1, 19, endpoint=False))
display(table)