# Training Movie Preprocessor
### Preprocesses Training Movies from Aspera IDR downloader

#### Import libraries

In [1]:
import imagej
import pandas as pd
import pathlib
from IPython.utils.io import capture_output
import os

### Determine which frames are labeled in each training movie

In [2]:
#Read plates listed in features dataset to figure out which frames from each movie have training data
#Save these training locations into training_frames file

def save_training_wells(traingset_path, save_path):
    """Extract plate/well/frame triples from a training-set file and save them as TSV.

    Every line of the input that contains ".tif" is parsed by fixed character
    offsets; all other lines are ignored.

    Args:
        traingset_path: path of the training-set text file to read.
        save_path: path of the tab-separated file to write. It gets the
            columns 'Plate', 'Well' and 'Frame' plus the default pandas index.
    """
    rows = []
    with open(traingset_path) as labels_file:
        for raw_line in labels_file:
            # only lines naming a .tif image carry plate/well/frame information
            if ".tif" not in raw_line:
                continue

            entry = raw_line.strip()
            # fixed-width fields within the stripped line
            plate, well, time = entry[:9], entry[14:17], entry[29:33]
            # presumably one frame per 30 time units, numbered from 1 -- TODO confirm
            frame = int(int(time) / 30 + 1)
            rows.append([plate, well, frame])

    frames_table = pd.DataFrame(rows, columns=['Plate', 'Well', 'Frame'])
    frames_table.to_csv(save_path, sep="\t")

# Output: TSV listing the labeled plate/well/frame combinations
save_path = "training_frames.tsv"
# Input: training-set file from the download step
features_path = "../0.download_data/trainingset.dat"
save_training_wells(traingset_path=features_path, save_path=save_path)

### Load image data given plate, well, frame

In [3]:
#return movie data for particular plate/well
def load_movie_data(imagej, parent_dir, plate, well):
    """Load the CH5 movie for one plate/well and return it as an array.

    Args:
        imagej: initialized ImageJ gateway (the object returned by
            ``imagej.init(...)``), not the ``imagej`` module itself. It is
            used to open the file and convert the result to Python.
        parent_dir: directory that contains the ``<plate>/<well>/`` folders.
        plate: plate identifier, used as the first folder level.
        well: well identifier, used as the second folder level and inside the
            file name ``00<well>_01.ch5``.

    Returns:
        ``movie.values[-94:, :, :, 0]``: the last 94 entries along the first
        axis at index 0 of the fourth axis (assumed to be frames and channel
        respectively -- TODO confirm).
    """
    # ImageJ needs an absolute path to load the CH5 file from
    parent_path = pathlib.Path(parent_dir).resolve()
    movie_path = str(parent_path / f"{plate}/{well}/00{well}_01.ch5")

    # ImageJ prints a lot of output that isn't necessary; unfortunately some
    # will still come through
    with capture_output():
        # use the gateway passed by the caller instead of a module-level global
        jmovie = imagej.io().open(movie_path)
        movie = imagej.py.from_java(jmovie)
        movie_arr = movie.values[-94:, :, :, 0]

    return movie_arr
    
# Initialize the ImageJ gateway from the 'Fiji.app' directory.
ij = imagej.init('Fiji.app')
# imagej.init changes the working directory to Fiji.app, so step back up one
# level to keep the relative paths below valid.
os.chdir("..")
# Location of the downloaded CH5 movies, relative to the restored working directory
parent_dir = "../0.download_data/labeled_movies_ch5/"
# Example plate/well used to smoke-test load_movie_data
plate = "LT0013_38"
well = "042"
# NOTE(review): frame is not used by the statements below -- presumably consumed by a later cell
frame = 3

# Load one movie and print its array shape as a sanity check
test = load_movie_data(ij, parent_dir, plate, well)
print(test.shape)

[INFO] Overriding Leica ROI Reader; identifier: command:de.biovoxxel.utilities.RoiReader; jar: file:/home/roshankern/Desktop/Github/mitocheck_data/1.preprocess_data/Fiji.app/plugins/Biovoxxel_Plugins-2.5.6.jar
CellH5Reader initializing /home/roshankern/Desktop/Github/mitocheck_data/0.download_data/labeled_movies_ch5/LT0013_38/042/00042_01.ch5
Plate :/sample/0/plate/
Well :/sample/0/plate/LT0013_38--ex2005_05_06--sp2005_04_11--tt163--c3/experiment/
Site :/sample/0/plate/LT0013_38--ex2005_05_06--sp2005_04_11--tt163--c3/experiment/00042/position/
Parse segmentation ROIs for cell object primary__test : 0
(94, 1024, 1344)


### Get all labeled frames from raw movies and save