# Preprocessing Shared Drive Data

This is a Colab notebook that converts the nested Google Drive folder layout (one PNG per embryo, time point, channel and z-slice) into one multi-dimensional array per embryo and channel.

In [None]:
import os
import pandas as pd
import numpy as np
from google.colab import files, drive
from matplotlib import image

In [None]:
# Mount Google Drive at /content/gdrive; every path used below is rooted there.
drive.mount('/content/gdrive')

In [None]:
# Root folder of the shared drive; the per-embryo folders and the metadata
# spreadsheet are read from here, and the output arrays are written below it.
shared_path = '/content/gdrive/Shared drives/Embryo_data'
# One row per embryo: quality flags, number of time points and channel ids.
embryo_data = pd.read_excel(f'{shared_path}/embryo_info_CS101.xlsx')

# Output sub-folders relative to shared_path, in the order
# [fluorescence channel, bright-field (DIC) channel] used by the conversion loop.
output_labels = ['raw/fluo_data', 'raw/bf_data']

In [None]:
def get_c_dir(embryo_idx, t, c):
    """Return the folder holding the images of channel ``c`` at time point ``t``.

    The path has the form ``<shared_path>/Embryo<embryo_idx>/t<t>/c<c>``.
    """
    embryo_part = f'Embryo{embryo_idx}'
    time_part = f't{t}'
    channel_part = f'c{c}'
    return '/'.join((shared_path, embryo_part, time_part, channel_part))

def get_png_path(embryo_idx, t, c, z):
    """Return the path of the PNG for z-slice ``z`` of channel ``c`` at time point ``t``.

    The file is named ``c<c>z<z>t<t>.png`` and lives in the folder returned by
    ``get_c_dir(embryo_idx, t, c)``.
    """
    channel_dir = get_c_dir(embryo_idx, t, c)
    png_name = f'c{c}z{z}t{t}.png'
    return f'{channel_dir}/{png_name}'

# Convert every embryo that passes all quality flags into one array per
# channel (fluorescence and bright-field) and save it as
# <shared_path>/<output_label>/embryo<embryo_idx>.npy.
quality_columns = ['if_full_injected', 'fluo_quality_of_z_max_sum', 'fluo_quality_of_raw_png', 'if_healthy']
info_columns = ["embryo_index", "t_num", "fluo_channel", "DIC_channel"]

# np.save does not create missing parent directories, so make sure they exist.
for label in output_labels:
    os.makedirs(f'{shared_path}/{label}', exist_ok=True)

n_embryos = len(embryo_data)
for pd_idx in range(n_embryos):
    print(f'{pd_idx}/{n_embryos}')

    # Skip embryos for which any quality flag is falsy.
    if not all(embryo_data[quality_columns].values[pd_idx]):
        continue

    embryo_idx, t_num, c_fluo, c_bf = embryo_data[info_columns].values[pd_idx]

    # axes per video will be (z, t, x, y) -> (z, x, y, t)
    # NOTE(review): this assumes each PNG loads as a 2-D (x, y) array - confirm.
    for i, c in enumerate([c_fluo, c_bf]):
        # The number of z-slices is taken from the entry count of the first
        # time point's channel folder.
        # NOTE(review): assumes that folder holds only the z-slice PNGs and
        # that every time point has the same number of slices - confirm.
        max_z = len(os.listdir(get_c_dir(embryo_idx, 1, c)))

        video = []
        for z in range(1, max_z + 1):
            z_data = [
                image.imread(get_png_path(embryo_idx, t, c, z))
                for t in range(1, t_num + 1)
            ]
            video.append(np.array(z_data))

        # Stack the per-slice arrays; a plain list has no .shape attribute.
        video = np.array(video)
        old_shape = video.shape
        # Move the time axis (position 1) to the end.
        video = np.moveaxis(video, 1, -1)
        print(f'old shape = {old_shape}, new shape = {video.shape}')
        np.save(f'{shared_path}/{output_labels[i]}/embryo{embryo_idx}', video)