In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import os.path as path
import random
from IPython.display import HTML

import exdir
import numpy as np
import pandas as pd
from tqdm import tqdm
import skvideo
import yaml

from cow_tus.analysis.visualizations.viewer import play
import cow_tus.data.transforms.preprocessing as transforms

# Change the working directory so that the relative paths used later in this
# notebook ('sample.exdir', 'data/') resolve under this directory.
# NOTE(review): absolute, machine-specific path -- adjust it per machine.
os.chdir('/home/Gangus/cow-tus')

In [None]:
# main_dataset = file['unprocessed']
# main_dataset.create_group('train')
# main_dataset.create_group('valid')
# main_dataset.create_group('test')
# splits = list(dataset.keys())
# for split in splits:
#     split_dataset = main_dataset[split]
#     for i in range(9):
#         exam_id = f'LD{random.randint(0, 10)}{random.randint(0, 10)}'
#         if exam_id in split_dataset:
#             while exam_id in split_dataset:
#                 exam_id = f'LD{random.randint(0, 10)}{random.randint(0, 10)}'
#         else:
#             dset = split_dataset.create_dataset(exam_id, data=np.random.rand(7,4,5))
#             dset.attrs = {
#                 'exam_id': exam_id,
#                 'label': random.randint(0, 2)
#             }

In [None]:
# Input / output locations.
raw_dir = '/data/cow-tus-data/raw'   # source of labels.csv and the exams/ tree
out_dir = 'sample.exdir'             # exdir store that receives the datasets
group_dir = 'data/'                  # destination of attrs.csv / metadata.yaml

# Each hypothesis condition becomes one nesting level inside the exdir file:
#   <out_dir>/single-instance-learning/temporal-subsample/...
hypothesis_conditions = ['single-instance-learning', 'temporal-subsample']

f = exdir.File(out_dir)
# Walk down from the file root, creating each level on first use.
g = f
for hypothesis_condition in hypothesis_conditions:
    g = g.require_group(hypothesis_condition)
# The innermost group is where the per-exam groups are attached.
root_group = g

# Ordered preprocessing pipeline description. Each step names a function
# ('fn') and the keyword arguments ('args') to call it with.
preprocess_fns = [
    dict(fn='resize_clip', args=dict(size=(210, 280))),
    dict(fn='crop_clip_horizontally_by_proportion', args=dict(ratio=(0.25, 0.75))),
    dict(fn='rgb_to_grayscale', args=dict()),
]

# Attribute table read from labels.csv under raw_dir.
raw_attrs_df = pd.read_csv(path.join(raw_dir, 'labels.csv'))

# Run summary: first an echo of the configuration, then a set of counters
# that all start at zero.
metadata = {
    'config.raw_dir': raw_dir,
    'config.out_dir': out_dir,
    'config.hypothesis_conditions': hypothesis_conditions,
}
metadata.update(dict.fromkeys(
    (
        'binary.num_normals',
        'binary.num_abnormals',
        'multiclass.num_0',
        'multiclass.num_1',
        'multiclass.num_2',
        'multiclass.num_3',
        'meta.num_loops',
        'meta.num_exams',
    ),
    0,
))
# Build the exdir dataset: one group per exam, one dataset per loop, then write
# a flat attribute table (attrs.csv) and the run summary (metadata.yaml).

# Debug cap on how many exams are processed. 11 reproduces the previous
# hard-coded `if i == 10: break`; set to None to process every row.
max_exams = 11
exam_rows = raw_attrs_df if max_exams is None else raw_attrs_df.head(max_exams)

out_attrs_data = []  # one record per loop: exam attrs merged with loop attrs
for i, row in tqdm(exam_rows.iterrows(), total=len(exam_rows)):
    attrs = dict(row)

    # 'control' is treated as score 1. A score of 1 is the normal class
    # (binary 0); the multiclass label is the score shifted to be 0-based.
    tus_score = attrs['tus_score']
    if tus_score == 'control':
        tus_score = 1
    global_label_binary = 0 if int(tus_score) == 1 else 1
    global_label_multiclass = int(tus_score) - 1

    if global_label_binary == 0:
        metadata['binary.num_normals'] += 1
    else:
        metadata['binary.num_abnormals'] += 1
    # Only classes that already have a counter in `metadata` are tallied.
    multiclass_key = f'multiclass.num_{global_label_multiclass}'
    if multiclass_key in metadata:
        metadata[multiclass_key] += 1
    metadata['meta.num_exams'] += 1

    # Use the string form everywhere so the path and the group name agree.
    exam_id = str(attrs['id'])
    exam_path = os.path.join(raw_dir, 'exams', exam_id)

    exam_group = root_group.require_group(exam_id)
    exam_group.attrs.update(attrs)
    exam_group.attrs.update({
        'exam_name': exam_id,
        'exam_path': exam_path,
        'global_label_binary': global_label_binary,
        'global_label_multiclass': global_label_multiclass
    })

    # Sorted so that the row order of attrs.csv is reproducible across runs.
    for loop_filename in sorted(os.listdir(exam_path)):
        # Drop a trailing '.avi' extension (any case). Names without it are kept
        # whole; slicing with rfind() == -1 used to chop their last character.
        loop_stem, loop_ext = os.path.splitext(loop_filename)
        if loop_ext.lower() != '.avi':
            loop_stem = loop_filename
        loop_name = loop_stem.replace(" ", "")
        # Path of the actual file on disk (original name, extension included).
        loop_path = os.path.join(exam_path, loop_filename)
        tqdm.write(loop_path)  # print without breaking the progress bar

        # TODO: placeholder data. The real loading / preprocessing pipeline is
        # disabled; re-enabling it also needs `import skvideo.io`.
#         loop = skvideo.io.vread(loop_path)
#         for preprocess_fn in preprocess_fns:
#             fn = preprocess_fn['fn']
#             args = preprocess_fn['args']
#             loop = getattr(transforms, fn)(loop, **args)
        loop = np.random.rand(50, 28, 28)
        loop_shape = loop.shape
        loop_dataset = exam_group.require_dataset(loop_name, data=loop)
        loop_dataset.attrs.update({
            'loop_name': loop_name,
            'loop_path': loop_path,
            'loop_data_path': path.join(loop_dataset.directory, 'data.npy'),
            'depth': loop_shape[0],
            'height': loop_shape[1],
            'width': loop_shape[2],
            'channels': 1,
        })
        metadata['meta.num_loops'] += 1

        # Flatten exam-level and loop-level attributes into one CSV record.
        out_attrs_entry = {}
        out_attrs_entry.update(exam_group.attrs)
        out_attrs_entry.update(loop_dataset.attrs)
        out_attrs_data.append(out_attrs_entry)

out_attrs_df = pd.DataFrame(out_attrs_data)
os.makedirs(group_dir, exist_ok=True)  # make sure the output directory exists
out_attrs_df.to_csv(path.join(group_dir, 'attrs.csv'), index=False)
# A distinct handle name keeps `f` bound to the exdir.File opened earlier.
with open(path.join(group_dir, 'metadata.yaml'), 'w') as metadata_file:
    metadata_file.write(yaml.dump(metadata))

In [None]:
# anim = play(loop)
# HTML(anim.to_html5_video())

In [None]:
# Print the `directory` attribute of the innermost hypothesis-condition group
# (presumably its on-disk location inside the exdir store -- TODO confirm).
print(root_group.directory)