In [31]:
from tqdm import tqdm
import numpy as np
import multiprocessing
import uuid
import os
import threading
import time
import random

In [2]:
from box_client import get_box_client
from annotation_processor import *

In [3]:
# Box API client handle; later cells use it to open the feature folder and to
# download individual feature files.
client = get_box_client()

In [4]:
# Root Box folder that the next cell walks to find feature files.
# NOTE(review): the folder ID is hard-coded; consider moving it to a config constant.
rat_video_feature_folder = client.folder('67339798174')

In [5]:
# Map feature-file name -> Box file ID by walking the three-level folder
# hierarchy (day folder -> position folder -> feature file).
box_feature_file_ids = {}
for day_folder in rat_video_feature_folder.get_items():
    for pos_folder in day_folder.get_items():
        box_feature_file_ids.update(
            (feature_file.name, feature_file.id)
            for feature_file in pos_folder.get_items()
        )

[31m"GET https://api.box.com/2.0/folders/67339798174/items?offset=0" 401 0
{'Date': 'Thu, 21 Feb 2019 17:47:34 GMT', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Strict-Transport-Security': 'max-age=31536000', 'WWW-Authenticate': 'Bearer realm="Service", error="invalid_token", error_description="The access token provided is invalid."', 'BOX-REQUEST-ID': '0ujoqfj87u8t8cp00av98rca3a4', 'Age': '0'}
b''
[0m


In [6]:
# Reverse lookup: Box file ID -> feature-file name.
file_ids_to_file_names = {
    box_id: file_name for file_name, box_id in box_feature_file_ids.items()
}

In [7]:
# Seizure annotations read from a local JSON file; passed to
# seizure_times_from_npz_filename() to look up the seizure times for a feature file.
seizure_annotations = load_seizure_annotations("seizure_annotations.json")

In [8]:
def choose_epoch_files(n_files, box_feature_file_ids, seizure_annotations, p=0.5):
    """Pick the Box file IDs to download for one epoch.

    Files are split into those with at least one annotated seizure and those
    with none, then each group is sampled independently (np.random.choice,
    i.e. with replacement, so the same file may be drawn more than once).

    Args:
        n_files: total number of file IDs to return.
        box_feature_file_ids: mapping of feature-file name -> Box file ID.
        seizure_annotations: annotations passed through to
            seizure_times_from_npz_filename().
        p: fraction of the returned files that should contain a seizure.

    Returns:
        A list of int(n_files * p) seizure-file IDs followed by the remaining
        non-seizure-file IDs.
    """
    pos_count = int(n_files * p)
    neg_count = n_files - pos_count

    with_seizure, without_seizure = [], []
    for file_name, file_id in box_feature_file_ids.items():
        has_seizure = len(seizure_times_from_npz_filename(file_name, seizure_annotations)) > 0
        target = with_seizure if has_seizure else without_seizure
        target.append(file_id)

    # Draw positives first, then negatives, so the RNG is consumed in a fixed order.
    chosen = list(np.random.choice(with_seizure, size=pos_count))
    chosen += list(np.random.choice(without_seizure, size=neg_count))
    return chosen

In [9]:
def download_box_feature_file(file_id):
    """Download one feature file from Box and load its arrays into memory.

    The file is written to a uniquely named scratch file in the current
    working directory, read back with np.load, and the scratch file is removed
    again whether or not the download or load succeeded.

    Args:
        file_id: Box file ID of the feature archive.

    Returns:
        (file_id, payload) where payload is a dict holding the archive's
        "features" and "start_time" entries.
    """
    scratch_path = str(uuid.uuid4())
    payload = None
    try:
        with open(scratch_path, "wb") as sink:
            client.file(file_id).download_to(sink)
        with np.load(scratch_path) as archive:
            payload = {
                "features": archive["features"],
                "start_time": archive["start_time"],
            }
    finally:
        # Always clean up the scratch file, even if the download failed midway.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

    return (file_id, payload)

In [10]:
def download_epoch_files(buffer, file_ids):
    """Download each file in file_ids, appending the results to buffer in order.

    Args:
        buffer: list that receives one (file_id, payload) tuple per download.
            It is mutated in place so the caller can read the results after
            running this function on a worker thread.
        file_ids: iterable of Box file IDs to fetch.
    """
    store = buffer.append
    for box_id in file_ids:
        store(download_box_feature_file(box_id))

In [None]:
def ground_truth_label(seizure_array, window_start, window_length,
                       min_seizure_duration=10, exclusion_duration=120):
    """Label one sliding window of a video as seizure / non-seizure / excluded.

    Args:
        seizure_array: iterable of seizure start times, in the same time units
            as window_start (seconds, per the callers in this notebook).
        window_start: start time of the sliding window.
        window_length: duration of the sliding window.
        min_seizure_duration: duration assumed for every seizure; a window that
            overlaps [start, start + min_seizure_duration) is positive.
        exclusion_duration: windows starting at or after the assumed seizure
            end but less than this long after the seizure start are marked
            ambiguous.

    Returns:
        1  if the window overlaps the assumed extent of any seizure,
        -1 if it overlaps none but starts in the post-seizure range of one,
        0  otherwise.
    """
    in_post_seizure_range = False

    for k in seizure_array:
        # Overlap with the assumed seizure extent always wins. Returning -1 as
        # soon as an earlier seizure's post-seizure range matched would make
        # the label depend on the order of seizure_array.
        if (window_start + window_length > k) and (window_start < k + min_seizure_duration):
            return 1
        if (window_start >= k + min_seizure_duration) and (window_start < k + exclusion_duration):
            in_post_seizure_range = True

    return -1 if in_post_seizure_range else 0

In [56]:
def epoch_positive_negative_times(epoch_examples, window_size=5, fps=29.97):
    """Enumerate candidate window start positions in this epoch's files.

    Each file is scanned at one-second steps; every window is labelled with
    ground_truth_label() and sorted into the positive or negative list.
    Windows labelled -1 are dropped.

    Args:
        epoch_examples: mapping of feature-file name -> dict with "features"
            (array whose first axis is divided by fps to get the length in
            seconds) and "start_time" (video start time in seconds).
        window_size: window duration in whole seconds.
        fps: frame rate used to convert the feature count into seconds.

    Returns:
        (epoch_file_names, pos_example_indices, neg_example_indices). Each
        index is a (file_idx, second_offset) pair, with file_idx pointing into
        epoch_file_names.

    Relies on the notebook-level `seizure_annotations`.
    """
    epoch_file_names = list(epoch_examples.keys())
    pos_example_indices = []
    neg_example_indices = []

    for file_idx, file_name in enumerate(epoch_file_names):
        processed_chunk = epoch_examples[file_name]
        # video times (sec)
        vid_start_time = processed_chunk["start_time"]
        vid_length = int(processed_chunk["features"].shape[0] / fps)
        seizure_times = seizure_times_from_npz_filename(file_name, seizure_annotations)
        # The original body used an undefined `window_length`; the window
        # duration is the `window_size` parameter.
        for i in range(vid_length - window_size):
            label = ground_truth_label(seizure_times, vid_start_time + i, window_size)
            if label == 0:
                neg_example_indices.append((file_idx, i))
            elif label == 1:
                pos_example_indices.append((file_idx, i))

    return epoch_file_names, pos_example_indices, neg_example_indices

In [57]:
def indices_to_example(epoch_examples, epoch_filenames, pair, window_size = 5, fps = 30):
    """Slice one window of features out of a file.

    Args:
        epoch_examples: mapping of feature-file name -> dict with a "features"
            array indexed by frame along its first axis.
        epoch_filenames: list of file names that pair[0] indexes into.
        pair: (file_index, time_index), where time_index is the window start
            as an offset in seconds from the beginning of the file.
        window_size: window duration in seconds.
        fps: frames per second used to convert seconds to frame indices.

    Returns:
        features[start_frame:start_frame + window_size * fps]. The result may
        be shorter than a full window if it runs past the end of the array.
    """
    file_index = pair[0]
    time_index = pair[1]
    features = epoch_examples[epoch_filenames[file_index]]['features']
    # time_index is in seconds, so convert it to a frame index before slicing;
    # using it directly would misalign the clip with the label it was given.
    start_frame = int(round(time_index * fps))
    # Force an int so a non-integer fps cannot produce a float slice bound.
    n_frames = int(round(window_size * fps))
    return features[start_frame:start_frame + n_frames]

In [58]:
def generate_minibatch(batch_size, pos_p, pos_example_indices, neg_example_indices, epoch_examples, epoch_filenames):
    """Assemble one minibatch with a fixed share of positive examples.

    Args:
        batch_size: number of examples in the batch.
        pos_p: fraction of the batch drawn from pos_example_indices.
        pos_example_indices: (file_index, time_index) pairs labelled positive.
        neg_example_indices: (file_index, time_index) pairs labelled negative.
        epoch_examples: mapping of feature-file name -> loaded feature dict.
        epoch_filenames: list of file names that the pairs' file_index refers to.

    Returns:
        (batch_x, batch_y): batch_x has shape (batch_size, 150, 1536) and
        batch_y has shape (batch_size, 1). Positives (label 1) fill the first
        int(batch_size * pos_p) rows; negatives (label 0) fill the rest.

    Raises:
        IndexError: if a non-zero number of examples is requested from an
            empty index list (raised by random.choices).
    """
    num_pos_examples = int(batch_size*pos_p)
    num_neg_examples = batch_size - num_pos_examples

    # random.choices samples with replacement, so small pools can repeat.
    pos_examples = random.choices(pos_example_indices, k=num_pos_examples)
    neg_examples = random.choices(neg_example_indices, k=num_neg_examples)

    # 150 matches indices_to_example's default window (5 s * 30 fps).
    # 1536 is presumably the per-frame feature width -- TODO confirm.
    batch_x = np.zeros((batch_size, 150, 1536))
    batch_y = np.zeros((batch_size, 1))

    for i in range(batch_size):
        if i < num_pos_examples:
            pair = pos_examples[i]
            batch_y[i] = 1
        else:
            pair = neg_examples[i - num_pos_examples]
            batch_y[i] = 0
        # Use the epoch_filenames argument; the original read a notebook-level
        # `epoch_file_names` that only existed as leaked kernel state.
        batch_x[i, :, :] = indices_to_example(epoch_examples, epoch_filenames, pair)

    return batch_x, batch_y

In [59]:
# Training loop
# Each epoch trains on a small random subset of feature files. While the current
# epoch is being consumed, the next epoch's files download on a background thread.
n_epoch_files = 2
n_epochs = 3
batch_size = 16
pos_p = 1/16

next_epoch_buf = []
next_epoch_files = choose_epoch_files(n_epoch_files, box_feature_file_ids, seizure_annotations)
t = threading.Thread(target=download_epoch_files, args=(next_epoch_buf, next_epoch_files))
# Wait for the first download before starting tqdm so its latency does not
# bias the progress estimate.
t.start()
t.join()
for epoch in tqdm(range(n_epochs)):
    # Make sure the prefetch started during the previous iteration has finished.
    t.join()
    epoch_examples = {}
    for vals in next_epoch_buf:
        epoch_examples[file_ids_to_file_names[vals[0]]] = vals[1]
    if epoch + 1 < n_epochs:
        # Use a fresh list so the worker never writes into the buffer just consumed.
        next_epoch_buf = []
        next_epoch_files = choose_epoch_files(n_epoch_files, box_feature_file_ids, seizure_annotations)
        t = threading.Thread(target=download_epoch_files, args=(next_epoch_buf, next_epoch_files))
        t.start()

    epoch_file_names, pos_example_indices, neg_example_indices = epoch_positive_negative_times(epoch_examples)
    # `/` then `//` yields a float, which range() rejects, so cast to int.
    n_minibatches = int((len(pos_example_indices) / pos_p) // batch_size)
    for batch_i in tqdm(range(n_minibatches)):
        batch_X, batch_Y = generate_minibatch(batch_size, pos_p,
                                              pos_example_indices, neg_example_indices,
                                              epoch_examples, epoch_file_names)

    print(epoch)
    print(n_minibatches)
    print("Sleeping")
    # X = np.array [training_size, 150, 1536]
    # Y = np.array [training_size, 1]
    # Placeholder for the actual training step.
    time.sleep(10)
    print("Done with Epoch")

[31m"GET https://api.box.com/2.0/files/407341222906/content" 401 0
{'Date': 'Thu, 21 Feb 2019 19:07:37 GMT', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Strict-Transport-Security': 'max-age=31536000', 'WWW-Authenticate': 'Bearer realm="Service", error="invalid_token", error_description="The access token provided is invalid."', 'BOX-REQUEST-ID': '0h8ocr4fbdn2ausurgtd1c98mga', 'Age': '0'}
b''
[0m


NameError: name 'tqdm' is not defined