In [1]:
%run ../chap05/dataset.ipynb

In [2]:
class VideoShotDataset(Dataset):
    """Dataset of short frame sequences labelled with shot-start flags.

    On construction the thumbnail cache is built for any video that is not
    cached yet, all cached frames are loaded into memory and the per-frame
    shot-start flags are read from the optional `<video>.csv` files.

    Attributes set here:
        frames: array returned by video_load (one 90x120x3 image per frame).
        shots: array returned by video_load_shot_start (1.0 marks a shot start).
        timesteps: number of frames per generated sequence.
        input_shape / output_shape: per-sequence shapes; both carry
            `timesteps + 1` entries along the first axis.
    """

    def __init__(self, path, filenames, timesteps=5):
        """Prepare the cache, load frames and shot flags, then set shapes.

        Args:
            path: Dataset root; passed unchanged to the video_* helpers.
            filenames: Video file names to include.
            timesteps: Frames per sequence (default 5).
        """
        super(VideoShotDataset, self).__init__('videoshot', 'binary')

        # Cache thumbnails first so that video_load finds them on disk.
        video_resize(path, filenames)
        loaded_frames, frame_counts = video_load(path, filenames)
        self.frames = loaded_frames
        self.shots = video_load_shot_start(path, filenames, frame_counts)

        self.set_timesteps(timesteps)

    def set_timesteps(self, timesteps):
        """Store the sequence length and derive input/output shapes from it."""
        self.timesteps = timesteps
        # One more slot than `timesteps` on the sequence axis.
        slots = timesteps + 1
        self.input_shape = [slots, 90, 120, 3]
        self.output_shape = [slots, 1]

    @property
    def train_count(self):
        """Constant training-set size reported by this dataset (2000)."""
        return 2000

    def __str__(self):
        """Return a one-line summary: name, mode, frame/shot/train counts."""
        # self.name / self.mode are presumably set by Dataset.__init__ -- TODO confirm.
        template = '{}({}, {} frames, {} shots, {} train_data)'
        return template.format(self.name, self.mode, len(self.frames),
                               np.sum(self.shots), self.train_count)

In [3]:
def video_resize(path, filenames):
    """Build the 120x90 thumbnail cache for the given video files.

    For every name in `filenames` that exists under `path + '/movies/'` and has
    no directory under `path + '/cache/'` yet, frames are read with OpenCV,
    resized and written as `<cache>/<filename>/NNNNN.jpg` (numbered from 1,
    zero-padded to five digits). The number of frames written per video is
    merged into `<cache>/framecount.dat` via load_dict_file / save_dict_file.

    Reading stops one frame short of the count reported by OpenCV, or earlier
    if a frame cannot be read; in both cases the recorded count is the number
    of thumbnails actually written.

    Args:
        path: Dataset root directory containing `movies/`; `cache/` is created
            inside it when missing.
        filenames: Iterable of video file names to process.

    Returns:
        None.
    """
    cache_path = path + '/cache/'
    if not os.path.exists(cache_path):
        os.mkdir(cache_path)
    counts = load_dict_file(cache_path+'framecount.dat')

    for filename in filenames:
        file_path = path + '/movies/' + filename
        image_path = cache_path + filename

        # Skip videos that are missing or whose cache directory already exists.
        if not os.path.exists(file_path): continue
        if os.path.exists(image_path): continue

        os.mkdir(image_path)

        cap = cv2.VideoCapture(file_path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
            count, unit = 0, 100

            print('Start resizing {} frames in {}'.format(frame_count, filename))
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # The reported frame count can exceed what is decodable.
                    # Stop here instead of passing frame=None to cv2.resize,
                    # which would crash and leave a cache directory with no
                    # entry in framecount.dat.
                    print('Stopped after {} frames: could not read next frame of {}'.
                          format(count, filename))
                    break
                count = count + 1
                thumb = cv2.resize(frame, (120, 90))
                cv2.imwrite(image_path + '/%#05d.jpg' % count, thumb)
                if count >= frame_count:
                    break
                if count % unit == 0:
                    print('{} frames'.format(count))
                    # Report less often on long videos: 100 -> 1000 -> ...
                    if count == unit * 10 and count * 10 < frame_count: unit = count
        finally:
            # Release the capture on every exit path, including errors.
            cap.release()
        counts[filename] = count

    print('Creating thumbnail cache is done')

    save_dict_file(cache_path+'framecount.dat', counts)

In [4]:
def load_dict_file(path):
    """Load a dict previously written by save_dict_file.

    Args:
        path: File containing the textual form of a Python dict.

    Returns:
        The evaluated file contents, or an empty dict when `path` does not
        exist.
    """
    if not os.path.exists(path): return {}

    # `with` closes the handle even if read() raises.
    with open(path, 'r') as f:
        data = f.read()
    # NOTE(review): eval() executes whatever expression the file contains.
    # That is only safe while the file is produced locally by save_dict_file;
    # consider ast.literal_eval if the cache can come from an untrusted source.
    return eval(data)

def save_dict_file(path, dic):
    """Write `str(dic)` to `path`, replacing any existing file.

    Args:
        path: Destination file path.
        dic: Object to store; its `str()` form is what load_dict_file
            evaluates when reading the file back.

    Returns:
        None.
    """
    # Close (and therefore flush) the file deterministically instead of
    # relying on garbage collection of the handle.
    with open(path, 'w') as f:
        f.write(str(dic))

In [5]:
def video_load(path, filenames):
    """Load the cached thumbnails of the given videos into one array.

    Frame counts come from `<path>/cache/framecount.dat`; for each video the
    files `<path>/cache/<filename>/00001.jpg` ... are read with cv2.imread in
    order and stored back to back, video after video.

    Args:
        path: Dataset root directory containing `cache/`.
        filenames: Video file names; each must have an entry in the frame
            count file (a missing entry raises KeyError).

    Returns:
        (frames, counts): `frames` is an array of shape
        [total_frames, 90, 120, 3]; `counts` is the full dict read from the
        frame count file.
    """
    cache_dir = path + '/cache/'
    counts = load_dict_file(cache_dir+'framecount.dat')

    total_frames = sum(counts[name] for name in filenames)
    frames = np.zeros([total_frames, 90, 120, 3])

    offset = 0
    for name in filenames:
        frame_dir = cache_dir + name
        n_frames = counts[name]
        # Thumbnails on disk are numbered from 1; array rows from `offset`.
        for frame_no in range(1, n_frames + 1):
            jpg_path = frame_dir + '/%#05d.jpg' % frame_no
            frames[offset + frame_no - 1] = cv2.imread(jpg_path)
        offset += n_frames

    return frames, counts

In [6]:
def video_load_shot_start(path, filenames, counts):
    """Build the per-frame shot-start flag array for the given videos.

    For each video, `<path>/movies/<filename>.csv` is read when it exists. The
    file holds whitespace-separated integers; each one is an offset from the
    first frame of that video and the corresponding entry is set to 1.0.
    Videos without a CSV file contribute only zeros. Offsets are not
    range-checked against the video's own frame count.

    Args:
        path: Dataset root directory containing `movies/`.
        filenames: Video file names, in the same order used to load frames.
        counts: Mapping from file name to its number of frames.

    Returns:
        1-D float array with one entry per frame across all videos.
    """
    frame_count = sum(counts[filename] for filename in filenames)
    ys = np.zeros([frame_count])

    idx = 0  # index of the current video's first frame within ys
    for filename in filenames:
        csv_path = path + '/movies/' + filename + '.csv'
        if os.path.exists(csv_path):
            # `with` closes the handle; the original left it open.
            with open(csv_path, 'r') as fin:
                start_idxs = fin.read()
            for start_idx in start_idxs.split():
                ys[idx+(int(start_idx))] = 1.0
        idx += counts[filename]

    return ys

In [7]:
def video_shot_get_train_data(self, batch_size, nth):
    """Return a training batch of `batch_size` freshly sampled sequences.

    `nth` (the batch index) is accepted for interface compatibility but is not
    used: every batch is produced by `self.create_seq`.
    """
    train_batch = self.create_seq(batch_size)
    return train_batch

def video_shot_get_test_data(self):
    """Return a test set of 128 sequences produced by `self.create_seq`."""
    test_batch = self.create_seq(128)
    return test_batch

def video_shot_get_validate_data(self, count):
    """Return `count` sequences produced by `self.create_seq`."""
    validate_batch = self.create_seq(count)
    return validate_batch

# Attach the batch providers to VideoShotDataset as methods. Visualization
# data reuses the validation provider.
VideoShotDataset.get_train_data = video_shot_get_train_data
VideoShotDataset.get_test_data = video_shot_get_test_data
VideoShotDataset.get_validate_data = video_shot_get_validate_data
VideoShotDataset.get_visualize_data = video_shot_get_validate_data

In [8]:
def video_create_seq(self, count):
    """Sample `count` random frame sequences with shot-start labels.

    Each sequence has `self.timesteps` frames stored in slots 1..timesteps;
    slot 0 of both outputs holds the sequence length (in xs only element
    [0, 0, 0] of that slot is written). At every step the sequence either
    jumps to a uniformly random frame (label 1.0) or advances to the next
    frame (label taken from `self.shots`). A jump is forced at the first step
    and whenever the current frame is the last one; otherwise it happens with
    probability 1/2.

    Args:
        count: Number of sequences to generate.

    Returns:
        (xs, ys) with shapes [count, timesteps+1, 90, 120, 3] and
        [count, timesteps+1, 1].
    """
    steps = self.timesteps
    xs = np.zeros([count, steps+1, 90, 120, 3])
    ys = np.zeros([count, steps+1, 1])
    total_frames = len(self.frames)
    last_frame = total_frames - 1

    for seq in range(count):
        xs[seq, 0, 0, 0, 0] = steps
        ys[seq, 0, 0] = steps
        # Start past the end so the first step always takes the jump branch.
        pos = total_frames
        for slot in range(1, steps + 1):
            # Short-circuit matters: no coin is drawn when the jump is forced.
            jump = pos >= last_frame or np.random.randint(2) == 0
            if jump:
                pos = np.random.randint(total_frames)
                label = 1.0
            else:
                pos += 1
                label = self.shots[pos]
            xs[seq, slot] = self.frames[pos,:,:,:]
            ys[seq, slot, 0] = label
    return xs, ys

# Attach the sequence sampler to VideoShotDataset as its create_seq method.
VideoShotDataset.create_seq = video_create_seq

In [9]:
def video_visualize(self, xs, est, ans):
    """Show each sequence's frames, then print estimates next to answers.

    All sequences are drawn first (slot 0 of each sequence is skipped), and
    only afterwards are the 'Est:' / 'Ans:' lines printed, one pair per
    sequence, covering slots 2 onward of channel 0.

    Args:
        xs: Batch of input sequences.
        est: Estimates indexed as [sample, slot, channel].
        ans: Answers with the same layout as `est`.
    """
    def _fmt_row(values):
        return ','.join(["%4.2f" % v for v in values])

    sample_count = len(xs)

    for i in range(sample_count):
        draw_images_horz(xs[i][1:], [90,120,3])

    for i in range(sample_count):
        print('Est: ' + _fmt_row(est[i,2:,0]))
        print('Ans: ' + _fmt_row(ans[i,2:,0]))

# Attach the visualizer to VideoShotDataset as its visualize method.
VideoShotDataset.visualize = video_visualize

In [10]:
def video_shot_forward_postproc(self, output, x, y, mode=None):
    """Compute the mean sigmoid cross-entropy loss over slots 2 onward.

    Slots 0 and 1 on the sequence axis are excluded from the loss.

    Args:
        output: Network output (logits), indexed [sample, slot, channel].
        x: Input batch (unused here).
        y: Target batch with the same layout as `output`.
        mode: Unused.

    Returns:
        (loss, aux) where `aux` is the list `[y, output]` that
        video_shot_backprop_postproc unpacks.
    """
    targets = y[:, 2:, :]
    logits = output[:, 2:, :]
    per_element = sigmoid_cross_entropy_with_logits(targets, logits)
    return np.mean(per_element), [y, output]

def video_shot_backprop_postproc(self, G_loss, aux, mode=None):
    """Return the gradient of the mean cross-entropy loss w.r.t. the output.

    Args:
        G_loss: Upstream loss gradient; not used by this implementation.
        aux: The `[y, output]` pair produced by video_shot_forward_postproc.
        mode: Unused.

    Returns:
        Array shaped like `output`: slots 2 onward hold the cross-entropy
        derivative divided by the number of scored elements, slot 1 is zero,
        and slot 0 is a copy of `output[:, 0, :]`.
    """
    targets_full, logits_full = aux

    targets = targets_full[:, 2:, :]
    logits = logits_full[:, 2:, :]
    # Mean over the scored elements -> each gets an equal 1/N share.
    n_scored = np.prod(targets.shape)
    grad_scored = sigmoid_cross_entropy_with_logits_derv(targets, logits) / n_scored

    grad = np.zeros(logits_full.shape)
    # Slot 0 is passed through from the output, not differentiated
    # (presumably it carries the sequence length -- TODO confirm).
    grad[:, 0, :] = logits_full[:, 0, :]
    grad[:, 2:, :] = grad_scored

    return grad

# Attach the loss forward/backward post-processing to VideoShotDataset.
VideoShotDataset.forward_postproc = video_shot_forward_postproc
VideoShotDataset.backprop_postproc = video_shot_backprop_postproc

In [11]:
def video_shot_eval_accuracy(self, x, y, output, mode=None): #touched
    """Return the fraction of scored slots whose prediction matches the label.

    Only slots 2 onward are scored. A slot is predicted as a shot start when
    its logit is strictly greater than 0, and is labelled as one when the
    target equals 1.0.

    Args:
        x: Input batch (unused).
        y: Targets indexed [sample, slot, channel].
        output: Logits with the same layout as `y`.
        mode: Unused.

    Returns:
        Accuracy as matches / number of scored elements.
    """
    labels = y[:, 2:, :]
    logits = output[:, 2:, :]

    is_start = np.equal(labels, 1.0)
    predicted_start = np.greater(logits, 0)
    matches = np.sum(np.equal(predicted_start, is_start))

    return matches / np.prod(labels.shape)

def video_shot_get_estimate(self, output, mode=None):
    """Convert raw outputs to estimates for slots 2 onward.

    Args:
        output: Logits indexed [sample, slot, channel].
        mode: Unused.

    Returns:
        Array shaped like `output`: slot 0 is copied from `output`, slot 1 is
        zero, and slots 2 onward hold `sigmoid` of the logits.
    """
    probs = np.zeros(output.shape)
    probs[:, 0, :] = output[:, 0, :]
    scored_logits = output[:, 2:, :]
    probs[:, 2:, :] = sigmoid(scored_logits)

    return probs

# Attach the accuracy and estimate helpers to VideoShotDataset as methods.
VideoShotDataset.eval_accuracy = video_shot_eval_accuracy
VideoShotDataset.get_estimate = video_shot_get_estimate