# BatchExtraction 脚本

function：根据phase_gen提供不同的batch_extraction策略  
1⃣️ phase_gen == "test"：    
    input:测试视频的路径
    对视频逐帧提取frame以便送入网络  
2⃣️ phase_gen == "train":  
    input:训练视频的路径  
    Video_train_paths 目录下按视频编号存放每个视频的文件夹  
    每一个视频文件夹里有三个子文件夹   
    "images"：该视频的所有视频帧；"maps"：该视频的连续热度图；"fixation/maps"：该视频的离散热度图（.mat 文件）

In [1]:
import numpy as np
import scipy.io
import scipy.ndimage
from scipy.misc import imread, imresize

def padding(img, shape_r=240, shape_c=320, channels=3):
    """Resize `img` to fit a (shape_r, shape_c) canvas and centre it on zeros.

    The image is scaled with `imresize` so that one side matches the canvas
    and the other keeps the (integer-floored) aspect ratio; the remaining
    border of the canvas stays zero.

    Args:
        img: array whose first two axes are rows and columns.
        shape_r: number of rows of the output canvas.
        shape_c: number of columns of the output canvas.
        channels: channel count of the canvas; 1 yields a 2-D canvas.

    Returns:
        A uint8 array of shape (shape_r, shape_c, channels), or
        (shape_r, shape_c) when `channels` is 1.
    """
    canvas_shape = (shape_r, shape_c) if channels == 1 else (shape_r, shape_c, channels)
    canvas = np.zeros(canvas_shape, dtype=np.uint8)

    src_rows, src_cols = img.shape[0], img.shape[1]

    if src_rows / shape_r > src_cols / shape_c:
        # Rows are the limiting side: fill the full height, centre horizontally.
        width = (src_cols * shape_r) // src_rows
        scaled = imresize(img, (shape_r, width))
        width = min(width, shape_c)
        left = (canvas.shape[1] - width) // 2
        canvas[:, left:left + width] = scaled
    else:
        # Columns are the limiting side: fill the full width, centre vertically.
        height = (src_rows * shape_c) // src_cols
        scaled = imresize(img, (height, shape_c))
        height = min(height, shape_r)
        top = (canvas.shape[0] - height) // 2
        canvas[top:top + height, :] = scaled

    return canvas


def resize_fixation(img, rows=480, cols=640):
    """Rescale a sparse fixation map to (rows, cols) without interpolation.

    Every non-zero entry of `img` is mapped to the nearest scaled coordinate
    of the output and marked with 1; everything else stays 0.

    Args:
        img: array whose first two axes are rows and columns; non-zero
            entries are treated as fixation points.
        rows: number of rows of the output map.
        cols: number of columns of the output map.

    Returns:
        A float array of shape (rows, cols) containing only 0 and 1.
    """
    out = np.zeros((rows, cols))
    row_scale = rows / img.shape[0]
    col_scale = cols / img.shape[1]

    hits = np.argwhere(img)
    target_r = np.round(hits[:, 0] * row_scale).astype(int)
    target_c = np.round(hits[:, 1] * col_scale).astype(int)

    # Rounding can land exactly one past the last index; pull it back inside.
    target_r[target_r == rows] -= 1
    target_c[target_c == cols] -= 1

    out[target_r, target_c] = 1
    return out


def padding_fixation(img, shape_r=480, shape_c=640):
    """Rescale a fixation map to fit a (shape_r, shape_c) canvas, centred on zeros.

    Works like `padding`, but scales with `resize_fixation` so that the
    fixation points are moved rather than interpolated.

    Args:
        img: fixation map whose first two axes are rows and columns.
        shape_r: number of rows of the output canvas.
        shape_c: number of columns of the output canvas.

    Returns:
        A float array of shape (shape_r, shape_c).
    """
    canvas = np.zeros((shape_r, shape_c))
    src_rows, src_cols = img.shape[0], img.shape[1]

    if src_rows / shape_r > src_cols / shape_c:
        # Rows are the limiting side: fill the full height, centre horizontally.
        width = (src_cols * shape_r) // src_rows
        scaled = resize_fixation(img, rows=shape_r, cols=width)
        width = min(width, shape_c)
        left = (canvas.shape[1] - width) // 2
        canvas[:, left:left + width] = scaled
    else:
        # Columns are the limiting side: fill the full width, centre vertically.
        height = (src_rows * shape_c) // src_cols
        scaled = resize_fixation(img, rows=height, cols=shape_c)
        height = min(height, shape_r)
        top = (canvas.shape[0] - height) // 2
        canvas[top:top + height, :] = scaled

    return canvas


def preprocess_images(paths, shape_r, shape_c):
    """Load frames and turn them into a mean-centred BGR batch.

    Each image is read with `imread`, expanded to three channels when it is
    grayscale, letter-boxed to (shape_r, shape_c) with `padding`, converted
    from RGB to BGR and centred by subtracting one constant per channel.

    Args:
        paths: sequence of image file paths.
        shape_r: number of rows of every output frame.
        shape_c: number of columns of every output frame.

    Returns:
        A float array of shape (len(paths), shape_r, shape_c, 3) in BGR
        channel order.
    """
    ims = np.zeros((len(paths), shape_r, shape_c, 3))

    for i, path in enumerate(paths):
        original_image = imread(path)
        if original_image.ndim == 2:
            # Grayscale: replicate the single channel three times (as float64,
            # matching the dtype the previous implementation produced).
            original_image = np.repeat(
                original_image[:, :, np.newaxis], 3, axis=2
            ).astype(np.float64)
        ims[i] = padding(original_image, shape_r, shape_c, 3)

    # Why subtract a constant per channel?  103.939 / 116.779 / 123.68 are the
    # per-channel means, in B, G, R order, used by Caffe-style VGG/ResNet
    # preprocessing (see Keras `preprocess_input`, mode "caffe"); subtracting
    # them zero-centres the input.
    # `imread` yields RGB, so the channels must be flipped to BGR *before* the
    # means are subtracted; the previous order subtracted the blue mean from
    # the red channel and vice versa.
    ims = ims[:, :, :, ::-1]
    ims = ims - np.array([103.939, 116.779, 123.68])

    return ims


def preprocess_maps(paths, shape_r, shape_c):
    """Load saliency maps into a single-channel batch scaled by 1/255.

    Args:
        paths: sequence of map image file paths.
        shape_r: number of rows of every output map.
        shape_c: number of columns of every output map.

    Returns:
        A float array of shape (len(paths), shape_r, shape_c, 1).
    """
    batch = np.zeros((len(paths), shape_r, shape_c, 1))

    for idx, map_path in enumerate(paths):
        padded = padding(imread(map_path), shape_r, shape_c, 1)
        # `plane` is a view into `batch`, so the in-place operations below
        # write straight into the output array.
        plane = batch[idx, :, :, 0]
        plane[...] = padded.astype(np.float32)
        plane /= 255.0

    return batch


def preprocess_fixmaps(paths, shape_r, shape_c):
    """Load fixation maps from .mat files into a single-channel batch.

    Each file is read with `scipy.io.loadmat`; its "I" entry is rescaled and
    centred with `padding_fixation`.

    Args:
        paths: sequence of .mat file paths, each holding a variable "I".
        shape_r: number of rows of every output map.
        shape_c: number of columns of every output map.

    Returns:
        A float array of shape (len(paths), shape_r, shape_c, 1).
    """
    batch = np.zeros((len(paths), shape_r, shape_c, 1))

    for idx, mat_path in enumerate(paths):
        contents = scipy.io.loadmat(mat_path)
        batch[idx, :, :, 0] = padding_fixation(
            contents["I"], shape_r=shape_r, shape_c=shape_c
        )

    return batch


def postprocess_predictions(pred, shape_r, shape_c):
    """Map a predicted saliency map back to a (shape_r, shape_c) image.

    The prediction is normalised to a peak of 255, resized with `imresize`
    so that it covers the target size while keeping its aspect ratio,
    centre-cropped to (shape_r, shape_c), smoothed with a Gaussian filter
    (sigma=7) and normalised to a peak of 255 again.

    Args:
        pred: 2-D prediction array.
        shape_r: number of rows of the output.
        shape_c: number of columns of the output.

    Returns:
        A float array; an all-zero prediction is returned as all zeros
        instead of NaN.
    """
    pred_rows, pred_cols = pred.shape[0], pred.shape[1]
    rows_rate = shape_r / pred_rows
    cols_rate = shape_c / pred_cols

    # Skip the normalisation when the peak is 0 to avoid dividing by zero.
    peak = np.max(pred)
    if peak != 0:
        pred = pred / peak * 255

    if rows_rate > cols_rate:
        # Match the target height, then crop the surplus columns evenly.
        new_cols = (pred_cols * shape_r) // pred_rows
        pred = imresize(pred, (shape_r, new_cols))
        left = (pred.shape[1] - shape_c) // 2
        img = pred[:, left:left + shape_c]
    else:
        # Match the target width, then crop the surplus rows evenly.
        new_rows = (pred_rows * shape_c) // pred_cols
        pred = imresize(pred, (new_rows, shape_c))
        top = (pred.shape[0] - shape_r) // 2
        img = pred[top:top + shape_r, :]

    # `scipy.ndimage.gaussian_filter` is the public name of the function that
    # used to be reached through the deprecated `scipy.ndimage.filters`.
    img = scipy.ndimage.gaussian_filter(img, sigma=7)
    peak = np.max(img)
    if peak != 0:
        img = img / peak * 255

    return img

In [14]:
# NOTE: the two paths below are absolute and machine-specific; adjust them
# before running the notebook elsewhere.
# Video file passed to _VideoBatch_Generator (opened with cv2.VideoCapture).
VideoName= "/Users/yanhang/Downloads/animal_alpaca01.mp4"
# Root directory whose sub-directories (one per video) are used for training.
Video_train_paths = "/Users/yanhang/Desktop/saliency/my_models/videotrain"
# sub-path, appended to a video directory, of the training maps
maps_path = '/maps/'
# sub-path, appended to a video directory, of the training fixation maps
fixs_path = '/fixation/maps/'
# sub-path, appended to a video directory, of the training video frames
frames_path = '/images/'

import cv2
import numpy as np
import os
import random
import scipy.io
import scipy.ndimage
from scipy.misc import imread, imresize


# Batch extraction for saliency videos.
# Every batch has the layout (batch, frame_num, rows, cols, channels); each
# batch entry is a run of `frame_num` consecutive frames of one video.
def _list_files(folder, extensions):
    """Return the sorted paths of the files in `folder` ending with `extensions`.

    `folder` must end with a path separator because paths are built by plain
    string concatenation.  Sorting keeps frames, maps and fixations in a
    deterministic order (os.listdir order is arbitrary), so the same slice
    of each list refers to the same frames provided the three folders use
    matching file names.
    """
    return sorted(folder + name for name in os.listdir(folder) if name.endswith(extensions))


def _load_test_batch(VideoName, frame_num, input_shape):
    """Decode a whole video into an array of consecutive `frame_num`-frame slices."""
    video_cap = cv2.VideoCapture(VideoName)
    try:
        # The frame count is reported as a float; convert it once so that it
        # can be used in range() and integer arithmetic.
        total_frames = int(video_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            raise ValueError("no frames available in video %r" % (VideoName,))

        slice_count = -(-total_frames // frame_num)  # ceiling division
        slices = []
        start_frame = 0
        for _ in range(slice_count):
            # The last slice is shifted back so that it ends on the last
            # frame; for videos shorter than frame_num it starts at 0.
            start_frame = max(0, min(start_frame, total_frames - frame_num))
            stop_frame = min(start_frame + frame_num, total_frames)

            frames = []
            for index in range(start_frame, stop_frame):
                video_cap.set(cv2.CAP_PROP_POS_FRAMES, index)
                ok, frame = video_cap.read()  # decoded frame is BGR
                if not ok:
                    raise IOError("could not read frame %d of %r" % (index, VideoName))
                frame = cv2.resize(frame, tuple(input_shape))
                frame = frame.astype(np.float32)
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Short videos: repeat the last frame, as the 'train' phase does.
            frames.extend([frames[-1]] * (frame_num - len(frames)))
            slices.append(np.stack(frames))
            start_frame = stop_frame

        return np.stack(slices)
    finally:
        video_cap.release()


def _iter_train_batches(batch_size, frame_num, input_shape, output_shape):
    """Yield [frames, maps, fixations] training batches forever."""
    videos = sorted(
        Video_train_paths + '/' + name
        for name in os.listdir(Video_train_paths)
        if os.path.isdir(Video_train_paths + '/' + name)
    )
    if not videos:
        raise ValueError("no video directories found in %r" % (Video_train_paths,))
    random.shuffle(videos)

    range_num = 0
    while True:
        Xims = np.zeros((batch_size, frame_num, input_shape[0], input_shape[1], 3))
        Ymaps = np.zeros((batch_size, frame_num, output_shape[0], output_shape[1], 1)) + 0.01
        Yfixs = np.zeros((batch_size, frame_num, output_shape[0], output_shape[1], 1)) + 0.01

        for i in range(batch_size):
            # Cycle through the shuffled videos across successive batches.
            video = videos[(range_num * batch_size + i) % len(videos)]
            images = _list_files(video + frames_path, (".jpg", ".jpeg", ".png"))
            maps = _list_files(video + maps_path, (".jpg", ".jpeg", ".png"))
            fixs = _list_files(video + fixs_path, (".mat",))
            if not images or not maps or not fixs:
                raise ValueError("video directory %r lacks frames, maps or fixations" % (video,))

            # Random window of up to frame_num consecutive frames.
            start = np.random.choice(max(1, len(images) - frame_num))
            stop = min(start + frame_num, len(images))

            X = preprocess_images(images[start:stop], input_shape[0], input_shape[1])
            # Targets are sized with output_shape so that they fit Ymaps/Yfixs.
            Y = preprocess_maps(maps[start:stop], output_shape[0], output_shape[1])
            Y_fix = preprocess_fixmaps(fixs[start:stop], output_shape[0], output_shape[1])

            Xims[i, :X.shape[0]] = X
            Ymaps[i, :Y.shape[0]] = Y
            Yfixs[i, :Y_fix.shape[0]] = Y_fix

            # Windows shorter than frame_num are padded with their last frame.
            Xims[i, X.shape[0]:] = X[-1]
            Ymaps[i, Y.shape[0]:] = Y[-1]
            Yfixs[i, Y_fix.shape[0]:] = Y_fix[-1]

        yield [Xims, Ymaps, Yfixs]
        range_num += 1


def _VideoBatch_Generator(VideoName, batch_size, frame_num, input_shape, output_shape, phase_gen='train'):
    '''
    Generate batches of consecutive video frames for testing or training.

    Args:
    VideoName: path of the video file; only used when phase_gen == 'test'.
    batch_size: number of videos per training batch; ignored in the 'test'
        phase, where the number of slices is derived from the video length.
    frame_num: number of consecutive frames per batch entry.
    input_shape: size of the network input frames. In the 'train' phase it is
        used as (rows, cols); in the 'test' phase it is handed to cv2.resize
        as its size argument (which OpenCV documents as (width, height)).
    output_shape: (rows, cols) of the training maps and fixation maps.
    phase_gen: 'test' or 'train'.

    Yields:
    'test': a single float32 RGB array of shape (slices, frame_num, H, W, 3)
        covering the whole video; the last slice is shifted back so that it
        ends on the last frame.
    'train': endlessly, lists [Xims, Ymaps, Yfixs] built from the video
        directories below the module-level Video_train_paths (sub-folders
        frames_path, maps_path and fixs_path).

    Raises:
    ValueError: unknown phase_gen, a video without frames, or missing
        training data.
    IOError: a frame of the test video cannot be decoded.

    Because this is a generator, nothing runs (and nothing is raised) until
    the first item is requested.
    '''
    if phase_gen == 'test':
        yield _load_test_batch(VideoName, frame_num, input_shape)
    elif phase_gen == 'train':
        for batch in _iter_train_batches(batch_size, frame_num, input_shape, output_shape):
            yield batch
    else:
        raise ValueError("phase_gen must be 'test' or 'train', got %r" % (phase_gen,))

In [11]:
# NOTE(review): _VideoBatch_Generator is defined with five required parameters
# (VideoName, batch_size, frame_num, input_shape, output_shape), but this call
# passes only three positional arguments, so (1080,720) lands in frame_num.
# The recorded output below presumably comes from an earlier version of the
# function -- confirm the intended arguments.
_VideoBatch_Generator(VideoName, 10, (1080,720))

['/Users/yanhang/Desktop/saliency/my_models/videotrain/animal_alpaca02', '/Users/yanhang/Desktop/saliency/my_models/videotrain/animal_alpaca01']


In [127]:
# Scratch check: a 3-D array used to inspect what X[-1, :, :] looks like.
X = np.zeros([3, 4, 5])

In [128]:
# Taking the last entry along axis 0 drops that axis; the result has shape (4, 5).
print(np.shape(np.copy(X[-1, :, :])))

(4, 5)


In [18]:
# Scratch check: np.arange(5) holds the integers 0 through 4.
print(np.arange(5))

[0 1 2 3 4]


In [12]:
def test():
    """Yield the integers from 10 down to 1 (scratch demo of a generator)."""
    remaining = 10
    while remaining > 0:
        yield remaining
        remaining -= 1
# Pull the first value out of the generator and show it.
y = test()
print(next(y))

10


In [25]:
# Scratch check of tf.reduce_max applied to successive axes.
import tensorflow as tf
a = np.ones([2, 3, 4])
print(a)
# NOTE(review): `a` has 3 dimensions (valid axes 0..2), but range(3, 1, -1)
# starts at axis 3; the recorded output below shows the resulting
# "Invalid reduction dimension 3" ValueError. Either the range or the rank of
# `a` needs to change -- confirm which was intended.
for i in range(3, 1, -1):
    a = tf.reduce_max(a, i)
    print(a)

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]]


ValueError: Invalid reduction dimension 3 for input with 3 dimensions. for 'Max' (op: 'Max') with input shapes: [2,3,4], [] and with computed input tensors: input[1] = <3>.