## 1. Import Data

In [1]:
import pickle
import gzip
import numpy as np
import os
import cv2

In [2]:
def load_zipped_pickle(filename):
    with gzip.open(filename, 'rb') as f:
        loaded_object = pickle.load(f)
        return loaded_object

In [3]:
def save_zipped_pickle(obj, filename):
    with gzip.open(filename, 'wb') as f:
        pickle.dump(obj, f, 2)

In [4]:
def evaluate(predictions, targets):
    ious = []
    for p, t in zip(predictions, targets):
        assert p['name'] == t['name']
        prediction = np.array(p['prediction'], dtype=bool)
        target = np.array(t['label'], dtype=bool)

        assert target.shape == prediction.shape
        overlap = prediction*target
        union = prediction + target

        ious.append(overlap.sum()/float(union.sum()))
    
    print("Median IOU: ", np.median(ious))

## 1.1 Load Data

In [5]:
# load data
train_data = load_zipped_pickle("train.pkl")
test_data = load_zipped_pickle("test.pkl")
samples = load_zipped_pickle("sample.pkl")



数据结构：  
    train_data[1-65]包含65个病人的数据, 前46个是amateur数据， 后19个是expert数据  
    train_data[1]['video','label','dataset'...] , video是图片数组， label是MV mask，dataset代表是amateur还是expert  
    train_data[1]['video'] shape: [IMG_WIDTH, IMG_HEIGHT, FRAME NUM]， label也一样

In [41]:
# test_img = train_data[60]['label'][:,:,24]
# zero_img = np.zeros(test_img.shape)
# zero_img[test_img] = 255
# zero_img_resize = cv2.resize(zero_img, (1000,1000))
# zero_img.shape

(583, 628)

In [42]:
# cv2.imshow("mask", zero_img_resize)
  
# cv2.waitKey(0) 

# cv2.destroyAllWindows() 

## 1.2 Data pre-processing

In [6]:
# 样本图片大小
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 1

In [37]:
# 找到含有标记的帧
#input: w*h*f_num
def is_labeled(mask_list):
    
    label_index = []
    for i in range((mask_list).shape[2]):
        if(True in mask_list[:,:,i]):
            label_index.append(i)
            continue
    return np.array(label_index)

#选出有标记的图片并resize到相应尺寸
#output: f_num*w*h*1
def image_size_normalize(raw_data, size):
    
    img_width, img_height, img_channel = size
    
    labeled_img_list = []
    mask_list = []
    
    for patient_i in range(len(raw_data)):

        img_i = raw_data[patient_i]['video'] # Shape: [IMG_WIDTH1, IMG_HEIGHT1, Frame_Num1]
        mask_i = raw_data[patient_i]['label']
        label_index = is_labeled(mask_i)
        labeled_img_i = img_i[:,:,label_index]
        labeled_mask_i = mask_i[:,:,label_index]
        
        for frame_j in range(labeled_mask_i.shape[2]):
            img_j = labeled_img_i[:,:,frame_j] # [IMG_WIDTH2, IMG_HEIGHT2, Frame_Num2]
            mask_j = labeled_mask_i[:,:,frame_j]
            mask_j_digi = np.zeros(mask_j.shape)
            mask_j_digi[mask_j] = 1
            if(img_j.shape[0]!= IMG_WIDTH or img_j.shape[1]!= IMG_HEIGHT):
                img_j = cv2.resize(img_j,(img_width, img_height))
                mask_j_digi = cv2.resize(mask_j_digi,(img_width, img_height))
            labeled_img_list.append(img_j)
            mask_list.append(mask_j_digi)
            
    return np.array(labeled_img_list).reshape(-1,img_width, img_height, img_channel),np.array(mask_list).reshape(-1,img_width, img_height, 1)

#选出所有的图片并resize到相应尺寸
#output: f_num*w*h*1
def image_size_normalize_no_label(raw_data, size):
    img_width, img_height, img_channel = size
    
    labeled_img_list = []
    mask_list = []
    
    for patient_i in range(len(raw_data)):

        img_i = raw_data[patient_i]['video'] # Shape: [IMG_WIDTH1, IMG_HEIGHT1, Frame_Num1]
        mask_i = raw_data[patient_i]['label']
        # label_index = is_labeled(mask_i)
        # labeled_img_i = img_i[:,:,label_index]
        # labeled_mask_i = mask_i[:,:,label_index]
        
        for frame_j in range(mask_i.shape[2]):
            img_j = img_i[:,:,frame_j] # [IMG_WIDTH2, IMG_HEIGHT2, Frame_Num2]
            mask_j = mask_i[:,:,frame_j]
            mask_j_digi = np.zeros(mask_j.shape)
            mask_j_digi[mask_j] = 1
            if(img_j.shape[0]!= IMG_WIDTH or img_j.shape[1]!= IMG_HEIGHT):
                img_j = cv2.resize(img_j,(img_width, img_height))
                mask_j_digi = cv2.resize(mask_j_digi,(img_width, img_height))
            labeled_img_list.append(img_j)
            mask_list.append(mask_j_digi)
            
    return np.array(labeled_img_list).reshape(-1,img_width, img_height, img_channel),np.array(mask_list).reshape(-1,img_width, img_height, 1)

#选出所有的图片并resize到相应尺寸
#input: f_num*w*h*1
def is_labeled_order2(mask_list):
    
    label_index = []
    for i in range((mask_list).shape[0]):
        if(True in mask_list[i,:,:,0]):
            label_index.append(i)
            continue
    return np.array(label_index)

In [172]:
# 从训练集中选出所有label过的图像并resize到指定大小
img_labeled, mask = image_size_normalize(train_data, (IMG_WIDTH,IMG_HEIGHT,IMG_CHANNELS))  # OutPut Shape: [Labeled_Frame_Num, IMG_WIDTH2, IMG_HEIGHT2, IMG_CHANNEL]

In [173]:
(train_data[0]['box'][train_data[0]['box'] == True]).shape

(1092,)

# 3 Visualization

In [174]:
import os

from PIL import Image

def video_make(name, image_list, mask_list,  img_size =(112, 112), only_MV = False):

    fps = 24 #视频每秒24帧
    size = img_size #需要转为视频的图片的尺寸

    #视频保存在当前目录下

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')    
    video = cv2.VideoWriter(name+".avi",fourcc, fps,size, False)
    #draw stuff that goes on every frame here
    for frame_i in range(image_list.shape[0]):
        img = (image_list[frame_i,:,:,0])
        mask = mask_list[frame_i,:,:,0]
        img[mask>0.5] = 255
        if(only_MV):
            img[mask<0.5] = 0
        img_mat = cv2.Mat(img)
        video.write(img)
    video.release()

def image_process(src,threshold, DF_kernel = np.ones((4, 4), dtype=np.uint8)):
    img_dst = np.zeros(src.shape,dtype=np.uint8)
    cv2.bilateralFilter(src,dst = img_dst,d=5,sigmaColor = 20, sigmaSpace = 2)
    img_threshold_idx = img_dst< threshold
    img_dst[img_threshold_idx] = 0
    # img_dst[~img_threshold_idx] = 100
    img_dilation = cv2.dilate(img_dst, DF_kernel+1, 1)
    img_erode = cv2.erode(img_dilation, DF_kernel, iterations=1)
    

    return img_erode

def video_make_filtering(name, image_list, mask_list,  img_size =(112, 112), threshold = 0):

    fps = 24 #视频每秒24帧
    size = img_size #需要转为视频的图片的尺寸

    #视频保存在当前目录下

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')    
    video = cv2.VideoWriter(name+".avi",fourcc, fps,size, False)
    #draw stuff that goes on every frame here
    for frame_i in range(image_list.shape[0]):
        img = (image_list[frame_i,:,:,0])
        img_processed = image_process(img,20)
        # img_processed = img
        mask = mask_list[frame_i,:,:,0]
        img_processed[mask>0.5] = 255
        video.write(img_processed)
    video.release()




In [175]:
output_train_data = train_data[50:52]
video_list,mask_list = image_size_normalize_no_label(output_train_data, (256,256,1))
# video_make("expert_show", video_list, mask_list,  img_size =(256, 256),only_MV=False)
video_make_filtering("expert_show", video_list, mask_list,  img_size =(256, 256), threshold = 10)


In [176]:
is_labeled_order2(mask_list)

array([  0,   6,  31, 107, 121, 159])

In [188]:
def LK_OpticalFlow(img1,img2,mask1, flate_size = 2):
    
    img_cur = img1
    img_next = img2
    mask_cur = mask1
    flow = cv2.calcOpticalFlowFarneback(img_cur,img2,img_next, pyr_scale = 0.5, levels= 3, winsize = 8, iterations =5, poly_n = 5, poly_sigma = 1.2, flags = cv2.OPTFLOW_FARNEBACK_GAUSSIAN)

    mask_next = np.zeros(mask_cur.shape)
    for y_i in range(mask_cur.shape[0]):
        for x_i in range(mask_cur.shape[1]):
            if(mask_cur[y_i,x_i]>0.5):
                new_y,new_x = int(y_i+flow[y_i,x_i,1]),int(x_i+flow[y_i,x_i,0])
                mask_next[new_y,new_x] = 1
    mask_next_DF = mask_next
    mask_next_DF = cv2.dilate(mask_next, np.ones((flate_size, flate_size), dtype=np.uint8), 1)
    mask_next_DF = cv2.erode(mask_next_DF, np.ones((flate_size, flate_size), dtype=np.uint8), 1)
    # print(img_next[mask_next_DF>0.5].shape)
    return mask_next_DF

def tracking_result_evaluation(img,mask):
    MV_points = img[mask>0.5]
    MV_points = MV_points.reshape(-1)
    return np.std(MV_points)

In [184]:
img_cur = video_list[0,:,:,0]
img_next = video_list[1,:,:,0]
mask_cur = mask_list[0,:,:,0]
mask_next = LK_OpticalFlow(img_cur,img_next,mask_cur, flate_size = 2)

(523,)


In [185]:
img_next_output = img_next
img_next_output[mask_next>0.5] = 255
cv2.imwrite('./img/img_next_tracked3.png',img_next_output)

True

In [146]:

mask_next = np.zeros(mask_cur.shape)
for y_i in range(mask_cur.shape[0]):
    for x_i in range(mask_cur.shape[1]):
        if(mask_cur[y_i,x_i]>0.5):
            new_y,new_x = int(y_i+flow[y_i,x_i,1]),int(x_i+flow[y_i,x_i,0])
            mask_next[new_y,new_x] = 1

In [192]:
for index in is_labeled_order2(mask_list):
    img_i = video_list[index,:,:,0]
    mask_i = mask_list[index,:,:,0]
    print('labeld frame: ', tracking_result_evaluation(img_i,mask_i))
    img_i_next = video_list[index+1,:,:,0]
    mask_next = LK_OpticalFlow(img_cur,img_next,mask_cur, flate_size = 2)
    print('tracked frame: ', tracking_result_evaluation(img_i_next,mask_next))

    img_i_labeld_output = img_i
    img_i_labeld_output[mask_i>0.5] = 255
    cv2.imwrite('./img/',index,'_cur.png',img_i_labeld_output)

    img_next_output[mask_next>0.5] = 255
    cv2.imwrite('./img/'+(index+1)+'_cur.png',img_i_next)
    

48.99447870186591
(508,)
65.68514508681983
42.04625266884282
(508,)
77.87362251897088
57.170052029978294
(508,)
32.875596642642236
47.14826560061548
(508,)
89.55753649807865
48.47703581447694
(508,)
82.55198556764483
50.3177457597662
(508,)
60.362790830386835


In [None]:
for index in is_labeled_order2(mask_list):
    img_i = video_list[index,:,:,0]
    mask_i = mask_list[index,:,:,0]
    img_i_next = mask_list[index,:,:,0]
    mask_next = LK_OpticalFlow(img_cur,img_next,mask_cur, flate_size = 2)
    print(tracking_result_evaluation(img_i,mask_i))

## 4 Conclusion（things to be done for improvement）  
4.1 如上可见，随着epoch增加, accuracy波动很大，网络不是很work。 不清楚是数据量不够，网络复杂度不够， 还是iter比较小（纯ml小白）  
    如果是数据量不够， 或许需要用tracking的办法增加数据量
4.2 需要做cross-validation的预测版本
4.3 由于数据集中， 属于MV的像素点很少，和类别不平衡问题很像， 最好能增大 MV区域误识别的错误权重
4.4 预测的可视化实现