In [1]:
# show images inline
%matplotlib inline

# automatically reload modules when they have changed
%load_ext autoreload
%autoreload 2

import sys
import os
import shutil
from PIL import Image, ImageFont, ImageDraw
import cv2

# 添加路径，以能正常导入mbsh、trainer
sys.path.insert(0, r'..\Lib\trainer')

from mbsh.core.yolo import YOLO
import numpy as np
import pandas as pd
import glob

2022-09-13 15:56:57,797 - INFO - __init__.py - init_logger - 58 - ini  logger file D:\projects\Vision\Lib\logs\kernel-facda0e1-259d-4e9b-ba9c-ce3f660ce996.log


use temp dir:d:\znyx\temp


2022-09-13 15:57:14,373 - DEBUG - tpu_cluster_resolver.py - <module> - 32 - Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
2022-09-13 15:57:17,775 - DEBUG - __init__.py - <module> - 47 - Creating converter from 7 to 5
2022-09-13 15:57:17,777 - DEBUG - __init__.py - <module> - 47 - Creating converter from 5 to 7
2022-09-13 15:57:17,777 - DEBUG - __init__.py - <module> - 47 - Creating converter from 7 to 5
2022-09-13 15:57:17,778 - DEBUG - __init__.py - <module> - 47 - Creating converter from 5 to 7
2022-09-13 15:57:29,517 - INFO - utils.py - _init_num_threads - 147 - Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2022-09-13 15:57:29,518 - INFO - utils.py - _init_num_threads - 159 - NumExpr defaulting to 8 threads.


In [2]:
import tensorflow as tf
# Turn on memory growth for every GPU that TensorFlow lists.
# NOTE(review): TensorFlow presumably requires this to run before any GPU is
# initialised, so keep this cell ahead of the model cells — confirm.
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
  tf.config.experimental.set_memory_growth(device, True)

### 公共函数

In [3]:
# Frame-number threshold shared by the helpers below:
#   * stat_ct_file / stat_ct keep a run only when end - start >= delta;
#   * merge_ct joins two runs when the gap between them is < delta.
delta = 4

# Collect the start/end frames of the polyp runs recorded in a per-frame result file
def stat_ct_file(file_path):
    """Return the polyp runs found in a per-frame detection file.

    Every non-blank line is read as ``<frame>.jpg,<flag>[,...]``: the frame
    number is the name without its 4-character extension, and a flag equal to
    ``'1'`` marks a frame that belongs to a run.

    Args:
        file_path: path of the text file to read.

    Returns:
        A list of ``[start, end]`` pairs. ``start`` is the first flagged frame
        of a run and ``end`` is the first unflagged frame after it. A run that
        is still open at the end of the file is closed with
        ``end = last flagged frame + 1``. Runs with ``end - start < delta``
        are dropped.
    """
    results = []
    # None means "no run open"; using 0 would be ambiguous with frame 0.
    start = None
    last_flagged = None
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline block).
                continue
            arr = line.split(',')
            if arr[1] == '1':
                if start is None:
                    start = int(arr[0][:-4])
                last_flagged = arr[0]
            elif start is not None:
                end = int(arr[0][:-4])
                if end - start >= delta:
                    results.append([start, end])
                start = None

    # The file ended inside a run: close it instead of silently dropping it.
    if start is not None:
        end = int(last_flagged[:-4]) + 1
        if end - start >= delta:
            results.append([start, end])

    return results
    
def stat_ct(data):
    """Return the polyp runs found in an in-memory per-frame result list.

    Args:
        data: iterable of ``(filename, flag)`` rows in frame order. The frame
            number is ``filename`` without its 4-character extension, and a
            flag equal to ``1`` marks a frame that belongs to a run.

    Returns:
        A list of ``[start, end]`` pairs. ``start`` is the first flagged frame
        of a run and ``end`` is the first unflagged frame after it. A run that
        is still open when the data ends is closed with
        ``end = last flagged frame + 1``. Runs with ``end - start < delta``
        are dropped.
    """
    results = []
    # None means "no run open"; using 0 would be ambiguous with frame 0.
    start = None
    last_flagged = None
    for arr in data:
        if arr[1] == 1:
            if start is None:
                start = int(arr[0][:-4])
            last_flagged = arr[0]
        elif start is not None:
            end = int(arr[0][:-4])
            if end - start >= delta:
                results.append([start, end])
            start = None

    # The data ended inside a run: close it instead of silently dropping it.
    if start is not None:
        end = int(last_flagged[:-4]) + 1
        if end - start >= delta:
            results.append([start, end])

    return results


# Merge neighbouring polyp runs of one video
def merge_ct(arr):
    """Merge runs whose gap to the previous run is smaller than ``delta``.

    Args:
        arr: sequence of ``[start, end]`` runs ordered by start frame (lists or
            tuples). The input and its items are left untouched.

    Returns:
        A new list of ``[start, end]`` lists. A run is folded into the previous
        one when ``start - previous_end < delta``; the merged end is the larger
        of the two ends, so a run nested inside the previous one cannot shrink
        it.
    """
    results = []
    for run in arr:
        if results and run[0] - results[-1][1] < delta:
            results[-1][1] = max(results[-1][1], run[1])
        else:
            # Copy so that later merges never write into the caller's data.
            results.append(list(run))

    return results

# List the .jpg names of one or more directories, ordered by their numeric name
def get_dir_sorted_imgs(dirs):
    """Return ``'<n>.jpg'`` names sorted by the integer ``n``.

    Args:
        dirs: a directory path (``str``) or a ``list`` of directory paths.
            Any other type yields an empty result. Paths that do not exist
            are ignored.

    Returns:
        The names rebuilt as ``'%s.jpg' % n`` in ascending numeric order.
        Names collected from several directories are all kept, duplicates
        included.

    Raises:
        ValueError: if a ``.jpg`` file name is not an integer before its
            extension.
    """
    if isinstance(dirs, str):
        folders = [dirs]
    elif isinstance(dirs, list):
        folders = dirs
    else:
        folders = []

    numbers = []
    for folder in folders:
        if not os.path.exists(folder):
            continue
        for entry in os.listdir(folder):
            if entry.endswith('.jpg'):
                numbers.append(int(entry[:-4]))

    numbers.sort()
    return ['%s.jpg' % number for number in numbers]


### 息肉检测模型

In [4]:
# Model files passed to YOLO(...) by detect_polyp_by_model.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
model_path = r'D:\projects\Vision\polyp\Models\trained_weights_final.h5'
anchors_path = r'D:\projects\Vision\polyp\Models\yolo_anchors_0311.txt'
classes_path = r'D:\projects\Vision\polyp\Models\polyp_classes.txt'
# Forwarded to YOLO as `iou`; presumably the box-overlap threshold — confirm in mbsh.core.yolo.
iou = 0.1

# Forwarded to YOLO as `model_image_size` and `gpu_num`.
model_image_size = (352, 352)
gpu_num = 1 

In [5]:
# Run the detector over every frame of one video and flag frames with a polyp
def detect_polyp_by_model(input_path, target_path, score):
    """Run the YOLO model on each image of a video and flag the detections.

    Args:
        input_path: directory holding the video's frames (``<n>.jpg``).
        target_path: directory handed to ``predict_file`` for the marked
            images; created, parents included, when missing.
        score: value forwarded to ``YOLO`` as ``score``
            (presumably the confidence threshold — confirm in mbsh.core.yolo).

    Returns:
        A list of ``(filename, flag)`` tuples in numeric frame order, where
        ``flag`` is 1 when ``predict_file`` returned exactly one result for the
        frame and 0 otherwise.
    """
    # makedirs instead of mkdir: the parent (e.g. images_detect) may not exist yet.
    os.makedirs(target_path, exist_ok=True)

    # Build the model. NOTE(review): this reloads the weights on every call.
    yolo = YOLO(model_path=model_path, anchors_path=anchors_path, classes_path=classes_path,
                score=score, iou=iou, model_image_size=model_image_size, gpu_num=gpu_num)

    # Predict frame by frame.
    results = []
    for filename in get_dir_sorted_imgs(input_path):
        img_path = os.path.join(input_path, filename)
        pred_results, _, _ = yolo.predict_file(img_path, target_path, draw_rect=True, cut=False,
                                               one_box=True, timing=False, expand=1)
        # A previously disabled CSV line read pred_results[0][4] and
        # pred_results[0][0..3]; presumably score and box — confirm before relying on it.
        results.append((filename, 1 if len(pred_results) == 1 else 0))

    return results

# Append the polyp statistics of one video to the result file
def write_stat_result(output_file, video_name, score, poly_results, video_len, fps):
    """Append one video's run start times and summary line to ``output_file``.

    Args:
        output_file: text file opened in append mode.
        video_name: label written in the summary line.
        score: number written in the summary line with three decimals.
        poly_results: per-frame rows passed to ``stat_ct``.
        video_len: divisor for the run count (``get_video_len`` supplies it).
        fps: divisor turning a run's start frame into the written time value.

    Returns:
        ``poly_num / video_len``, where ``poly_num`` is the number of runs left
        after ``stat_ct`` and ``merge_ct``.
    """
    # Number of polyps = number of runs after merging.
    segments = merge_ct(stat_ct(poly_results))
    poly_num = len(segments)

    with open(output_file, 'a+') as out:
        # One line per run: its start frame divided by fps.
        out.writelines('%.3f\n' % (segment[0] / fps) for segment in segments)

        # Summary line: video name, score, run count relative to video length.
        ratio = poly_num / video_len
        out.write('%s	%.3f	%.3f\n' % (video_name, score, ratio))

    return ratio
        
# Estimate the video length from the number of extracted images
def get_video_len(video_imgs_dir, fps):
    """Return the entry count of ``video_imgs_dir`` divided by ``fps * 60``.

    Every directory entry is counted, whatever its extension. With ``fps`` in
    frames per second the result is a length in minutes.
    """
    frame_count = len(os.listdir(video_imgs_dir))
    frames_per_minute = fps * 60
    return frame_count / frames_per_minute
    

### 挑选真息肉

In [8]:
import shutil
base_dir = r'D:\项目资料\息肉假阳性\202208\有息肉肠镜视频'
from_path = os.path.join(base_dir, 'images_crop')
to_path = os.path.join(base_dir, 'images_truepoly_crop')

# (video folder, [(first_frame, last_frame), ...]): inclusive frame ranges that
# are moved from images_crop/<folder> to images_truepoly_crop/<folder>.
folder_imgs = [
    (3457, [(3731, 4667)]),
    (3471, [(3741, 4479)]),
    (3477, [(13, 2173), (4334, 5044)]),
    (3532, [(827, 1606), (3864, 5873), (7284, 8196)]),
    (3400, [(3510, 3799)]),
    (3531, [(1, 2613)]),
    (3433, [(4971, 5742)]),
    (3496, [(2584, 3901), (4943, 5132)]),
    (3460, [(2225, 2949)]),
    (3479, [(7194, 8313)]),
    (3515, [(1439, 2177), (2206, 3737), (7273, 7654)]),
    (3538, [(5624, 5906)]),
    (3474, [(6551, 6684)]),
    (3481, [(7002, 9126)]),
    (3497, [(1612, 2914)]),
    (3485, [(5334, 5481)]),
    (3424, [(8168, 8296)]),
    (3504, [(4016, 4763)]),
    (3459, [(3092, 4033)]),
    (3520, [(1016, 1757), (3210, 3325), (3536, 4105)]),
    (3513, [(2080, 3350), (4065, 4253)]),
    (3476, [(368, 1706), (3247, 4225)]),
    (3488, [(5933, 6044)]),
    (3428, [(2886, 4608)]),
    (3410, [(4657, 5239)]),
    (3526, [(700, 1893), (2696, 2857)]),
    (3478, [(2176, 3921), (4923, 5152)]),
    (3444, [(4915, 5769)]),
]
for folder, start_ends in folder_imgs:
    from_folder = os.path.join(from_path, str(folder))
    to_folder = os.path.join(to_path, str(folder))
    print('processing: %s' % folder)
    # makedirs also creates images_truepoly_crop itself on the first run.
    os.makedirs(to_folder, exist_ok=True)

    for start, end in start_ends:
        for i in range(start, end+1):
            img_name = '%s.jpg' % i
            from_img = os.path.join(from_folder, img_name)
            # Check BEFORE copying: a frame that is absent (never extracted, or
            # already moved by an earlier run of this cell) is skipped, which
            # makes the cell safe to re-run.
            if not os.path.exists(from_img):
                continue
            shutil.copy(from_img, os.path.join(to_folder, img_name))
            os.remove(from_img)
              


In [22]:
base_dir = r'D:\项目资料\息肉假阳性\202208\有息肉肠镜视频'
input_base_dir = os.path.join(base_dir, 'images_crop')
output_base_dir = os.path.join(base_dir, 'images_detect')
fps = 18

# Map each video folder to the score suffix of its detection folder
# (detection folders are named '<video>_<score>').
folder_score = {}
for folder in os.listdir(output_base_dir):
    arr = folder.split('_')
    if len(arr) < 2:
        # Not a '<video>_<score>' entry; skipping it avoids an IndexError.
        continue
    folder_score[arr[0]] = arr[1]

for folder, score in folder_score.items():
    # NOTE(review): only video 3531 is re-evaluated; drop this filter to
    # recompute the statistics of every video.
    if folder != '3531':
        continue
    print(folder, score)
    img_names = get_dir_sorted_imgs(os.path.join(input_base_dir, folder))
    det_img_names = set(get_dir_sorted_imgs(os.path.join(output_base_dir, '%s_%s' % (folder, score))))
    # A frame counts as detected (flag 1) when an image of the same name exists
    # in the detection folder.
    results = [(img_name, 1 if img_name in det_img_names else 0) for img_name in img_names]

    val = write_stat_result(os.path.join(base_dir, 'stat.txt'), folder, float(score), results,
                            get_video_len(os.path.join(input_base_dir, folder), fps), fps)
    print(val)
    

## 假阳性设置score
# for video_name, score in [
#     ('3429', 0.008)
# ]:
## 真阳性检测
# for video_name in os.listdir(input_base_dir):
#     score = true_poly_score
    
#     print(video_name, score)
#     poly_results = detect_polyp_by_model(os.path.join(input_base_dir, video_name), 
#                                          os.path.join(output_base_dir, '%s_%s' % (video_name, score)), score)
#     val = write_stat_result(os.path.join(base_dir, 'stat.txt'), video_name, score, poly_results,
#                       get_video_len(os.path.join(input_base_dir, video_name), fps), fps)
#     print(val)
                  

3531 0.7
0.6115515288788221


### 运行息肉检测部分

In [7]:
# Value passed as `fps` to get_video_len and write_stat_result below.
fps = 18
# score = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
base_dir = r'D:\项目资料\息肉假阳性\202208\有息肉肠镜视频'
# base_dir = r'D:\项目资料\息肉假阳性\202208\无息肉肠镜视频'

## False-positive runs: (video folder, score) pairs to process
video_names_score = [('3428', 0.002), ('3497', 0.004)]
input_base_dir = os.path.join(base_dir, 'images_crop')
output_base_dir = os.path.join(base_dir, 'images_detect')
for video_name, score in video_names_score:

## True-positive detection: disabled alternative to the configuration above
## (kept commented out; it would replace the loop header and the folders).
# true_poly_score = 0.35
# input_base_dir = os.path.join(base_dir, 'images_truepoly_crop')
# output_base_dir = os.path.join(base_dir, 'images_truepoly_detect')
# for video_name in os.listdir(input_base_dir):
#     score = true_poly_score
    
    # Loop body: detect, write marked images to '<video>_<score>', then append
    # the statistics to stat.txt (the file grows on every run).
    print(video_name, score)
    poly_results = detect_polyp_by_model(os.path.join(input_base_dir, video_name), 
                                         os.path.join(output_base_dir, '%s_%s' % (video_name, score)), score)
    val = write_stat_result(os.path.join(base_dir, 'stat.txt'), video_name, score, poly_results,
                      get_video_len(os.path.join(input_base_dir, video_name), fps), fps)
    print(val)
                  

2022-09-13 17:18:29,615 - INFO - yolo.py - generate - 132 - load yolo model D:\projects\Vision\polyp\Models\trained_weights_final.h5


3428 0.003
<class 'keras.engine.functional.Functional'> 111111111
D:\projects\Vision\polyp\Models\trained_weights_final.h5 model, anchors, and classes loaded.


2022-09-13 18:19:13,742 - INFO - yolo.py - generate - 132 - load yolo model D:\projects\Vision\polyp\Models\trained_weights_final.h5


13.823391263503993
3497 0.003
<class 'keras.engine.functional.Functional'> 111111111
D:\projects\Vision\polyp\Models\trained_weights_final.h5 model, anchors, and classes loaded.
15.09829619921363


In [None]:
### Run log: video, score, polyp runs per minute (same columns as the summary line written to stat.txt)
# 3400 0.01 12.839
# 3400 0.35 3.959

### 合成视频

In [None]:
# # 将生成的息肉图片和原始图片一起合成息肉标记视频
# def create_mp4(base_path, video_name, score):
#     input_img_path = os.path.join(base_path, r'input\imgs\%s_crop' % video_name)
#     input_img_path1 = os.path.join(base_path, r'input\imgs\%s_crop_select' % video_name)
#     input_img_path2 = os.path.join(base_path, r'input\imgs\%s_crop_select2' % video_name)
#     process_img_path = os.path.join(base_path, r'output\imgs\%s_%s' % (video_name, score))
    
#     image = cv2.imdecode(np.fromfile(os.path.join(input_img_path, r'1.jpg'), dtype=np.uint8), -1)
#     img_h, img_w, _ = image.shape

#     video_path = os.path.join(base_path, r'output\video\%s_%s.avi' % (video_name, score))
#     video = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*"XVID"), fps, (img_w, img_h))
    
#     img_names = get_dir_sorted_imgs([input_img_path1, input_img_path2])
#     for f in img_names:
#         filepath1 = os.path.join(input_img_path1, f)
#         filepath2 = os.path.join(input_img_path2, f)
#         filepath3 = os.path.join(process_img_path, f)
#         if os.path.exists(filepath3):
#             filepath = filepath3
#         elif os.path.exists(filepath2):
#             filepath = filepath2
#         elif os.path.exists(filepath1):
#             filepath = filepath1
#         image = cv2.imdecode(np.fromfile(filepath, dtype=np.uint8),-1)
#         video.write(image)
#     video.release()

In [14]:
# Combine the raw frames and the marked polyp frames into one annotated video
def create_mp4(raw_img_path, fake_polyp_path, true_polyp_path, output_video_path, frame_rate=None):
    """Write an XVID video in which marked frames replace the raw frames.

    For every ``.jpg`` of ``raw_img_path`` (numeric order) the frame is taken
    from ``true_polyp_path`` if a file of that name exists there, otherwise
    from ``fake_polyp_path``, otherwise from ``raw_img_path``.

    Args:
        raw_img_path: directory with the original frames.
        fake_polyp_path: directory with marked frames, used as second choice.
        true_polyp_path: directory with marked frames, used as first choice.
        output_video_path: path of the video file to write.
        frame_rate: frames per second of the video; when ``None`` the
            module-level ``fps`` is used, as before.

    Raises:
        FileNotFoundError: if ``raw_img_path`` contains no ``.jpg`` frame.
    """
    if frame_rate is None:
        # Backward compatible: earlier callers relied on the global fps.
        frame_rate = fps

    img_names = get_dir_sorted_imgs([raw_img_path])
    if not img_names:
        raise FileNotFoundError('no .jpg frames found in %s' % raw_img_path)

    # Take the video size from the first frame that actually exists; '1.jpg'
    # is not guaranteed to be present in the raw folder.
    # Images are read with np.fromfile + cv2.imdecode (presumably so that
    # non-ASCII Windows paths work — confirm).
    first_path = os.path.join(raw_img_path, img_names[0])
    image = cv2.imdecode(np.fromfile(first_path, dtype=np.uint8), -1)
    img_h, img_w, _ = image.shape

    video = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"XVID"), frame_rate, (img_w, img_h))
    try:
        for f in img_names:
            raw_file = os.path.join(raw_img_path, f)
            fake_file = os.path.join(fake_polyp_path, f)
            true_file = os.path.join(true_polyp_path, f)
            if os.path.exists(true_file):
                filepath = true_file
            elif os.path.exists(fake_file):
                filepath = fake_file
            else:
                filepath = raw_file
            image = cv2.imdecode(np.fromfile(filepath, dtype=np.uint8), -1)
            video.write(image)
    finally:
        # Release the writer even when a frame fails to load.
        video.release()

In [16]:
for base_dir in [r'D:\项目资料\息肉假阳性\202208\有息肉肠镜视频', r'D:\项目资料\息肉假阳性\202208\无息肉肠镜视频']:
    raw_img_folder = os.path.join(base_dir, 'images_crop')
    fake_polyp_folder = os.path.join(base_dir, 'images_detect')
    true_polyp_folder = os.path.join(base_dir, 'images_truepoly_detect')
    output_video_folder = os.path.join(base_dir, 'videos_gen')
    # Create the output folder up front; NOTE(review): cv2.VideoWriter is not
    # expected to create missing directories — confirm against the OpenCV docs.
    os.makedirs(output_video_folder, exist_ok=True)

    # Video folder -> score suffix of its detection folder ('' when the video
    # has no '<video>_<score>' folder in images_detect).
    video_names = os.listdir(raw_img_folder)
    folder_score = {video_name: '' for video_name in video_names}
    for folder in os.listdir(fake_polyp_folder):
        arr = folder.split('_')
        if len(arr) < 2:
            # Not a '<video>_<score>' entry; skipping it avoids an IndexError.
            continue
        folder_score[arr[0]] = arr[1]

    # Generate one video per folder.
    for video_name, score in folder_score.items():
        print('processing: %s, %s' % (video_name, score))
        raw_img_path = os.path.join(raw_img_folder, video_name)
        fake_polyp_path = os.path.join(fake_polyp_folder, '%s_%s' % (video_name, score))
        # The marked true-polyp frames are read from the '<video>_0.35' folder.
        true_polyp_path = os.path.join(true_polyp_folder, '%s_%s' % (video_name, 0.35))
        output_video_path = os.path.join(output_video_folder, r'%s_%s.avi' % (video_name, score))

        create_mp4(raw_img_path, fake_polyp_path, true_polyp_path, output_video_path)

processing: 3400, 0.35
processing: 3404, 
processing: 3406, 
processing: 3410, 0.003
processing: 3424, 0.01
processing: 3428, 0.001
processing: 3431, 
processing: 3433, 0.35
processing: 3436, 
processing: 3444, 0.01
processing: 3454, 
processing: 3455, 
processing: 3457, 0.35
processing: 3459, 0.03
processing: 3460, 0.05
processing: 3462, 
processing: 3469, 
processing: 3471, 0.45
processing: 3474, 0.05
processing: 3476, 0.001
processing: 3477, 0.9
processing: 3478, 0.01
processing: 3479, 0.15
processing: 3480, 
processing: 3481, 0.25
processing: 3485, 0.02
processing: 3488, 0.002
processing: 3496, 0.05
processing: 3497, 0.003
processing: 3499, 
processing: 3504, 0.003
processing: 3510, 
processing: 3513, 0.02
processing: 3515, 0.15
processing: 3520, 0.01
processing: 3526, 0.01
processing: 3531, 0.7
processing: 3532, 0.9
processing: 3533, 
processing: 3538, 0.02
processing: 3398, 
processing: 3399, 
processing: 3402, 0.004
processing: 3414, 0.004
processing: 3415, 0.002
processing: 341