# 3-2 Object Detection

## DataSet

### 1. [ADE20K Dataset](http://sceneparsing.csail.mit.edu/)
- 실내/실외 랜드마크(사물) 이미지

<img src='https://scontent-gmp1-1.xx.fbcdn.net/v/t31.0-8/19477579_338837089885626_5759019649209493861_o.jpg?_nc_cat=107&ccb=2&_nc_sid=8bfeb9&_nc_ohc=TUcVBgjq15cAX8fd-DD&_nc_ht=scontent-gmp1-1.xx&oh=30d7400648ee385253cfc906e0194db3&oe=60303FD1'>

### 2. [MS-COCO Dataset](https://cocodataset.org/#home)
- 일상 생활

<img src='https://cocodataset.org/images/coco-examples.jpg' width=100%>  

### 3. [PASCAL VOC(Visual Object Classes) Dataset](http://host.robots.ox.ac.uk/pascal/VOC/)
- 객체 인식을 위한 표준화된 이미지 데이터셋

### 4. [Cityscapes Dataset](https://www.cityscapes-dataset.com/dataset-overview/)
- 도시거리 장면 분석을 위한 데이터셋

<img src='https://www.cityscapes-dataset.com/wordpress/wp-content/uploads/2015/07/muenster00.png'>

### 5. [MHP(Multi Human Parsing) Dataset](https://lv-mhp.github.io/)
- 사람들

<img src='https://lv-mhp.github.io/static/images/2.png'>

## Object Detection

In [None]:
!pip3 install --upgrade mxnet > /dev/null
!pip3 install --upgrade gluoncv > /dev/null

In [None]:
!pip3 install --upgrade youtube-dl > /dev/null

In [None]:
def get_youtube_video(youtube_url):
  """Download a YouTube video with youtube-dl and return its local filename.

  The download uses the format selector 'best[height<=480][ext=mp4]'.

  Args:
    youtube_url: URL of the video to download.

  Returns:
    The filename youtube-dl derives for the downloaded video.
  """
  from youtube_dl import YoutubeDL

  # A plain 'mp4' selector was used here previously.
  download_options = {'format': 'best[height<=480][ext=mp4]'}

  with YoutubeDL(download_options) as downloader:
    video_info = downloader.extract_info(youtube_url, download=True)
    return downloader.prepare_filename(video_info)

In [None]:
son = get_youtube_video('https://www.youtube.com/watch?v=C-CefuZ6h1k&feature=emb_err_woyt')
print(son)

In [None]:
def read_video(filename, output_path, start_ms = 0, end_ms = None, step=1):
  """Extract frames of a video into numbered JPEG files, selected by time.

  Reading starts at `start_ms` and stops at the first frame whose position is
  at or beyond `end_ms` (or at the end of the video when `end_ms` is None).
  Each saved frame is written to `output_path` as '%05d.jpg', numbered with
  the capture's frame position reported right after the frame was read.

  Args:
    filename: Path of the video file to read.
    output_path: Folder for the JPEG files; created if it does not exist.
    start_ms: Position in milliseconds to start reading from.
    end_ms: Position in milliseconds to stop at, or None to read to the end.
    step: Save every `step`-th frame (1 saves every frame).

  Returns:
    Tuple (width, height, fps, frame_count) as reported by the capture.
  """
  import os
  from tqdm.notebook import tqdm
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  cap = cv2.VideoCapture(filename)
  capture_count = 0
  try:
    cap.set(cv2.CAP_PROP_POS_MSEC, start_ms)

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Duration is frames divided by frame rate. The guard avoids a
    # ZeroDivisionError if the capture reports an FPS of 0.
    total_ms = frame_count / fps * 1000 if fps else 0
    print('Frame width:', width)
    print('Frame height:', height)
    print('Frame count:', frame_count)
    print('FPS:', fps)
    print('total sec:', int(total_ms/1000))

    # Expected number of frames in the requested window, for the progress bar.
    if end_ms is not None:
      time_gap_sec = (end_ms - start_ms)/1000
      pbar_count = round(time_gap_sec*fps)
    else:
      pbar_count = round(frame_count)

    with tqdm(total=pbar_count) as pbar:
      while cap.isOpened():
        ret, img = cap.read()
        if not ret:
          break
        frame_no = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        frame_msec = int(cap.get(cv2.CAP_PROP_POS_MSEC))

        if end_ms is not None and frame_msec >= end_ms:
          break

        cv2.imwrite(os.path.join(output_path, '%05d.jpg'%(frame_no)), img)
        capture_count += 1
        pbar.update(step)
        if step > 1:
          # Jump ahead so that the next read lands `step` frames further on.
          cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no + (step-1))
  finally:
    # Release the capture on every exit path, including the end_ms break.
    cap.release()

  print('capture_count:', capture_count)

  return width, height, fps, frame_count

In [None]:
# !rm -rf europe_street_img

In [None]:
step=1

In [None]:
width, height, fps, frame_count = read_video(son, './son_img')

### folder 압축

In [None]:
from zipfile import ZipFile
import os
from os.path import basename

In [None]:
# Pack every file found under ./son_img into son.zip, stored by bare file name.
with ZipFile('son.zip', 'w') as zipObj:
   for dir_path, _, file_names in os.walk('./son_img'):
       for file_name in file_names:
           # Full path on disk; only its last component is used inside the archive.
           full_path = os.path.join(dir_path, file_name)
           zipObj.write(full_path, basename(full_path))

### 영상 분석
- 반복영상을 구간별로 분리

In [None]:
def read_video_by_frame(filename, output_path, start_frame = 1, end_frame = None, step=1):
  """Extract a range of frames from a video into numbered JPEG files.

  Frame numbers are 1-based and both ends of the range are inclusive, so
  consecutive calls such as (1, 505) and (506, 942) cover every frame once.
  Each saved frame is written to `output_path` as '%04d.jpg', numbered with
  the capture's frame position reported right after the frame was read.

  Args:
    filename: Path of the video file to read.
    output_path: Folder for the JPEG files; created if it does not exist.
    start_frame: 1-based number of the first frame to save.
    end_frame: 1-based number of the last frame to save, or None to read to
      the end of the video.
    step: Save every `step`-th frame (1 saves every frame).

  Returns:
    Tuple (width, height, fps, frame_count) as reported by the capture.
  """
  import os
  from tqdm.notebook import tqdm
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  cap = cv2.VideoCapture(filename)
  capture_count = 0
  try:
    # CAP_PROP_POS_FRAMES is the 0-based index of the frame decoded next,
    # while start_frame is 1-based; seeking to start_frame itself would skip
    # the requested first frame.
    cap.set(cv2.CAP_PROP_POS_FRAMES, max(start_frame - 1, 0))

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('Frame width:', width)
    print('Frame height:', height)
    print('Frame count:', frame_count)
    print('FPS:', fps)

    # Both ends are inclusive, hence the +1.
    if end_frame is not None:
      pbar_count = end_frame - start_frame + 1
    else:
      pbar_count = frame_count

    with tqdm(total=pbar_count) as pbar:
      while cap.isOpened():
        ret, img = cap.read()
        if not ret:
          break
        # After a read the position equals the 1-based number of that frame.
        frame_no = int(cap.get(cv2.CAP_PROP_POS_FRAMES))

        if end_frame is not None and frame_no > end_frame:
          break

        cv2.imwrite(os.path.join(output_path, '%04d.jpg'%(frame_no)), img)
        capture_count += 1
        pbar.update(step)
        if step > 1:
          # Jump ahead so that the next read lands `step` frames further on.
          cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no + (step-1))
  finally:
    # Release the capture on every exit path, including the end_frame break.
    cap.release()

  print('capture_count:', capture_count)

  return width, height, fps, frame_count

In [None]:
# (output folder, first frame, last frame) for each repeated segment of the clip.
segments = [
  ('./son_img_1', 1, 505),
  ('./son_img_2', 506, 942),
  ('./son_img_3', 943, 1380),
  ('./son_img_4', 1381, 1814),
  ('./son_img_5', 1815, 2272),
  ('./son_img_6', 2273, 2701),
  ('./son_img_7', 2702, 3202),
  ('./son_img_8', 3203, 3640),
  ('./son_img_9', 3641, 4004),
  ('./son_img_10', 4005, 5009),
]
# Extract each segment in order; the video properties are rebound on every call.
for segment_dir, first_frame, last_frame in segments:
  width, height, fps, frame_count = read_video_by_frame(son, segment_dir, first_frame, last_frame, 1)

### video 저장하기

In [None]:
output_path = './son_split'
import os 
if not os.path.exists(output_path):
        os.makedirs(output_path)

In [None]:
from gluoncv.utils.filesystem import try_import_cv2
cv2 = try_import_cv2()
fourcc = cv2.VideoWriter_fourcc(*'FMP4')

In [None]:
width=1280
height=720
fps=25.0
step=1

In [None]:
input_path='son_img_1'
file_list = os.listdir(input_path)
file_list.sort()

In [None]:
from tqdm.notebook import tqdm 

In [None]:
# Write the images named in file_list, in order, into son_1.mp4 under output_path.
out = cv2.VideoWriter('{}/son_1.mp4'.format(output_path), fourcc, fps/step, (width, height))
for frame_name in file_list:
  frame_path = '{}/{}'.format(input_path, frame_name)
  print(frame_path)
  out.write(cv2.imread(frame_path))
out.release()

In [None]:
def make_video_file(input_path, output_path, output_filename, width, height, fps, step=1):
  """Assemble the images of a folder into a video file.

  The files in `input_path` are taken in sorted name order and written as
  frames of `output_path/output_filename`. Files that OpenCV cannot read as
  an image are skipped, and frames whose size differs from (width, height)
  are resized, because the writer expects every frame at the size it was
  opened with.

  Args:
    input_path: Folder holding the frame images.
    output_path: Folder for the video; created if it does not exist.
    output_filename: File name of the video inside `output_path`.
    width: Frame width of the output video in pixels.
    height: Frame height of the output video in pixels.
    fps: Frame rate of the source frames.
    step: Divisor applied to `fps` for the output frame rate.
  """
  import os
  from gluoncv.utils.filesystem import try_import_cv2
  from tqdm.notebook import tqdm
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  # Sorted so that zero-padded frame names come out in frame order.
  file_list = sorted(os.listdir(input_path))

  # output video codec
  fourcc = cv2.VideoWriter_fourcc(*'FMP4')

  out = cv2.VideoWriter('{}/{}'.format(output_path, output_filename), fourcc, fps/step, (width, height))
  skipped_count = 0
  try:
    for filename in tqdm(file_list):
      im_file = '{}/{}'.format(input_path,filename)
      img = cv2.imread(im_file)
      if img is None:
        # imread returns None for files it cannot decode (non-image, corrupt).
        skipped_count += 1
        continue
      if (img.shape[1], img.shape[0]) != (width, height):
        img = cv2.resize(img, dsize=(width, height), interpolation=cv2.INTER_AREA)
      out.write(img)
  finally:
    # Finalize the file even if a frame fails part-way through.
    out.release()

  if skipped_count:
    print('skipped unreadable files:', skipped_count)

In [None]:
make_video_file('son_img_2', 'son_split', 'son_2.mp4', 1280, 720, 25.0)

In [None]:
make_video_file('son_img_3', 'son_split', 'son_3.mp4', 1280, 720, 25.0)
make_video_file('son_img_4', 'son_split', 'son_4.mp4', 1280, 720, 25.0)
make_video_file('son_img_5', 'son_split', 'son_5.mp4', 1280, 720, 25.0)
make_video_file('son_img_6', 'son_split', 'son_6.mp4', 1280, 720, 25.0)
make_video_file('son_img_7', 'son_split', 'son_7.mp4', 1280, 720, 25.0)
make_video_file('son_img_8', 'son_split', 'son_8.mp4', 1280, 720, 25.0)
make_video_file('son_img_9', 'son_split', 'son_9.mp4', 1280, 720, 25.0)
make_video_file('son_img_10', 'son_split', 'son_10.mp4', 1280, 720, 25.0)

In [None]:
import time
from gluoncv import model_zoo, data, utils

In [None]:
  # model download from model_zoo
  net = model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)

In [None]:
def display_detection(net, im_fname):
  """Run a detector on one image file and return the image with boxes drawn.

  Args:
    net: Detection model; called on the pre-processed image and expected to
      return (class_IDs, scores, bounding_boxes) and to expose `classes`.
    im_fname: Path of the image file to analyse.

  Returns:
    The image with the detections of the first batch element drawn on it,
    with its first and third colour channels swapped.
  """
  # Imported locally, like the other helpers in this notebook, so the function
  # does not depend on names bound by other cells.
  from gluoncv import data, utils
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  x, img = data.transforms.presets.yolo.load_test(im_fname)

  # inference
  class_IDs, scores, bounding_boxes = net(x)

  # Draw boxes, scores and class names for the single image in the batch.
  img = utils.viz.cv_plot_bbox(img, bounding_boxes[0], scores[0], class_IDs[0], class_names=net.classes)

  # Swap the red and blue channels. NOTE(review): presumably load_test yields
  # RGB and the OpenCV video writer expects BGR - confirm.
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  return img

In [None]:
!rm -rf son_detect

In [None]:
output_path = './son_detect'
import os 
if not os.path.exists(output_path):
        os.makedirs(output_path)

In [None]:
from gluoncv.utils.filesystem import try_import_cv2
cv2 = try_import_cv2()
fourcc = cv2.VideoWriter_fourcc(*'FMP4')

In [None]:
file_list = os.listdir('son_img_7')
file_list.sort()

In [None]:
width=1280
height=720
fps=25.0
step=1

In [None]:
from tqdm.notebook import tqdm 

In [None]:
# Run detection on every image named in file_list and write the annotated,
# resized frames into son_7.mp4 under output_path.
out = cv2.VideoWriter('{}/son_7.mp4'.format(output_path), fourcc, fps/step, (width, height))
for frame_name in tqdm(file_list):
  annotated = display_detection(net, '{}/{}'.format('son_img_7', frame_name))
  out.write(cv2.resize(annotated, dsize=(width, height), interpolation=cv2.INTER_AREA))
out.release()

### object detect function

In [None]:
def get_detect_video(detector, image_path, output_path, output_filename, width, height, fps, step=1):
  """Run object detection over a folder of images and write a video of the results.

  The files in `image_path` are taken in sorted name order; every `step`-th
  one is passed through `display_detection`, resized to (width, height) and
  appended to `output_path/output_filename`. A frame that fails is skipped so
  one bad image does not abort the run, but the failures are counted and
  reported at the end instead of being silently discarded.

  Args:
    detector: Detection model handed to `display_detection`.
    image_path: Folder holding the frame images.
    output_path: Folder for the video; created if it does not exist.
    output_filename: File name of the video inside `output_path`.
    width: Frame width of the output video in pixels.
    height: Frame height of the output video in pixels.
    fps: Frame rate of the source frames; the video is written at fps/step.
    step: Process only every `step`-th image.
  """
  from tqdm.notebook import tqdm
  import os
  from gluoncv.utils.filesystem import try_import_cv2
  cv2 = try_import_cv2()

  os.makedirs(output_path, exist_ok=True)

  file_list = sorted(os.listdir(image_path))

  fourcc = cv2.VideoWriter_fourcc(*'FMP4')

  out = cv2.VideoWriter('{}/{}'.format(output_path, output_filename), fourcc, fps/step, (width, height))

  failed_count = 0
  first_error = None
  try:
    for index, filename in enumerate(tqdm(file_list)):
      # Skip images according to step.
      if index % step != 0:
        continue
      im_file = '{}/{}'.format(image_path, filename)
      try:
        img = display_detection(detector, im_file)
        frame = cv2.resize(img, dsize=(width, height), interpolation=cv2.INTER_AREA)
        out.write(frame)
      except Exception as ex:
        # Best effort: keep going, but remember what went wrong.
        failed_count += 1
        if first_error is None:
          first_error = '{}: {!r}'.format(im_file, ex)
  finally:
    # Finalize the file even if the loop is interrupted.
    out.release()

  if failed_count:
    print('skipped frames:', failed_count, '- first error:', first_error)

In [None]:
get_detect_video(net,'son_img_7', 'son_detect', 'son_7_skip5.mp4', 1280, 720, 25.0, step=5)

## [Scene Detection](https://pyscenedetect.readthedocs.io/projects/Manual/en/latest/index.html)

In [None]:
!pip3 install scenedetect

In [None]:
goldeneye = get_youtube_video('https://www.youtube.com/watch?v=OMgIPnCnlbQ')
print(goldeneye)

In [None]:
# Standard PySceneDetect imports:
from scenedetect import VideoManager
from scenedetect import SceneManager

# For content-aware scene detection:
from scenedetect.detectors import ContentDetector

In [None]:
def find_scenes(video_path, threshold=30.0):
  """Detect scene boundaries in a video with PySceneDetect's ContentDetector.

  Args:
    video_path: Path of the video file to analyse.
    threshold: Threshold passed to ContentDetector.

  Returns:
    The scene list from the scene manager; each scene is a tuple of the
    (start, end) timecode.
  """
  # Create our video & scene managers, then add the detector.
  video_manager = VideoManager([video_path])
  scene_manager = SceneManager()
  scene_manager.add_detector(ContentDetector(threshold=threshold))

  try:
    # Improve processing speed by downscaling before processing.
    video_manager.set_downscale_factor()

    # Start the video manager and perform the scene detection.
    video_manager.start()
    scene_manager.detect_scenes(frame_source=video_manager)

    return scene_manager.get_scene_list()
  finally:
    # Release the underlying video whether or not detection succeeded.
    video_manager.release()

In [None]:
scene_list = find_scenes(goldeneye)

In [None]:
# Frame width: 640
# Frame height: 272
# Frame count: 1980
# FPS: 23.976023976023978

In [None]:
# !rm -rf goldeneye_img

In [None]:
width, height, fps, frame_count = read_video(goldeneye, './goldeneye_img')

In [None]:
# !rm -rf goldeneye_scene

In [None]:
output_path = './goldeneye_scene'
import os 
if not os.path.exists(output_path):
        os.makedirs(output_path)

In [None]:
from gluoncv.utils.filesystem import try_import_cv2
cv2 = try_import_cv2()
fourcc = cv2.VideoWriter_fourcc(*'FMP4')

In [None]:
def read_image(image_path, frame_no):
  """Load the saved frame image for `frame_no` from `image_path`.

  The file read is the five-digit, zero-padded value of `frame_no + 1` with a
  '.jpg' extension. Returns whatever `cv2.imread` returns for that path.
  """
  jpeg_name = '%05d.jpg' % (frame_no + 1)
  return cv2.imread(os.path.join(image_path, jpeg_name))

In [None]:
# Write one MP4 per detected scene, named after the scene's first frame number.
for scene_start, scene_end in scene_list:
  first_frame = scene_start.get_frames()
  output_filename =  output_path + '/%05d.mp4'%(first_frame)
  print(output_filename)

  out = cv2.VideoWriter(output_filename, fourcc, fps, (width, height))
  try:
    for frame_idx in range(first_frame, scene_end.get_frames()):
      img = read_image('./goldeneye_img', frame_idx)
      if img is None:
        # No readable image for this frame; skip it rather than crash the writer.
        continue
      out.write(img)
  finally:
    # Finalize the scene file even if a frame fails part-way through.
    out.release()

In [None]:
def compress_folder(input_path):
  """Zip every file under a folder into '<input_path>.zip'.

  Files are stored under their path relative to `input_path`, so files with
  the same name in different sub-folders do not collide inside the archive.
  For a flat folder the archive names are the bare file names.

  Args:
    input_path: Folder to compress. The archive is written next to it, using
      the normalized folder path plus '.zip'.
  """
  import os
  from zipfile import ZipFile

  def with_progress(items):
    # The progress bar is cosmetic: fall back to plain iteration when tqdm's
    # notebook bar cannot be imported or created.
    try:
      from tqdm.notebook import tqdm
      return tqdm(items)
    except ImportError:
      return items

  # Normalizing drops a trailing slash, which would otherwise place the
  # archive inside the folder that is being walked.
  root = os.path.normpath(input_path)

  # create a ZipFile object
  with ZipFile('{}.zip'.format(root), 'w') as zipObj:
    # Iterate over all the files in directory
    for folderName, subfolders, filenames in os.walk(root):
      for filename in with_progress(filenames):
        # create complete filepath of file in directory
        filePath = os.path.join(folderName, filename)
        # Add file to zip under its path relative to the compressed folder
        zipObj.write(filePath, os.path.relpath(filePath, root))

In [None]:
compress_folder('goldeneye_scene')

# 실습

## 1. 유럽거리 영상을 다운받아
- 5초~ 15초 구간 step1 으로 이미지 생성

```
def read_video(filename, output_path, start_ms = 0, end_ms = None, step=1):
-->
width, height, fps, frame_count = read_video(europe_street, 'europe_img_1', 5000, 15000, 1) 
```
- 30초 ~ 50초 구간 step4 로 이미지 생성

```
width, height, fps, frame_count = read_video(europe_street, 'europe_img_2', 30000, 50000, 4) 
```

## 2. 생성된 이미지를 Object Detect 를 수행하여 video 파일을 생성한다.
- europe_img_1

```
def get_detect_video(detector,image_path, output_path, output_filename, width, height, fps, step=1): 
--> 
from gluoncv import model_zoo, data, utils
# model download from model_zoo
yolo = model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)

get_detect_video(yolo, 'europe_img_1', 'europe_detect', 'europe_1.mp4', 1280, 720, fps, step=1)
```
- europe_img_2
  - 주의 : 이미지 추출 시 이미 step=4를 적용했으므로 video 생성 시 step은 1로 설정하고, 재생 속도를 유지하려면 fps를 원본의 1/4로 전달한다.
  
```
get_detect_video(yolo, 'europe_img_2', 'europe_detect', 'europe_2.mp4', 1280, 720, fps / 4, step=1)
```

In [None]:
europe_street = get_youtube_video('https://www.youtube.com/watch?v=AMu7QjE4qg0')
print(europe_street)

### 실행결과

In [None]:
width, height, fps, frame_count = read_video(europe_street, 'europe_img_1', 5000, 15000, 1) 

In [None]:
from gluoncv import model_zoo, data, utils
# model download from model_zoo
yolo = model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)

# The 7th positional argument of get_detect_video is fps, not step: pass the
# frame rate returned by read_video and give step by keyword.
get_detect_video(yolo, 'europe_img_1', 'europe_detect', 'europe_1.mp4', 1280, 720, fps, step=1)

In [None]:
width, height, fps, frame_count = read_video(europe_street, 'europe_img_2', 30000, 50000, 4) 

In [None]:
# The 7th positional argument of get_detect_video is fps, not step. The images
# in europe_img_2 were extracted with step 4, so a quarter of the source frame
# rate keeps the original playback speed.
get_detect_video(yolo, 'europe_img_2', 'europe_detect', 'europe_2.mp4', 1280, 720, fps / 4, step=1)