# MissOh Datasets
### set paths

In [8]:
import sys
sys.path.append("../") # go to parent dir

In [9]:
import os
from torch.utils.data import Dataset, DataLoader
import cv2
import pickle
import numpy as np
import glob
from torchvision.transforms import Compose, Resize
from PIL import Image
import json
import argparse
import matplotlib.pyplot as plt

from Yolo_v2_pytorch.src.utils import *

import ipdb

In [10]:
MissOh_CLASSES = ['person']
print(MissOh_CLASSES[0])
global colors
colors = pickle.load(open("../Yolo_v2_pytorch/src/pallete", "rb"))
print(colors[0])

person
(39, 129, 113)


In [11]:
def get_args():
    parser = argparse.ArgumentParser(
        "You Only Look Once:Unified, Real-Time Object Detection")
    parser.add_argument("--image_size", type=int,
                        default=448,
                        help="The common width and height for all images")
    parser.add_argument("--batch_size", type=int, default=1,
                        help="The number of images per batch")
    # Training base Setting
    parser.add_argument("--momentum", type=float, default=0.9)
    parser.add_argument("--decay", type=float, default=0.0005)
    parser.add_argument("--dropout", type=float, default=0.5)
    parser.add_argument("--num_epoches", type=int, default=100)
    parser.add_argument("--test_interval", type=int, default=1,
                        help="Number of epoches between testing phases")
    parser.add_argument("--object_scale", type=float, default=1.0)
    parser.add_argument("--noobject_scale", type=float, default=0.5)
    parser.add_argument("--class_scale", type=float, default=1.0)
    parser.add_argument("--coord_scale", type=float, default=5.0)
    parser.add_argument("--reduction", type=int, default=32)
    parser.add_argument("--es_min_delta", type=float, default=0.0,
                        help="Early stopping's parameter:minimum change loss to qualify as an improvement")
    parser.add_argument("--es_patience", type=int, default=0,
                        help="Early stopping's parameter:number of epochs with no improvement after which training will be stopped. Set to 0 to disable this technique.")

    parser.add_argument("--pre_trained_model_type",
                        type=str, choices=["model", "params"],
                        default="model")
    parser.add_argument("--pre_trained_model_path", type=str,
                        default="Yolo_v2_pytorch/trained_models/only_params_trained_yolo_voc") # Pre-training path

    parser.add_argument("--saved_path", type=str,
                        default="./checkpoint") # saved training path
    parser.add_argument("--conf_threshold", type=float, default=0.35)
    parser.add_argument("--nms_threshold", type=float, default=0.5)
    args = parser.parse_args(args=[]) # for jupyter 
    return args

In [12]:
opt = get_args()
print(opt)

Namespace(batch_size=1, class_scale=1.0, conf_threshold=0.35, coord_scale=5.0, decay=0.0005, dropout=0.5, es_min_delta=0.0, es_patience=0, image_size=448, momentum=0.9, nms_threshold=0.5, noobject_scale=0.5, num_epoches=100, object_scale=1.0, pre_trained_model_path='Yolo_v2_pytorch/trained_models/only_params_trained_yolo_voc', pre_trained_model_type='model', reduction=32, saved_path='./checkpoint', test_interval=1)


In [13]:
class MissOhDataset(Dataset):
    def __init__(self, image_size=448):
        img_path = '../data/AnotherMissOh/AnotherMissOh_images/AnotherMissOh01/'
        json_dir = '../data/AnotherMissOh/AnotherMissOh_Visual/AnotherMissOh01_visual.json'

        with open(json_dir, encoding='utf-8') as json_file:
            json_data = json.load(json_file)

        self.img_list = []
        self.anno_list = []

        for i in range(len(json_data['visual_results'])):
            for j in range(len(json_data['visual_results'][i]['image_info'])):
                label = []
                for k in range(len(json_data['visual_results'][i]['image_info'][j]['persons'])):
                    try:
                        id_name = json_data['visual_results'][i]['image_info'][j]['persons'][k]['person_id']
                        full_bbox = json_data['visual_results'][i]['image_info'][j]['persons'][k]['person_info'][
                            'full_rect']
                        if full_bbox['min_y'] == "" or full_bbox['max_y'] == "" or full_bbox['min_x'] == "" or full_bbox['max_x'] == "":
                            continue
                        else:
                            temp_label = [full_bbox['min_x'], full_bbox['min_y'], full_bbox['max_x'], full_bbox['max_y'], 0]
                            label.append(temp_label)
                    except:
                        id_name = []

                self.img_list.append(img_path + json_data['visual_results'][i]['vid'][-9:].replace('_', '/') + '/' +
                                     json_data['visual_results'][i]['image_info'][j]['frame_id'][-16:] + '.jpg')
                self.anno_list.append(label)


        self.image_size = (image_size, image_size)

        self.num_images = len(self.img_list)
        import ipdb; ipdb.set_trace()


    def __len__(self):
        return self.num_images

    def __getitem__(self, item):
        image_path = os.path.join(self.img_list[item])
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        transformations = Compose([Resize(self.image_size)])

        objects = self.anno_list[item]
        #
        # image, objects = transformations((image, objects))
        image = transformations(Image.fromarray(image))

        return np.transpose(np.array(image, dtype=np.float32), (2, 0, 1)), np.array(objects, dtype=np.float32)

### json_data structure
- json_data['file_name'] : 'AnotherMissOh01.mp4'
- json_data['visual_results']
- json_data['visual_results'][0].keys() : dict_keys(['start_time', 'end_time', 'vid', 'image_info'])
- {
'start_time': '00:02:51;16', 
'end_time': '00:02:54;15', 
'vid': 'AnotherMissOh01_001_0078', 
'image_info': ...}
- json_data['visual_results'][0]['image_info']
- [{'frame_id': 'AnotherMissOh01_001_0078_IMAGE_0000004295', 
'place': 'none', 
'persons': [
{'person_id': 'Haeyoung1', 
'person_info': {
'face_rect': {'min_x': 515, 'min_y': 0, 'max_x': 845, 'max_y': 443}, 
'full_rect': {'min_x': 278, 'min_y': 2, 'max_x': 1025, 'max_y': 769}, 
'behavior': 'stand up', 
'predicate': 'none', 
'emotion': 'Neutral', 
'face_rect_score': '0.5', 
'full_rect_score': '0.9'}, 
'related_objects': []}], 
'objects': []}, 
- {'frame_id': 'AnotherMissOh01_001_0078_IMAGE_0000004311', 
'place': '', 
'persons': [{
'person_id':'Haeyoung1',
'person_info': {
'face_rect': {'min_x': 515, 'min_y': 0, 'max_x': 831, 'max_y': 411}, 
'full_rect': {'min_x': 270, 'min_y': 0, 'max_x': 1025, 'max_y': 768}, 
'behavior': 'stand up', 
'predicate': 'none', 
'emotion': 'Neutral', 
'face_rect_score': '0.5', 
'full_rect_score': '0.9'}, 
'related_objects': []}],
'objects': []},]

In [14]:
training_set = MissOhDataset(opt.image_size)


training_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": True}

training_generator = DataLoader(training_set, **training_params)

for iter, batch in enumerate(training_generator):
    image, label = batch
    
    #----object bboxes--------
    height, width = (768, 1024)
    width_ratio = 1.0
    height_ratio = 1.0
    
    image = image[0].cpu().numpy()
    image = np.transpose(np.array(image, dtype=np.uint8), (1, 2, 0))
    output_image = cv2.cvtColor(np.float32(image), cv2.COLOR_RGB2BGR)
    output_image = cv2.resize(output_image, dsize=(1024, 768), interpolation=cv2.INTER_CUBIC)
    print(image.shape)
    for pred in label.cpu().numpy():
        if len(pred) > 0:
            pred = pred[0]
            xmin = int(max(pred[0] / width_ratio, 0))
            ymin = int(max(pred[1] / height_ratio, 0))
            xmax = int(min((pred[0] + pred[2]) / width_ratio, width))
            ymax = int(min((pred[1] + pred[3]) / height_ratio, height))
            color = colors[int(pred[4])]

            cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), color, 2)
            #text_size = cv2.getTextSize(pred[4] + ' : %.2f' % pred[4], cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            #cv2.rectangle(output_image, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
            #              color, -1)
            #cv2.putText(output_image, pred[4] + ' : %.2f' % pred[4],
            #    (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1,
            #    (255, 255, 255), 1)
            
    #----image and bboxes-----
    
    output_image = np.uint8(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
    plt.imshow(np.uint8(output_image))
    plt.show()

--Return--
None
> [0;32m<ipython-input-13-2470c9be158d>[0m(36)[0;36m__init__[0;34m()[0m
[0;32m     35 [0;31m        [0mself[0m[0;34m.[0m[0mnum_images[0m [0;34m=[0m [0mlen[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mimg_list[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 36 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     37 [0;31m[0;34m[0m[0m
[0m
ipdb> json_data.keys()
dict_keys(['file_name', 'visual_results'])
ipdb> json_data.items()


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



ipdb> json_data['file_name']
'AnotherMissOh01.mp4'
ipdb> json_data['visual_results']


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



ipdb> json_data['visual_results'][0]
{'start_time': '00:02:51;16', 'end_time': '00:02:54;15', 'vid': 'AnotherMissOh01_001_0078', 'image_info': [{'frame_id': 'AnotherMissOh01_001_0078_IMAGE_0000004295', 'place': 'none', 'persons': [{'person_id': 'Haeyoung1', 'person_info': {'face_rect': {'min_x': 515, 'min_y': 0, 'max_x': 845, 'max_y': 443}, 'full_rect': {'min_x': 278, 'min_y': 2, 'max_x': 1025, 'max_y': 769}, 'behavior': 'stand up', 'predicate': 'none', 'emotion': 'Neutral', 'face_rect_score': '0.5', 'full_rect_score': '0.9'}, 'related_objects': []}], 'objects': []}, {'frame_id': 'AnotherMissOh01_001_0078_IMAGE_0000004311', 'place': '', 'persons': [{'person_id': 'Haeyoung1', 'person_info': {'face_rect': {'min_x': 515, 'min_y': 0, 'max_x': 831, 'max_y': 411}, 'full_rect': {'min_x': 270, 'min_y': 0, 'max_x': 1025, 'max_y': 768}, 'behavior': 'stand up', 'predicate': 'none', 'emotion': 'Neutral', 'face_rect_score': '0.5', 'full_rect_score': '0.9'}, 'related_objects': []}], 'objects': []}, 

BdbQuit: 