In [1]:
import cv2
import copy
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, models
from collections import OrderedDict

In [2]:
def remove_box_in_box(test_list, log_enable=False):
    """Drop every box that lies inside another box of the list.

    Boxes are ``(x, y, w, h)`` sequences. A box is treated as nested when its
    top-left corner is strictly right of and below the outer box's top-left
    corner, and its bottom-right corner does not extend past the outer box's
    bottom-right corner.

    The previous implementation remembered "a nested box was found" in a
    single flag and then skipped the *next* list entry, whichever box that
    happened to be. Nested boxes are now tracked by index, so exactly the
    boxes that were found nested are removed.

    Args:
        test_list: sequence of ``(x, y, w, h)`` boxes.
        log_enable: when true, print a trace of the comparisons.

    Returns:
        A new list holding the surviving boxes in their original order.
    """
    def _contains(outer, inner):
        # Strict on the top-left corner, inclusive on the bottom-right one.
        return (outer[0] < inner[0] and
                outer[1] < inner[1] and
                outer[0] + outer[2] >= inner[0] + inner[2] and
                outer[1] + outer[3] >= inner[1] + inner[3])

    dropped = set()  # indices of boxes found inside some other box
    count = len(test_list)
    for i in range(count):
        if i in dropped:
            continue
        current = test_list[i]
        if log_enable:
            print('i:', i, current)
            print('*'*35)
        for k in range(i + 1, count):
            if k in dropped:
                continue
            other = test_list[k]
            if log_enable:
                print('[j+i+1]:', k, other)
            if _contains(current, other):
                if log_enable:
                    print("Found this box {} in current box {}!!".format(other, current))
                    print("Drop this box {}".format(other))
                dropped.add(k)
            elif _contains(other, current):
                if log_enable:
                    print("Found current box {} in this box {}!!".format(current, other))
                    print("Drop current box {}".format(current))
                dropped.add(i)
                # The current box is gone; anything nested in it is also
                # nested in `other` and will be found when `other` is visited.
                break
        if log_enable:
            print('*'*35)
    return [box for idx, box in enumerate(test_list) if idx not in dropped]

In [3]:
def box_filter(boxs):
    """Keep only boxes whose size and position pass the fixed pixel thresholds.

    A box ``(x, y, w, h)`` is rejected when any of the following holds:

    * it is small: ``w < 80`` and ``h < 80``;
    * it is large: ``h > 350`` or ``w * h > 50000``;
    * it touches the border margin: ``x < 20``, ``x > 600``, ``x + w > 620``,
      ``y < 5`` or ``y + h > 460``.

    NOTE(review): the limits look tuned for 640x480 frames - confirm before
    using another resolution.

    Args:
        boxs: sequence of ``(x, y, w, h)`` boxes.

    Returns:
        List of the boxes that passed. An empty input now yields an empty
        list (after printing ``'no boxs found'``) instead of ``None``, so the
        return type is the same on every path.
    """
    if len(boxs) == 0:
        print('no boxs found')
        return []

    kept = []
    for box in boxs:
        x, y, w, h = box[0], box[1], box[2], box[3]
        too_small = w < 80 and h < 80
        too_large = h > 350 or w * h > 50000
        near_border = (x < 20 or x > 600 or x + w > 620 or
                       y < 5 or y + h > 460)
        if too_small or too_large or near_border:
            continue
        kept.append(box)
    return kept

In [4]:
def six_box_color(box_cnt):
    """Return one of six fixed colour tuples, selected by ``box_cnt % 6``.

    Remainders 0-4 map to their own colour; every other remainder falls back
    to ``(255, 0, 0)``.
    """
    palette = {
        0: (255, 0, 255),
        1: (255, 255, 0),
        2: (0, 255, 255),
        3: (0, 255, 0),
        4: (0, 0, 255),
    }
    return palette.get(box_cnt % 6, (255, 0, 0))

def plot_boxs(frame0, boxs):
    """Draw each box and an ``objectN`` tag onto ``frame0`` in place.

    Args:
        frame0: image passed to the cv2 drawing calls; it is modified.
        boxs: sequence of ``(x, y, w, h)`` boxes. When empty, only the
            message ``'no boxs found'`` is printed.

    Returns:
        None.
    """
    if not len(boxs):
        print('no boxs found')
        return

    for number, box in enumerate(boxs, start=1):
        colour = six_box_color(number - 1)
        label = 'object' + str(number)
        left, top = box[0], box[1]
        right, bottom = left + box[2], top + box[3]
        # outline of the detected object
        cv2.rectangle(frame0, (left, top), (right, bottom), colour, 2)
        # filled strip above the box that serves as background for the label
        cv2.rectangle(frame0, (left - 1, top - 18),
                      (left + box[2] // 3 * 2, top), colour, -1)
        cv2.putText(frame0, label, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX,
                    0.4, (0, 0, 0), 1, cv2.LINE_AA)

In [5]:
#crop_image >> process image with particular bounding box
def crop_image_pre(img, box):
    """Copy the ``box`` region of ``img`` onto the centre of a zeroed canvas.

    Args:
        img: 3-dimensional image array (indexed ``[row, col, channel]``).
        box: ``(x, y, w, h)`` region to extract from ``img``.

    Returns:
        A new array with the same shape and dtype as ``img`` that is zero
        everywhere except for the extracted region placed in the middle.
    """
    bx, by, bw, bh = box[0], box[1], box[2], box[3]
    canvas = np.zeros_like(img)
    # top-left corner that centres a (bh, bw) patch on the canvas
    top = (img.shape[0] - bh) // 2
    left = (img.shape[1] - bw) // 2
    patch = img[by:by + bh, bx:bx + bw, :]
    canvas[top:top + bh, left:left + bw, :] = patch
    return canvas
# wrap crop image with box list
def crop_img_with_box_list(img, box_list, model,
                           save_crop_image=False,
                           show_img_on_jupyter=False,
                           prediction=False):
    """Crop every box out of ``img`` and optionally save, show and classify it.

    Args:
        img: source image; it is not modified (annotations go onto a copy).
        box_list: iterable of ``(x, y, w, h)`` boxes.
        model: classifier handed to ``model_predict``.
        save_crop_image: write each crop to ``./crop_<n>.jpg``.
        show_img_on_jupyter: show each crop in its own matplotlib figure.
        prediction: classify each crop and draw the box, class name and
            probability onto the returned image.

    Returns:
        ``((cls_list, probs_list), result_img)`` - the top-1 class name and
        probability of every successfully classified crop, plus the
        annotated copy of ``img``.
    """
    result_img = img.copy()
    cls_list = []
    probs_list = []
    for idx, box in enumerate(box_list):
        img_crop = crop_image_pre(img, box)
        if save_crop_image:
            cv2.imwrite('./crop_'+str(idx+1)+'.jpg', img_crop)
        if show_img_on_jupyter:
            plt.figure(figsize=[7,7])
            plt.imshow(cv2.cvtColor(img_crop, cv2.COLOR_BGR2RGB))
            plt.xticks([])
            plt.yticks([])
        if prediction:
            print('Do prediction now')
            try:
                probs, cls = model_predict(img_crop, model, topk=1)
                color = six_box_color(idx)
                cv2.rectangle(result_img,(box[0],box[1]),(box[0]+box[2],box[1]+box[3]),color,2)
                cv2.rectangle(result_img,(box[0]-1,box[1]-18),(box[0]+box[2]//4*3,box[1]),color,-1)
                cv2.putText(result_img, cls[0]+'_' + str(probs[0]), (box[0], box[1]-5), cv2.FONT_HERSHEY_SIMPLEX,
                            0.4, (0, 0, 0), 1, cv2.LINE_AA)
                cls_list.append(cls[0])
                probs_list.append(probs[0])
            except Exception as err:
                # Best effort: one failing crop must not abort the rest, but
                # report why it failed. KeyboardInterrupt/SystemExit are no
                # longer swallowed.
                print('prediction error:', repr(err))
    return (cls_list, probs_list), result_img

In [6]:
def rebuild_model(filename ='checkpoint.pth', device =torch.device("cpu")):
    """Rebuild the ResNet-18 classifier and restore it from a checkpoint file.

    The network is torchvision's ``resnet18`` whose ``fc`` layer is replaced
    by ``Linear(512, 256) -> ReLU -> Dropout -> Linear(256, 14) -> LogSoftmax``.
    When the checkpoint is a dict, its ``'state_dict'`` entry is loaded into
    the model and its ``'class_to_idx'`` entry is attached as
    ``model.class_to_idx``.

    Args:
        filename: path of the checkpoint written with ``torch.save``.
        device: ``torch.device`` (or device string) of type ``cpu`` or
            ``cuda``; indexed devices such as ``cuda:0`` are accepted.

    Returns:
        The rebuilt model, moved to ``device``. For any other device type an
        error message is printed and the integer ``1`` is returned (kept for
        backward compatibility).
    """
    device = torch.device(device)
    if device.type not in ('cpu', 'cuda'):
        print('Error!! checkpoint read fail, check your device setting.')
        return 1

    # SECURITY: torch.load unpickles the file - only load trusted checkpoints.
    # map_location makes the load independent of the device used for saving.
    checkpoint = torch.load(filename, map_location=device)

    # Pretrained ResNet-18 backbone; the weights are overwritten below when
    # the checkpoint carries a state_dict.
    model = models.resnet18(pretrained=True)
    for params in model.parameters():
        # Freeze the backbone. (The attribute is `requires_grad`; the former
        # spelling `require_grad` only created an unused attribute.)
        params.requires_grad = False
    print('building resnet18 pretrain_model...')#message

    model.fc = nn.Sequential(
                        nn.Linear(512, 256),
                        nn.ReLU(),
                        nn.Dropout(),
                        nn.Linear(256, 14),
                        nn.LogSoftmax(dim=1)
                    )
    print('building model classifier with full connection:', #message
          '\ninput_size:512',
          '\nhidden_size:256',
          '\noutput_size:14'
         )

    if isinstance(checkpoint, dict):
        if 'state_dict' in checkpoint:
            print('update model stat_dict...')
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print('no state_dict in this checkpoint')
        if 'class_to_idx' in checkpoint:
            print('update model class_to_idx...')
            model.class_to_idx = checkpoint['class_to_idx']
        else:
            print('no class_to_idx in this checkpoint')
    else:
        print('no stat_dict & class_to_idx could be updated',
              'you should retrain this model again.'
             )

    # Move the model only after the new head is attached, so the `fc` layers
    # end up on the same device as the backbone.
    model.to(device)
    print('rebuild model finished!!')  #message
    return model

In [7]:
# cv2 resize to keep aspect ratio
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: array whose first two dimensions are ``(height, width)``.
        width: target width in pixels; takes precedence when both ``width``
            and ``height`` are given.
        height: target height in pixels, used only when ``width`` is None.
        inter: interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    src_h, src_w = image.shape[:2]

    # nothing requested: hand the input back untouched
    if width is None and height is None:
        return image

    if width is not None:
        # scale by the width ratio and derive the matching height
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # scale by the height ratio and derive the matching width
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    return cv2.resize(image, target, interpolation = inter)

In [10]:
def img_preprocess_for_model(np_img_or_filepath, resize=256,
                          center_crop_size=224, plot_img=False):
    '''Preprocess an image for the PyTorch model.

    Steps: load (when a path is given), resize the shorter edge to
    ``resize``, centre-crop to ``center_crop_size``, convert BGR to RGB,
    scale to [0, 1], normalise per channel with the fixed means / standard
    deviations below, and move the channel axis to the front.

    Args:
        np_img_or_filepath: path string read with ``cv2.imread``, or an
            already loaded image array in cv2's BGR channel order.
        resize: length in pixels of the shorter edge after resizing.
        center_crop_size: edge length of the square centre crop.
        plot_img: show the intermediate images with matplotlib.

    Returns:
        Float array of shape ``(3, center_crop_size, center_crop_size)`` in
        (channel, height, width) order.

    Raises:
        FileNotFoundError: if a path is given and ``cv2.imread`` cannot read it.
    '''
    # Decide by type instead of relying on cv2.imread raising TypeError for
    # arrays, which depends on the OpenCV version.
    if isinstance(np_img_or_filepath, str):
        img_cv = cv2.imread(np_img_or_filepath)
        if img_cv is None:  # cv2.imread reports failure by returning None
            raise FileNotFoundError(
                'cannot read image: {}'.format(np_img_or_filepath))
    else:
        img_cv = np_img_or_filepath

    # resize so that the shorter edge becomes `resize`
    if img_cv.shape[0] <= img_cv.shape[1]:
        img_resize = image_resize(img_cv, height=resize, inter=cv2.INTER_AREA)
    else:
        img_resize = image_resize(img_cv, width=resize, inter=cv2.INTER_AREA)

    # plot original size img and resized img
    if plot_img:
        plt.figure(figsize=[20,30])
        plt.subplot(121)
        plt.imshow(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
        plt.title("original size:{}".format(img_cv.shape[:2]))
        plt.subplot(122)
        plt.imshow(cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB))
        plt.title("crop size:{}".format(img_resize.shape[:2]))

    # centre crop to (center_crop_size, center_crop_size)
    y_min = (img_resize.shape[0] - center_crop_size) // 2
    x_min = (img_resize.shape[1] - center_crop_size) // 2
    y_max = y_min + center_crop_size
    x_max = x_min + center_crop_size
    img_center_crop = img_resize[y_min:y_max, x_min:x_max, :]

    # convert colour channels from BGR to RGB
    img_RGB = cv2.cvtColor(img_center_crop, cv2.COLOR_BGR2RGB)

    # normalise with per-channel mean and standard deviation
    means = np.array([0.485, 0.456, 0.406])
    stderr = np.array([0.229, 0.224, 0.225])
    nor_img = (img_RGB/255 - means) / stderr

    if plot_img:
        plt.figure(figsize=[20,30])
        plt.subplot(121)
        plt.imshow(img_RGB)
        plt.title("Befor Nomarlization size:{}".format(img_RGB.shape[:2]))
        plt.subplot(122)
        # clip to [0, 1], otherwise the normalised image displays as noise
        plt.imshow(np.clip(nor_img, 0, 1))
        plt.title("After Nomarlization size:{}".format(nor_img.shape[:2]))

    # (H, W, C) -> (C, H, W). The former (2, 1, 0) produced (C, W, H), i.e.
    # it also swapped rows and columns and fed a transposed image to the model.
    # NOTE(review): assumes the model was trained on (C, H, W) tensors such as
    # torchvision's ToTensor produces - confirm against the training pipeline.
    img_trans = nor_img.transpose((2, 0, 1))

    return img_trans

In [8]:
def bounding_box_pre(img, save_path='',show_binary=False, binary_thred = 90,
                 can1=10, can2=150, can3=1,
                 dilate0=0, erode1=0,
                 dilate1=0, erode2=0,
                 dilate2=0, choose=0):
    """Find candidate object boxes in ``img`` from thresholding plus edges.

    Pipeline: grayscale -> inverted Gaussian adaptive threshold, added to the
    Canny edges of ``img``, then dilate / erode / dilate / erode / dilate
    with a 3x3 kernel, then the bounding rectangle of every contour.

    Args:
        img: colour image.
        save_path, binary_thred, choose: accepted for interface
            compatibility; not used by this function.
        show_binary: display the intermediate masks with ``cv2.imshow``.
        can1, can2: the two Canny thresholds.
        can3: forwarded as the fourth positional argument of ``cv2.Canny``.
            NOTE(review): per the OpenCV signature that slot is the optional
            ``edges`` output, not ``apertureSize`` - confirm the intent.
        dilate0, erode1, dilate1, erode2, dilate2: iteration counts of the
            successive morphology steps.

    Returns:
        List of ``(x, y, w, h)`` tuples, one per contour found.
    """
    # NOTE(review): frames read through cv2 are normally BGR, so BGR2GRAY may
    # be what was meant; kept as RGB2GRAY because the thresholds used by the
    # caller were chosen with this conversion - confirm.
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    binary = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11,25)
    edges = cv2.Canny(img, can1, can2, can3)
    bin_edge = cv2.add(binary, edges)

    kernel = np.ones((3,3))
    edge_0 = cv2.dilate(bin_edge, kernel, iterations=dilate0)
    edge_1 = cv2.erode(edge_0, kernel, iterations=erode1)
    edge_2 = cv2.dilate(edge_1, kernel, iterations=dilate1)
    edge_3 = cv2.erode(edge_2, kernel, iterations=erode2)
    edge_4 = cv2.dilate(edge_3, kernel, iterations=dilate2)

    if show_binary:
        cv2.imshow("Binary", binary)
        cv2.imshow("Edge", edges)
        cv2.imshow("Bin+Edge:", bin_edge)
        cv2.imshow('dilate0', edge_0)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x / 4.x; the contours are always the
    # second-to-last item. RETR_LIST == 1 and CHAIN_APPROX_SIMPLE == 2, the
    # literals used before.
    cnts = cv2.findContours(edge_4, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    box_list = []
    for cnt in cnts:
        x, y, w, h = cv2.boundingRect(cnt)
        box_list.append((x, y, w, h))

    return box_list

In [9]:
def model_predict(image_path, model, device=torch.device("cpu"), topk=5):
    ''' Predict the class (or classes) of an image using a trained deep learning model.

    Args:
        image_path: image file path or already loaded image array; passed
            unchanged to ``img_preprocess_for_model``.
        model: network whose output is exponentiated to obtain probabilities
            and which carries a ``class_to_idx`` mapping.
        device: ``torch.device`` or device string to run inference on. Any
            device is accepted (previously anything other than exactly
            ``cpu`` or ``cuda`` raised a NameError).
        topk: number of highest-probability classes to return.

    Returns:
        ``(probs, names)``: numpy array with the ``topk`` probabilities and
        the list of the matching display names.

    Note:
        Display names are looked up in the module-level ``cat_to_name``
        dict, which must be defined before this function is called.
    '''
    device = torch.device(device)

    # preprocess into a (C, H, W) numpy array, then into a float32 batch of one
    np_imgs = img_preprocess_for_model(image_path)
    tensor_imgs = torch.from_numpy(np_imgs).type(torch.FloatTensor)
    batch = tensor_imgs.unsqueeze(0).to(device)

    model.to(device)
    model.eval()
    # no autograd bookkeeping is needed for inference
    with torch.no_grad():
        output = model(batch)
        # the output is exponentiated to obtain probabilities
        probs = torch.exp(output)
        top_probs, top_idx = torch.topk(probs, topk)

    # .cpu() is a no-op for CPU tensors and is required before .numpy() otherwise
    top_probs = top_probs.cpu()
    top_idx = top_idx.cpu()

    # invert class_to_idx so that predicted indices map back to class keys
    idx_to_class = {str(value): key for key, value in model.class_to_idx.items()}
    top_classes = [idx_to_class[str(i)] for i in top_idx[0].numpy()]

    # translate class keys into display names
    top_names = [cat_to_name[i] for i in top_classes]

    return top_probs[0].numpy(), top_names

In [11]:
# classification
def object_classification(video_path, model):
    """Detect boxes on every frame of a video and classify them on demand.

    Each frame is run through ``bounding_box_pre`` -> ``box_filter`` ->
    ``remove_box_in_box`` and shown with the boxes drawn. Pressing SPACE
    classifies the current boxes and writes the annotated frame to a JPEG in
    the working directory; pressing ``q`` stops the loop.

    Args:
        video_path: source handed to ``cv2.VideoCapture``.
        model: classifier forwarded to ``crop_img_with_box_list``.

    Returns:
        Dict mapping the frame counter (as a string) to the list of boxes
        that were classified on that frame. Empty when the source cannot be
        opened.
    """
    cap = cv2.VideoCapture(video_path)
    frame_cnt = 0
    box_frame_cnt = 0
    boxs_list = {}
    # isOpened is a method; the bare attribute is always truthy
    if cap.isOpened():
        print("Camera is opened")
    else:
        print("Can't Open Camera")
        cap.release()
        return boxs_list

    try:
        while True:
            frame_cnt += 1
            ref, frame = cap.read()
            if not ref:
                print('video_end')
                break
            frame0 = frame.copy()
            try:
                # find bounding box
                boxs = bounding_box_pre(frame0, save_path='', show_binary=True,
                             can1=250, can2=255, can3=3,
                             dilate0=1, erode1=0,
                             dilate1=0, erode2=0,
                             dilate2=0, choose=0)
                # filter boxs; tolerate a filter that returns None for "nothing found"
                boxs_f0 = box_filter(boxs) or []
                boxs_f1 = remove_box_in_box(boxs_f0)
                plot_boxs(frame0, boxs_f1)
            except Exception as err:
                # keep the video running, but say why detection failed
                boxs_f1 = []
                print("No object")
                print('detection error:', repr(err))
            box_frame_cnt += 1

            k = cv2.waitKey(1)
            if k == ord(" "):
                boxs_list[str(box_frame_cnt)] = boxs_f1
                result_list, result_img = crop_img_with_box_list(frame, boxs_f1, model, prediction=True)
                name = '-'.join(cls + '_' + str(round(prob, 2))
                                for cls, prob in zip(result_list[0], result_list[1]))
                if not name:
                    # nothing was classified: avoid the empty file name './.jpg'
                    name = 'frame_' + str(frame_cnt)
                cv2.imwrite('./'+name +'.jpg', result_img) #change img save folder

            cv2.imshow("frame", frame0)
            if k == ord('q'):
                break
    finally:
        # release the capture and close the windows even if the loop raised
        cap.release()
        cv2.destroyAllWindows()
    return boxs_list

In [12]:
# Mapping from class key to display name; model_predict reads it through the
# module-level name `cat_to_name`. The encoding is given explicitly so the
# result does not depend on the platform's default locale.
with open('cat_to_name_13.json', 'r', encoding='utf-8') as f:
    cat_to_name = json.load(f)
cat_to_name

{'0': 'Black_Tea',
 '1': 'Cheers',
 '2': 'Chrunchoco',
 '3': 'Coffee_Milk',
 '4': 'Family_Water',
 '5': 'Green_Milk_Tea',
 '6': 'LP33',
 '7': 'LS_SoyMilk',
 '8': 'Oats_Drink',
 '9': 'Oolong_Tea',
 '10': 'Oreo',
 '11': 'Puff',
 '12': 'Soy_Oats',
 '13': 'With_Kernel'}

In [13]:
# Rebuild the classifier from the saved checkpoint; with no `device` argument
# rebuild_model uses its default, the CPU.
model = rebuild_model(filename='resnet18_cuda_drinks_190211_ep110.pth')

building resnet18 pretrain_model...
building model classifier with full connection: 
input_size:512 
hidden_size:256 
output_size:14
update model stat_dict...
update model class_to_idx...
rebuild model finished!!


In [14]:
# Run detection on the demo clip. While it plays, SPACE classifies the current
# boxes and saves the annotated frame, and 'q' quits. `check` receives the
# dict of boxes recorded for the frames on which SPACE was pressed.
check = object_classification(video_path='./Demo_T3.mp4', model=model)
# Alternative source: an integer instead of a path (presumably a capture-device
# index for cv2.VideoCapture - confirm on the target machine).
#check = object_classification(video_path=2, model=model)

Camera is opened
Do prediction now
Do prediction now
Do prediction now
