In [25]:
import pandas as pd
import json
import os
import cv2
import torch
from ultralytics import YOLO
from matplotlib import pyplot as plt
import time
import numpy as np

In [26]:
# Class-name vocabulary for every predicted face attribute. The position of a
# name inside its list is the class index that convert_to_text() looks up, so
# the order inside each list must not be shuffled.
info_label = dict(
    age=['20-30s', '40-50s', 'Baby', 'Kid', 'Senior', 'Teenager'],
    race=['Caucasian', 'Mongoloid', 'Negroid'],
    masked=['masked', 'unmasked'],
    skintone=['dark', 'light', 'mid-dark', 'mid-light'],
    emotion=['Anger', 'Disgust', 'Fear', 'Happiness', 'Neutral', 'Sadness', 'Surprise'],
    gender=['Female', 'Male'],
)

In [31]:
def imshow_np_image(img):
    """Show an image array with matplotlib after reversing its third axis.

    Reversing the channel axis presumably converts the BGR layout returned by
    ``cv2.imread`` into the RGB order that ``plt.imshow`` renders.

    Args:
        img: Array indexed as ``[row, column, channel]``.
    """
    channel_reversed = img[:, :, ::-1]
    plt.imshow(channel_reversed)
    plt.show()

def tensor_to_float(lis):
    """Convert an iterable of scalar tensors into a list of Python floats.

    Every element is moved to the CPU, detached from the autograd graph and
    passed through NumPy before being cast with ``float``.

    Args:
        lis: Iterable of tensors that each hold a single value.

    Returns:
        A new list with one ``float`` per input tensor, in the same order.
    """
    floats = []
    for tensor in lis:
        as_numpy = tensor.cpu().detach().numpy()
        floats.append(float(as_numpy))
    return floats
    
def process_out_detect(img, results):
    """Collect the detected boxes and draw each one onto ``img``.

    Args:
        img: Image array; ``cv2.rectangle`` draws on it in place.
        results: Iterable of detection results. Each item exposes ``.boxes``,
            and every box carries an ``xywh`` tensor whose first row is read
            as (centre x, centre y, width, height).

    Returns:
        A list with one ``[left, top, width, height]`` list of floats per box.
    """
    bbox_lis = []
    for result in results:
        for box in result.boxes:
            cx, cy, bw, bh = box.xywh[0]

            # Convert the centre-based box into its corner coordinates once.
            left, top = cx - bw / 2, cy - bh / 2
            right, bottom = cx + bw / 2, cy + bh / 2

            bbox_lis.append(tensor_to_float([left, top, bw, bh]))

            # Green outline, 3 px thick; coordinates are truncated to ints.
            top_left = (int(left), int(top))
            bottom_right = (int(right), int(bottom))
            cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
    return bbox_lis

def sample_values(lis):
    """Print ``lis`` and return a copy right-padded with -1 up to 30 items.

    Inputs that already hold 30 or more items come back as a plain list with
    nothing appended.

    Args:
        lis: Any iterable of values.

    Returns:
        A new list of at least 30 elements.
    """
    print(lis)
    padded = list(lis)
    shortfall = 30 - len(padded)
    if shortfall > 0:
        padded.extend([-1] * shortfall)
    return padded

def convert_to_text(key, index):
    """Return the label text for class ``index`` of attribute ``key``.

    The lookup goes through the module-level ``info_label`` table, so an
    unknown ``key`` raises ``KeyError`` and an out-of-range ``index`` raises
    ``IndexError``.
    """
    labels_for_key = info_label[key]
    return labels_for_key[index]

def procees_out_cls(results, index_box, labels=None):
    """Decode the attribute predictions of one detected box into label text.

    Args:
        results: Mapping from attribute name to a logits tensor. Each tensor
            is sliced along its last axis in chunks of ``len(labels[key])``;
            the chunk at position ``index_box`` is taken as that box's logits.
            Assumes a batch of one image, i.e. shape
            ``(1, n_boxes * n_classes)`` -- TODO confirm against the model head.
        index_box: Zero-based position of the box whose chunk is decoded.
        labels: Optional mapping from attribute name to its ordered class
            names. Defaults to the module-level ``info_label`` table.

    Returns:
        Dict mapping every key of ``results`` to the predicted label string.

    Raises:
        KeyError: If ``results`` contains an attribute missing from ``labels``.
    """
    if labels is None:
        labels = info_label

    preds = {}
    for key, logits in results.items():
        # Take the class count from the label table instead of a positional
        # list, so the slice stays correct whatever order the keys arrive in.
        n_classes = len(labels[key])
        start = index_box * n_classes
        box_logits = logits[:, start:start + n_classes]

        probs = torch.softmax(box_logits, dim=-1)
        # .item() extracts the single predicted index directly from the tensor
        # (works on any device and avoids NumPy's deprecated conversion of a
        # 1-element, non-0-d array to a Python scalar).
        index = int(torch.argmax(probs, dim=-1).item())
        preds[key] = labels[key][index]
    return preds

In [28]:
# Mapping used below as {image file name: image id}. The context manager
# closes the file handle deterministically instead of leaking it.
with open('file_name_to_image_id.json', encoding='utf-8') as id_file:
    image_id = json.load(id_file)

In [29]:
# Load the trained weights and push a single image through the model; this
# appears to be a quick sanity run whose output is kept in `a` for inspection.
# NOTE(review): cv2.imread returns None when the file cannot be read -- confirm
# the sample path exists before relying on this cell.
yolo = YOLO('best.pt')
img = cv2.imread('public_test/100147591.jpg')
a = yolo(img)


0: 224x320 1 face, 12.5ms
Speed: 5.2ms preprocess, 12.5ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 320)


In [32]:
# Run the model over every listed image and collect one output row per
# detected face. Images with no detection contribute no row.
path = 'public_test/'
all_preds = []  # rows: [file_name, bbox, image_id, race, age, emotion, gender, skintone, masked]
times = []      # wall-clock seconds spent in the model call, one entry per image
for index, name in enumerate(image_id.keys()):
    print(index)
    name_id = image_id[name]

    img_path = os.path.join(path, name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail loudly here rather than handing None to the model.
        raise FileNotFoundError(f'could not read image: {img_path}')

    started = time.time()
    results = yolo(img)
    times.append(time.time() - started)

    # results[0] is iterated as detection results (each exposing .boxes) and
    # results[1] is used as a mapping of attribute name -> logits tensor.
    bbox_lis = []
    for r in results[0]:
        for box in r.boxes:
            x, y, w, h = box.xywh[0]
            # Convert centre-based xywh into [left, top, width, height].
            bbox_lis.append(tensor_to_float([x - w / 2, y - h / 2, w, h]))

    for box_index, bbox in enumerate(bbox_lis):
        # Reuse the shared decoder instead of duplicating its slicing logic
        # inline; a fresh dict per box also keeps the outer `index` intact.
        preds = procees_out_cls(results[1], box_index)
        all_preds.append([
            name,
            bbox,
            name_id,
            preds['race'],
            preds['age'],
            preds['emotion'],
            preds['gender'],
            preds['skintone'],
            preds['masked'],
        ])

    

0

0: 224x320 1 face, 113.1ms
Speed: 4.6ms preprocess, 113.1ms inference, 4.3ms postprocess per image at shape (1, 3, 224, 320)
1

0: 224x320 1 face, 19.8ms
Speed: 0.9ms preprocess, 19.8ms inference, 2.0ms postprocess per image at shape (1, 3, 224, 320)
2

0: 224x320 1 face, 12.9ms
Speed: 5.9ms preprocess, 12.9ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 320)
3

0: 224x320 1 face, 9.5ms
Speed: 1.0ms preprocess, 9.5ms inference, 5.6ms postprocess per image at shape (1, 3, 224, 320)
4

0: 224x320 (no detections), 8.7ms
Speed: 2.0ms preprocess, 8.7ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 320)
5

0: 224x320 1 face, 41.0ms
Speed: 5.3ms preprocess, 41.0ms inference, 1.9ms postprocess per image at shape (1, 3, 224, 320)
6

0: 224x320 1 face, 37.6ms
Speed: 1.0ms preprocess, 37.6ms inference, 3.1ms postprocess per image at shape (1, 3, 224, 320)
7

0: 224x320 1 face, 12.5ms
Speed: 0.8ms preprocess, 12.5ms inference, 4.6ms postprocess per image at shape (1

In [33]:
# Spot-check one assembled row; raises IndexError if fewer than 501 rows exist.
all_preds[500]

['49850110.jpg',
 [660.0537109375, 295.8719482421875, 167.07867431640625, 249.24441528320312],
 500,
 'Caucasian',
 '20-30s',
 'Neutral',
 'Female',
 'light',
 'unmasked']

In [34]:
# Build the answer table: one DataFrame row per entry of all_preds, with the
# columns named in the same order the rows were assembled:
# file_name,bbox,image_id,race,age,emotion,gender,skintone,masked
submission_columns = [
    'file_name',
    'bbox',
    'image_id',
    'race',
    'age',
    'emotion',
    'gender',
    'skintone',
    'masked',
]
final = pd.DataFrame(all_preds, columns=submission_columns)

In [35]:
# Display the assembled table (the notebook renders a truncated view).
final

Unnamed: 0,file_name,bbox,image_id,race,age,emotion,gender,skintone,masked
0,100147591.jpg,"[1149.007080078125, 460.760498046875, 247.2583...",1,Caucasian,20-30s,Neutral,Female,light,unmasked
1,100429351.jpg,"[591.6748046875, 303.05548095703125, 306.92004...",2,Mongoloid,20-30s,Neutral,Female,light,unmasked
2,100473997.jpg,"[646.132568359375, 282.62164306640625, 361.989...",3,Mongoloid,20-30s,Happiness,Female,light,unmasked
3,100868012.jpg,"[995.4942626953125, 237.6397247314453, 127.467...",4,Mongoloid,20-30s,Happiness,Female,light,unmasked
4,101061106.jpg,"[783.2681884765625, 229.55126953125, 302.63098...",6,Mongoloid,20-30s,Happiness,Female,light,unmasked
...,...,...,...,...,...,...,...,...,...
2165,image_9841.jpg,"[278.25, 213.11410522460938, 591.5426025390625...",2180,Caucasian,20-30s,Happiness,Female,light,unmasked
2166,image_9913.jpg,"[257.9833984375, 177.88156127929688, 558.03308...",2181,Caucasian,20-30s,Happiness,Female,light,unmasked
2167,image_9915.jpg,"[213.5572509765625, 176.28448486328125, 561.00...",2182,Caucasian,20-30s,Happiness,Female,light,unmasked
2168,image_9935.jpg,"[280.17083740234375, 163.39743041992188, 574.0...",2183,Caucasian,20-30s,Happiness,Female,light,unmasked


In [36]:
# Write the answer table to disk; index=False keeps the positional row index
# out of the file so only the named columns are written.
output_csv = 'answer.csv'
final.to_csv(output_csv, index=False)