In [1]:
import sys
import time
import torch
from torch.backends import cudnn
from matplotlib import colors
import cv2
import numpy as np
import os
import statistics
from skimage import io, draw
from yolox.data.datasets import COCO_CLASSES
from yolox.utils import fuse_model, get_model_info, postprocess, vis
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from loguru import logger
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import yolo_detect as yd
import HelmetClassifier as hc
import convert
import copy

# Build the per-category TP/FP/FN counters, keyed by category name.
# Example: categoty_mAP = {"helmet": {'TP': 0, 'FP': 0, 'FN': 0}, "head": {'TP': 0, 'FP': 0, 'FN': 0}}
def making_mAP_dict(categoty_name):
    """Return a fresh counter table for every category name.

    Args:
        categoty_name: mapping whose *values* are the category names to track
            (the keys are not used here).

    Returns:
        dict mapping each category name to its own ``{'TP': 0, 'FP': 0, 'FN': 0}``
        dict. Every inner dict is a separate object, so counters never alias.
    """
    return {
        label: {'TP': 0, 'FP': 0, 'FN': 0}
        for label in categoty_name.values()
    }

# Extract the detector's boxes for the class of interest.
def visual(output, img_info, cls_conf=0.5):
    """Filter one image's detector output down to confident class-0 boxes.

    Args:
        output: 2-D tensor with one detection per row, or None when nothing was
            detected. Columns 0-3 are read as the box corners, columns 4 and 5
            are multiplied together to form the score, column 6 is the class id.
        img_info: dict providing ``"ratio"``, the resize factor the boxes are
            divided by to get back to original-image coordinates.
        cls_conf: a detection is kept only when its score is strictly greater
            than this value.

    Returns:
        ``(bboxes, class_ids, scores)`` where ``bboxes`` is a list of
        ``[x_min, y_min, x_max, y_max]`` ints, ``class_ids`` a list of ints and
        ``scores`` a list of 0-dim tensors; or ``(None, None, None)`` when
        ``output`` is None or no detection passes the filter.
    """
    ratio = img_info["ratio"]
    # The detector produced no output at all.
    if output is None:
        return None, None, None
    output = output.cpu()
    # Undo the preprocessing resize. Divide out of place: ``output[:, 0:4]`` is a
    # view, so an in-place ``/=`` would rescale the caller's tensor whenever it
    # already lives on the CPU (``.cpu()`` then returns the same object).
    bboxes = output[:, 0:4] / ratio
    cls = output[:, 6]  # class id of every detection in this image
    scores = output[:, 4] * output[:, 5]  # combined confidence of every detection

    original_bboxes = []
    original_cls = []
    original_scores = []
    for box, raw_cls, score in zip(bboxes, cls, scores):
        cls_id = int(raw_cls)
        # Keep only class id 0 (labelled "person" in the original comment).
        if cls_id == 0 and cls_conf < score:
            original_cls.append(cls_id)
            original_scores.append(score)
            original_bboxes.append([int(box[0]), int(box[1]), int(box[2]), int(box[3])])

    # There were detections, but none of them was a confident class-0 box.
    if len(original_bboxes) == 0:
        return None, None, None

    return original_bboxes, original_cls, original_scores

# Cut the detected bounding boxes out of the frame so they can be fed to the classifier.
def crop_detect(frame, bboxes, scores, score_thresh=0.5):
    """Crop every sufficiently confident box out of ``frame``.

    Args:
        frame: image array indexed as ``frame[y, x]``.
        bboxes: sequence of ``[x_min, y_min, x_max, y_max]`` boxes.
        scores: per-box confidences, parallel to ``bboxes``.
        score_thresh: boxes scoring below this value are skipped (default 0.5,
            the value that used to be hard-coded).

    Returns:
        list of crops, one per box that was NOT skipped. When boxes are skipped
        the result is therefore shorter than ``bboxes`` and its indices no
        longer line up with it.
    """
    cut_img = []
    for box, score in zip(bboxes, scores):
        if score < score_thresh:
            continue
        # Clamp to the frame's top-left edge. Taking abs() of a negative
        # coordinate (a box hanging off the image) would shift the crop inward
        # to an unrelated region instead of trimming it at the border.
        x0, y0, x1, y1 = (max(0, int(v)) for v in box[:4])
        cut_img.append(frame[y0:y1, x0:x1])
    return cut_img


# Cropping for the classifier: cut the ground-truth boxes out of the frame.
def crop_cls(frame, num_frame):
    """Crop every ground-truth box annotated for ``num_frame``.

    Reads the notebook-level ``correct_json`` / ``correct_json_key`` globals.

    Args:
        frame: image array indexed as ``frame[y, x]``.
        num_frame: key of the frame in ``correct_json``.

    Returns:
        ``(crops, boxes)``: two parallel lists covering every box of every
        category annotated for the frame, in annotation order; or
        ``(None, None)`` when the frame has no ground-truth entry.
    """
    if num_frame not in correct_json_key:
        return None, None

    cut_img_list = []
    box_list = []
    for boxes in correct_json[num_frame].values():
        for raw_box in boxes:
            # Clamp negative coordinates to the frame edge; abs() would move the
            # box inward to a different region rather than trimming it.
            x_min, y_min, x_max, y_max = (max(0, v) for v in raw_box[:4])
            box_list.append([x_min, y_min, x_max, y_max])
            cut_img_list.append(frame[y_min:y_max, x_min:x_max])
    return cut_img_list, box_list


# Draw the bounding boxes (used when the annotated video should be saved).
def vis(img, boxes, predict, scores):
    """Draw labelled boxes on ``img`` in place and return it.

    Reads the notebook-level ``categoty_name`` mapping (class id -> name).
    Note that this definition shadows the ``vis`` imported from ``yolox.utils``.

    Args:
        img: image the boxes are drawn on (modified in place).
        boxes: sequence of ``[x_min, y_min, x_max, y_max]`` boxes.
        predict: predicted class id per box, indexed like ``boxes``.
        scores: per-box confidences indexed like ``boxes``, or None to treat
            every box as fully confident (score 1).

    Returns:
        The same ``img`` object. Boxes scoring below 0.5, and boxes whose
        predicted id is not a key of ``categoty_name``, are left undrawn.
    """
    # One BGR colour per class id, shared by every box.
    palette = [(0, 0, 255), (0, 255, 0), (200, 200, 255), (100, 100, 255)]

    for i, box in enumerate(boxes):
        # ``is None`` rather than ``== None``: comparing an array of scores to
        # None yields an element-wise array whose truth value is ambiguous.
        score = 1 if scores is None else scores[i]
        if score < 0.5:
            continue

        pred = predict[i]
        if pred not in categoty_name:
            continue

        x0, y0, x1, y1 = (int(v) for v in box[:4])
        category = categoty_name[pred]

        text = '{}:{:.1f}%'.format(f'{category}', score * 100)
        txt_color = (255, 255, 255)
        txt_bk_color = (0, 0, 0)
        font = cv2.FONT_HERSHEY_SIMPLEX

        # The label background is measured at scale 0.6 while the text itself is
        # drawn at 0.4, exactly as before, so the background is a little larger
        # than the text.
        txt_size = cv2.getTextSize(text, font, 0.6, 1)[0]
        # Wrap the palette index so more than four categories cannot overflow it.
        cv2.rectangle(img, (x0, y0), (x1, y1), palette[pred % len(palette)], 2)
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img

# Save the predicted json.
def save_pred_json(save_path, pred_json):
    """Write ``pred_json`` to ``save_path`` as JSON, overwriting any existing file.

    Args:
        save_path: destination file path.
        pred_json: JSON-serialisable object. Non-string dict keys such as
            integer frame numbers are written as strings by ``json.dump``.
    """
    # The notebook's import cell never imports json, so import it here;
    # otherwise the first call fails with NameError.
    import json

    with open(save_path, 'w') as outfile:
        json.dump(pred_json, outfile)


# Driver that compares the predicted json against the ground-truth json.
def comparison(predict_json):
    """Accumulate TP/FP/FN per category over all ground-truth frames and print metrics.

    Reads the notebook-level ``categoty_name``, ``correct_json`` and
    ``c_category`` globals and delegates per-frame counting to ``update_mAP``.

    Args:
        predict_json: predictions keyed by frame, in the layout ``update_mAP``
            expects.

    Returns:
        None. For each category with at least one true positive it prints
        accuracy (TP / (TP + FN + FP)), F1, recall and precision; for the other
        categories it prints a diagnostic including the FP count.
    """
    # Create the counter table.
    categoty_mAP = making_mAP_dict(categoty_name)

    # Walk the ground-truth frames and update the per-category counters.
    for frame_nb in correct_json.keys():
        categoty_mAP = update_mAP(frame_nb, correct_json, predict_json, categoty_mAP)

    # Report the results.
    for cate in categoty_mAP.keys():
        TP = categoty_mAP[cate]['TP']
        FP = categoty_mAP[cate]['FP']
        FN = categoty_mAP[cate]['FN']
        if TP != 0:
            precision = TP / (TP + FP)
            recall = TP / (TP + FN)
            accuracy = TP / (TP + FN + FP)
            # Harmonic mean of precision and recall. The previous expression,
            # 2*(precision*recall/precision+recall), parsed as 2*(recall+recall)
            # because of operator precedence.
            F1_score = 2 * precision * recall / (precision + recall)
            print(f'{cate}_accuracy : ',accuracy)
            print(f'{cate}_F1_score : ',F1_score)
            print(f'{cate}_recall : ',recall)
            print(f'{cate}_precision : ',precision)
            print(f'categoty_mAP[{cate}]',categoty_mAP[cate])
        else:
            print('영상내 등장하는 카테고리 : ',c_category)
            print(f'{cate} TP is 0 ',categoty_mAP)
            print(f'{cate}_FP : 예측은{cate} 실제론 다른것 : ',FP)

# Update the counters for one frame (confusion-matrix bookkeeping).
def update_mAP(frame_nb, correct_json, predict_json, categoty_mAP):
    """Add one frame's TP/FP/FN counts to ``categoty_mAP`` and return it.

    Only categories that are keys of ``categoty_mAP`` are counted; categories
    appearing in the annotations or predictions but not tracked are ignored.

    Args:
        frame_nb: key of the frame; must exist in ``correct_json``.
        correct_json: ground truth, ``{frame: {category: [box, ...]}}``.
        predict_json: predictions in the same layout. A missing frame, or a
            frame stored as None or ``{}``, means "nothing predicted".
        categoty_mAP: ``{category: {'TP': n, 'FP': n, 'FN': n}}``, updated in place.

    Returns:
        The same ``categoty_mAP`` object.
    """
    corr = correct_json[frame_nb]           # ground-truth objects of this frame
    pred = predict_json.get(frame_nb) or {}  # predicted objects (may be absent)

    for category, counts in categoty_mAP.items():
        in_truth = category in corr
        in_pred = category in pred

        if in_truth and in_pred:
            # Category on both sides: match boxes by IoU.
            matched = len(calculate_iou(corr[category], pred[category]))
            counts['FN'] += abs(matched - len(corr[category]))  # annotated but not matched
            counts['FP'] += abs(matched - len(pred[category]))  # predicted but not matched
            counts['TP'] += matched
        elif in_truth:
            # Annotated only: every ground-truth box was missed. Previously this
            # case was never counted when the frame had predictions, because the
            # guard tested whether a *set* of names was a member of the
            # category-name values, which is always False.
            counts['FN'] += len(corr[category])
        elif in_pred:
            # Predicted only: every predicted box is a false positive.
            counts['FP'] += len(pred[category])

    return categoty_mAP



# Intersection-over-union of two boxes.
def IoU(box1, box2):
    """Return the IoU of two ``(x1, y1, x2, y2)`` boxes, or None if it is below 0.5.

    Widths and heights use the inclusive-pixel convention (``x2 - x1 + 1``).

    Args:
        box1: first box.
        box2: second box.

    Returns:
        The IoU as a float when it is at least 0.5, otherwise None.
    """
    def _area(box):
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

    area_a = _area(box1)
    area_b = _area(box2)

    # Corners of the overlapping rectangle.
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # Overlap size, floored at zero when the boxes do not intersect.
    overlap = max(0, right - left + 1) * max(0, bottom - top + 1)

    ratio = overlap / (area_a + area_b - overlap)
    return None if ratio < 0.5 else float(ratio)

            
            
# Compare boxes pairwise and keep, for each ground-truth box, the prediction with the highest IoU.
def calculate_iou(correct, predict):
    """Match ground-truth boxes to predicted boxes by best IoU, one-to-one.

    Each ground-truth box is paired with the predicted box giving the highest
    IoU among those ``IoU`` accepts (it returns None for rejected pairs). If
    several ground-truth boxes pick the same predicted box, only the pair with
    the highest IoU is kept (the earliest wins a tie), so a prediction is never
    counted as a match twice.

    Previously duplicates were only resolved when there were more matches than
    predictions, which missed cases such as two ground-truth boxes sharing one
    of three predictions.

    Args:
        correct: list of ground-truth boxes.
        predict: list of predicted boxes.

    Returns:
        list of ``[[c_box, p_box], [iou]]`` entries in ground-truth order.
    """
    # (index of the chosen predicted box, match entry) per matched ground-truth box
    candidates = []
    for c_box in correct:
        top_idx, top_iou = None, None
        for p_idx, p_box in enumerate(predict):
            iou = IoU(c_box, p_box)
            if iou is None:
                continue
            if top_iou is None or iou > top_iou:
                top_idx, top_iou = p_idx, iou
        if top_idx is not None:
            candidates.append((top_idx, [[c_box, predict[top_idx]], [top_iou]]))

    # For every predicted box remember the position of its strongest claimant.
    winner_for_pred = {}
    for pos, (p_idx, match) in enumerate(candidates):
        current = winner_for_pred.get(p_idx)
        if current is None or match[1][0] > candidates[current][1][1][0]:
            winner_for_pred[p_idx] = pos

    kept = set(winner_for_pred.values())
    return [match for pos, (_, match) in enumerate(candidates) if pos in kept]

# Prevent several ground-truth boxes from being matched to the same predicted box.
def compare_box(best):
    """Keep, for every predicted box, only its highest-IoU match.

    The previous version dropped the later entry only when the earlier one had
    the higher IoU (so a weaker earlier duplicate survived), and could call
    ``remove`` twice on the same entry, raising ValueError.

    Args:
        best: list of ``[[c_box, p_box], [iou]]`` entries.

    Returns:
        A new list of deep-copied entries in their original order. When several
        entries share an equal ``p_box`` only the one with the highest IoU
        remains; on a tie the earliest is kept. ``best`` is not modified.
    """
    survivors = []
    for idx, match in enumerate(best):
        p_box, iou = match[0][1], match[1][0]
        # An entry loses when another entry for the same predicted box is
        # stronger, or equally strong but earlier in the list.
        beaten = any(
            other[0][1] == p_box
            and (other[1][0] > iou or (other[1][0] == iou and j < idx))
            for j, other in enumerate(best)
            if j != idx
        )
        if not beaten:
            survivors.append(copy.deepcopy(match))
    return survivors



def Classification(frame,num_frame,hcc,predict_json):
    """Classify the ground-truth crops of one frame and record the predictions.

    Args:
        frame: the video frame.
        num_frame: frame key, used for the ground-truth lookup and as the key
            written into ``predict_json``.
        hcc: classifier object; ``hcc.inference(crops)`` must return one class
            id per crop.
        predict_json: prediction store ``{frame: {category: [box, ...]}}``,
            updated in place.

    Returns:
        ``(result_frame, predict_json)``. When ``crop_cls`` reports no ground
        truth for the frame, the frame is returned untouched and
        ``predict_json[num_frame]`` is set to None; otherwise ``result_frame``
        is whatever ``vis`` returns.
    """
    cut_img_list, box_list = crop_cls(frame, num_frame)

    # No ground truth for this frame: nothing to classify.
    if cut_img_list is None:
        predict_json[num_frame] = None
        return frame, predict_json

    predict = hcc.inference(cut_img_list)
    # Render the predictions onto the frame.
    result_frame = vis(frame, box_list, predict, None)

    for label_id, box in zip(predict, box_list):
        # Only labels listed in categoty_name are recorded.
        if label_id not in categoty_name:
            continue
        label = categoty_name[label_id]
        per_frame = predict_json.setdefault(num_frame, dict())
        if label not in per_frame:
            per_frame[label] = list()
        per_frame[label].append(box)

    return result_frame, predict_json



def Detection(frame,num_frame,det,hcc,predict_json):
    """Detect, crop, classify and record the predictions for one frame.

    Args:
        frame: the video frame.
        num_frame: frame key written into ``predict_json``.
        det: detector; ``det.inference(frame, num_frame)`` must return
            ``(outputs, img_info)`` and ``det.confthre`` is passed to ``visual``
            as its confidence threshold.
        hcc: classifier; ``hcc.inference(crops)`` must return one class id per crop.
        predict_json: prediction store ``{frame: {category: [box, ...]}}``,
            updated in place.

    Returns:
        ``(result_frame, predict_json)``. The frame is returned untouched when
        nothing usable was detected; otherwise ``result_frame`` is whatever
        ``vis`` returns.
    """
    # Run the detector and keep only the class of interest.
    outputs, img_info = det.inference(frame, num_frame)
    bboxes, class_id, scores = visual(outputs[0], img_info, det.confthre)

    if bboxes is None:
        return frame, predict_json

    # A frame with detections always gets an entry, even if it ends up empty.
    frame_entry = predict_json.setdefault(num_frame, dict())

    # crop_detect skips boxes scoring below 0.5, so its crops (and therefore the
    # classifier's predictions) only line up with the boxes that survive that
    # cut. Apply the same cut up front; pairing predict[i] with the unfiltered
    # bboxes[i] attached labels to the wrong boxes whenever one was skipped.
    kept = [(box, score) for box, score in zip(bboxes, scores) if not score < 0.5]
    if not kept:
        return frame, predict_json
    kept_boxes = [box for box, _ in kept]
    kept_scores = [score for _, score in kept]

    cut_img_list = crop_detect(frame, kept_boxes, kept_scores)
    predict = hcc.inference(cut_img_list)

    # Render the predictions onto the frame.
    result_frame = vis(frame, kept_boxes, predict, kept_scores)

    # Record only the labels listed in categoty_name.
    for pred, box in zip(predict, kept_boxes):
        if pred in categoty_name:
            frame_entry.setdefault(categoty_name[pred], list()).append(box)

    return result_frame, predict_json




# Read the video and run the whole pipeline end to end.
def proc(video_path, output_file, detection=True, classification=False):
    """Process a video frame by frame, write the annotated video, then print metrics.

    Args:
        video_path: path of the input video.
        output_file: path of the annotated output video (mp4v codec).
        detection: run the detector + classifier path on every frame (default
            True, the value previously hard-coded).
        classification: run the ground-truth-crop classifier path on every
            frame (default False, the value previously hard-coded).

    Returns:
        None. The accumulated predictions are handed to ``comparison``, which
        prints the metrics.
    """
    det = yd.YoloDetector_st()
    # det = yd.YoloDetector_track()  # alternative detector, left disabled
    det.load()  # load the model

    cap = cv2.VideoCapture(video_path)
    vid_writer = None
    predict_json = {}
    try:
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
        fps = cap.get(cv2.CAP_PROP_FPS)

        save_path = output_file
        logger.info(f"video save_path is {save_path}")
        vid_writer = cv2.VideoWriter(
            save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
        )

        hcc = hc.Classifier()
        print('fps',fps)

        if cap.isOpened():
            while True:
                # frame is the original image
                ret, frame = cap.read()
                if not ret:
                    break

                # Zero-based index of the frame that was just read.
                num_frame = round(cap.get(cv2.CAP_PROP_POS_FRAMES)) - 1
                print(num_frame)

                if detection:
                    result_frame, predict_json = Detection(frame, num_frame, det, hcc, predict_json)
                    vid_writer.write(result_frame)
                    ch = cv2.waitKey(1)
                    if ch == 27 or ch == ord("q") or ch == ord("Q"):
                        break

                # Classifier-only path.
                if classification:
                    result_frame, predict_json = Classification(frame, num_frame, hcc, predict_json)
                    vid_writer.write(result_frame)
                    ch = cv2.waitKey(1)
                    if ch == 27 or ch == ord("q") or ch == ord("Q"):
                        break
    finally:
        # Release the capture and the writer even if a frame fails; without the
        # release the handles leak and the output file is never explicitly closed.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()

    # Compare the predictions against the ground truth.
    comparison(predict_json)

    # To also persist the predictions as JSON:
    # save_pred_json('/DATA/source/ij/injung/pred.json',predict_json)

#############################################################################################################



# Videos to process and, position by position, where each annotated result is written.
input_list = ['./hat_input/hat45.mp4']
output_list = ['./hat_output/hat45.mp4']

# Load the converted ground-truth annotations and the label map that the
# functions above read as notebook-level globals.
data_path = 'json/hat45.json'
correct_json, c_category = convert.json_convert(data_path)
correct_json_key = correct_json.keys()
categoty_name = {0 : "helmet", 1 : "head", 2 : "non"}

for idx, input_path in enumerate(input_list):
    print(f"{input_path} start!!")
    proc(input_path, output_list[idx])
    print(f'####################{input_path} complete#########################')






./hat_input/hat45.mp4 start!!


2022-07-05 01:07:51.756 | INFO     | yolo_detect:load:145 - Model Summary: Params: 99.07M, Gflops: 281.93
2022-07-05 01:07:57.750 | INFO     | __main__:proc:374 - video save_path is ./hat_output/hat45.mp4


fps 25.0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
2

In [2]:
!pip install timm
best_iou = [[[497, 142, 604, 431], [778, 134, 866, 373]], [0.0]]
print(best_iou[1][0])

0.0


In [3]:
# Scratch check: nested counters accumulate across updates.
categoty_mAP = {
    "helmet": {'TP': 0, 'FP': 0, 'FN': 0},
    "head": {'TP': 0, 'FP': 0, 'FN': 0},
}
categoty_mAP["helmet"]['TP'] = categoty_mAP["helmet"]['TP'] + 10
print(categoty_mAP["helmet"]['TP'])
categoty_mAP["helmet"]['TP'] = categoty_mAP["helmet"]['TP'] + 5
print(categoty_mAP["helmet"]['TP'])

10
15


In [4]:
# Scratch check: key views of a sample ground-truth dict and a sample prediction dict.
corr = {
    'helmet': [[721, 109, 83, 254], [901, 114, 81, 242]],
    'head': [[901, 74, 59, 171]],
    'he': [[901, 74, 59, 171]],
}
pred = {
    'helmet': [[553, 123, 636, 360], [727, 114, 796, 351], [896, 111, 983, 352]],
}
c_key, p_key = corr.keys(), pred.keys()

print(c_key, p_key, sep="\n")
# Follow-up experiments tried here, left disabled:
# a = corr['helmet']
# b = pred['helmet']
# print(max(len(a),len(b)))
# print(a)
# k_intersection = p_key&c_key
# print(k_intersection)
# differnet_keys = (p_key|c_key) - k_intersection
# print(differnet_keys)

dict_keys(['helmet', 'head', 'he'])
dict_keys(['helmet'])


In [5]:
# Scratch check: iterate over the category names.
# NOTE: this rebinds the notebook-level `categoty_name` that the pipeline
# functions read as a global, and it differs from the mapping used for the run
# above ("None_label" instead of "non"). Re-run the setup cell before calling
# the pipeline again.
categoty_name = {0 : "helmet", 1 : "head", 2 : "None_label"}
for name in categoty_name.values():
    print(name)

helmet
head
None_label


In [6]:
# Scratch check: abs() gives the size of a difference regardless of operand order.
print(abs(3-4))

1


In [7]:
# Scratch check of a match entry [[c_box, p_box], [iou]]:
# a[0][1] is the predicted box, a[1] is the single-element IoU list.
a = [
    [[2424, 204, 2709, 1149], [2384, 239, 2706, 1134]],
    [0.8275826055170433],
]
print(a[0][1], a[1], sep="\n")


[2384, 239, 2706, 1134]
[0.8275826055170433]


In [8]:
# Scratch check: the nested loops visit every unordered index pair (i, j)
# with i < j exactly once.
a = [['a'],['b'],['c'],['d'],['e'],['f']]
cnt = 0  # never read in this cell
for i in range(len(a)):
    for j in range(i+1,len(a)):
        print(a[i],a[j])

['a'] ['b']
['a'] ['c']
['a'] ['d']
['a'] ['e']
['a'] ['f']
['b'] ['c']
['b'] ['d']
['b'] ['e']
['b'] ['f']
['c'] ['d']
['c'] ['e']
['c'] ['f']
['d'] ['e']
['d'] ['f']
['e'] ['f']


In [9]:
# Scratch data: three match entries, the first and third sharing the same second box.
best = [
    [[[1941, 726], [1941, 731]], [0.989]],
    [[[2166, 396], [2203, 392]], [0.74]],
    [[[2424, 204], [1941, 731]], [0.82]],
]
best1 = copy.deepcopy(best)
print(len(best))
# Duplicate-removal experiment, left disabled:
# for i in range(len(best)):
#     for j in range(i+1,len(best)):
#         if best[i][0][1] ==  best[j][0][1] and best[i][1][0] > best[j][1][0] :
#             best1.remove(best[j])


3


In [10]:
# Scratch check: look a category name up by its class id.
categoty_name = dict(enumerate(["helmet", "head"]))
print(categoty_name[0])

helmet


In [11]:
# Scratch setup: a two-class label map and a matching zeroed counter table.
categoty_name = dict(enumerate(["helmet", "head"]))
categoty_mAP = {
    label: {'TP': 0, 'FP': 0, 'FN': 0}
    for label in ("helmet", "head")
}

In [12]:
# Scratch prototype: build one zeroed TP/FP/FN counter dict per name in
# categoty_name (which must already be defined by an earlier cell).
c_values = categoty_name.values()
categoty_mAP = {}
print(c_values)
for i in c_values:
    categoty_mAP[i] = {'TP': 0, 'FP': 0, 'FN': 0}

print(categoty_mAP)
    

dict_values(['helmet', 'head'])
{'helmet': {'TP': 0, 'FP': 0, 'FN': 0}, 'head': {'TP': 0, 'FP': 0, 'FN': 0}}


In [13]:
# Scratch check: the four-colour palette, indexed by position.
color = [
    (0, 0, 255),
    (0, 255, 0),
    (200, 200, 255),
    (100, 100, 255),
]
print(color[0])

(0, 0, 255)


In [14]:
# Scratch check: an empty list has length 0 and is a different thing from None.
i, a = [], None
print(i, len(i), a, sep="\n")

[]
0
None


In [1]:
import sys
import time
import torch
from torch.backends import cudnn
from matplotlib import colors
import cv2
import numpy as np
import os
import statistics
from skimage import io, draw
from yolox.data.datasets import COCO_CLASSES
from yolox.utils import fuse_model, get_model_info, postprocess, vis
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from loguru import logger
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import yolo_detect as yd
import HelmetClassifier as hc
import convert
import copy



In [2]:
# Create the detector from the project-local yolo_detect module.
det = yd.YoloDetector_st() 

#     det = yd.YoloDetector_track()  # alternative detector, left disabled
det.load()  # load the model

2022-09-19 07:15:17.397 | INFO     | yolo_detect:load:145 - Model Summary: Params: 99.07M, Gflops: 281.93


In [None]:
# Single-image smoke test of the detector loaded above.
frame = cv2.imread('./worker.jpg')
if frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read image './worker.jpg'")
# `num_frame` only exists as a local inside the pipeline functions, so on a
# fresh kernel it is undefined at notebook level; give this one-off image an
# explicit frame index.
num_frame = 0
outputs, img_info = det.inference(frame, num_frame)

In [2]:
# Scratch check: remainder of an even number divided by 2.
a = 4
print(divmod(a, 2)[1])

0
