In [1]:
import os
import cv2
from tqdm import tqdm

In [2]:
# Folder scanned for label ``.txt`` files; the video path derived from each
# label file name (see the sample output further down) lives in the same folder.
dataset_path = "./datasets/beetle-tracking/Detection_data/"

In [3]:
##Get all .txt files
def _get_all_txt(folder_path):
    f = []
    idx = 0
    if os.path.exists(folder_path):
        for x in os.listdir(folder_path):
            name, ext = os.path.splitext(x)
            if ext == '.txt':
                #print(x)
                f.append(os.path.join(folder_path,x))
    else:
        print("%s does not exist!"%(folder_path))
    
    return f
    
# Collect the label files once so the following cells can index into the list.
txt_files = _get_all_txt(dataset_path)

In [4]:
##Read description file and transfer the content to py dict.
def _read_box_desc(file_name):
    f = open(file_name,'r')
    result = dict()
    res = dict()
    cnt = 0
    result['fn'] = file_name
    vname, _ = os.path.splitext(file_name)
    result['name'] = os.path.split(vname)[-1]
    result['name'] = result['name'].replace(" ", "")
    result['vfn'] = "%s.avi"%vname[:-6]
    
    line = f.readline()
    while line:
        cnt = cnt+1
        line = eval("[%s]"%line)
        if len(line)>2:
            print("err", line)
            return result
        
        res[line[0]] = line[1]
        line = f.readline()
    
    f.close()
    result['res'] = res
    result['cnt'] = cnt
    return result

# Sanity check: parse the first label file and show the derived fields,
# one per line (label path, video path, parsed line count, compact name).
r = _read_box_desc(txt_files[0])
print(r['fn'], r['vfn'], r['cnt'], r['name'], sep="\n")
#print(r['res'])

./datasets/beetle-tracking/Detection_data/[CH03] 2017-08-12 21.15.00_x264_label.txt
./datasets/beetle-tracking/Detection_data/[CH03] 2017-08-12 21.15.00_x264.avi
300
[CH03]2017-08-1221.15.00_x264_label


In [5]:
## for debugging
def _pick_color(id):
    if id==0:
        cr = (0,0,200)
    elif id==1:
        cr = (0,200,0)
    elif id==2: 
        cr = (200,0,0)
    elif id==3: 
        cr = (0,100,100)
    elif id==4: 
        cr = (100,100, 0)
    else:
        cr = (255,255,255)
    
    return cr

In [7]:
from pascal_voc_writer import Writer

In [8]:
def _build_filename(vname, cnt, idx):
    ostr = "%s_f%05d_%05d"%(vname, cnt, idx)
    return ostr

In [9]:
## Read target video files and extract frames as image file according to the description file
def _capture_image_from_desc_voc(img_prefix, desc, out_txt_name, out_folder, wBox=True):
    ##{out_folder}/images
    ##{out_folder}/annotations
    ##{out_folder}/logs/{lname}
    
    print(desc['vfn'], desc['name'])
    
    #input video path
    vf = desc['vfn']
    #output training_set name
    lname = desc['name']
    
    if not os.path.isfile(vf):
        print("wrong video file")
        return
    
    #output log folder path
    out_log_folder = os.path.join(out_folder,'log', lname)
    if not os.path.isdir(out_log_folder):
        print("create out log  dir %s"%out_log_folder)
        os.makedirs(out_log_folder)
    else:
        print("%s already existed, pass this avi"%lname)
        return

    #output image folder
    out_img_folder = os.path.join(out_folder,'images')
    if not os.path.isdir(out_img_folder):
        #print("create out log  dir %s"%out_log_folder)
        os.makedirs(out_img_folder)

    #output anno folder
    out_anno_folder = os.path.join(out_folder,'annotations')
    if not os.path.isdir(out_anno_folder):
        os.makedirs(out_anno_folder)        
        
    #output training set index file
    if out_txt_name == '':
        out_txt_name = 'train.txt'
        out_txt_log_name = 'train_log.txt'
    else:
        out_txt_name = '%s.txt'%out_txt_name
        out_txt_log_name = '%s_log.txt'%out_txt_name
        
    out_txt_path = os.path.join(out_log_folder, out_txt_name)
    out_txt_log_path = os.path.join(out_log_folder, out_txt_log_name)
    
    #print("output img\n%s \ntxt\n%s \nlog\n%s"%(out_img_folder,out_txt_path,out_txt_log_path))
    
    fw = open(out_txt_path,'w')
    fw_log = open(out_txt_log_path,'w')
    
    #read video files
    cap = cv2.VideoCapture(vf)
    timeout = 0
    while not cap.isOpened():
        #cap = cv2.VideoCapture(vf)
        cv2.waitKey(1000)
        print("Wait for the header")
        timeout = timeout +1
        if(timeout>10):
            break
    #print(cap.isOpened())
    total_frames = cap.get(7) ## CV_CAP_PROP_FRAME_COUNT
    print("total_frames %d"%total_frames)    
    
    res = desc['res']
    fcnt = 0
    
    for f_idx in tqdm(res.keys()):#go through specific frames and save it
        if f_idx>total_frames:
            print("parse error. incorrect frame idx %d-%d"%(f_idx,total_frames))
            return
        
        #print(f_idx, res[f_idx])
        fw_log.write("%s %s\n"%(f_idx,str(res[f_idx])))
        cap.set(1, f_idx) #CV_CAP_PROP_POS_FRAMES, frame_num
        ret, frame = cap.read()
        if ret == False:
            print("%s read failed, ignore it"%vf)
            continue
        fcnt = fcnt +1
        img_path = os.path.join(out_img_folder, "%s.jpg"%(_build_filename(lname,fcnt,f_idx)))
        cv2.imwrite(img_path, frame)
        out_str = "%s"%(os.path.abspath(img_path))
        frame_box = frame
        TYPE_BUG = 0
        TYPE_UNKNOWN = 1
        writer = Writer(img_path, 1280, 720)
        for bug_id,left,right in res[f_idx]:
            #print(bug_id,left,right)
            cr = _pick_color(bug_id)
            cv2.rectangle(frame_box,left,right,cr,2)       
            cv2.putText(frame_box,str(bug_id),left,0,1,cr)
            out_str = "%s %d,%d,%d,%d,%d"%(out_str, left[0], left[1], right[0], right[1], TYPE_BUG)
            if bug_id!=5:
                writer.addObject('beetle%d'%bug_id, left[0], left[1], right[0], right[1])
        
        anno_path = os.path.join(out_anno_folder, "%s.xml"%(_build_filename(lname,fcnt,f_idx)))
        writer.save(anno_path)
        out_str = "%s\n%s\n"%(out_str,anno_path)
        fw.write(out_str)
        
        if wBox:
            img_path_b = os.path.join(out_img_folder, "%s_box.jpg"%(_build_filename(lname,fcnt,f_idx)))
            cv2.imwrite(img_path_b, frame_box)
    
    fw.close()
    fw_log.close()
    cap.release()    
    cv2.destroyAllWindows()
    print("capture done %s"%vf)
    return 

# Example for a single label file (img_prefix is unused, any value works):
#ff = _capture_image_from_desc_voc(0, r, '', './ds_train/')


In [10]:
##main
# Convert the label files in sorted order.  The loop stops after the file
# numbered 4, so at most four videos are handled per run.
txt_files = _get_all_txt(dataset_path)
idx = 0
for idx, txt in enumerate(sorted(txt_files), start=1):
    d = _read_box_desc(txt)
    _capture_image_from_desc_voc(idx, d, '', './ds_train_small_classes/', False)
    if idx > 3:
        break

  0%|          | 0/300 [00:00<?, ?it/s]

./datasets/beetle-tracking/Detection_data/[CH01] 2016-10-14 19.20.00_x264.avi [CH01]2016-10-1419.20.00_x264_label
create out log  dir ./ds_train_small_classes/log/[CH01]2016-10-1419.20.00_x264_label
total_frames 26967


100%|██████████| 300/300 [01:21<00:00,  4.45it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

capture done ./datasets/beetle-tracking/Detection_data/[CH01] 2016-10-14 19.20.00_x264.avi
./datasets/beetle-tracking/Detection_data/[CH01] 2016-10-20 19.50.00_x264.avi [CH01]2016-10-2019.50.00_x264_label
create out log  dir ./ds_train_small_classes/log/[CH01]2016-10-2019.50.00_x264_label
total_frames 26967


100%|██████████| 300/300 [04:11<00:00,  3.25it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

capture done ./datasets/beetle-tracking/Detection_data/[CH01] 2016-10-20 19.50.00_x264.avi
./datasets/beetle-tracking/Detection_data/[CH01] 2016-10-28 19.20.00_x264.avi [CH01]2016-10-2819.20.00_x264_label
create out log  dir ./ds_train_small_classes/log/[CH01]2016-10-2819.20.00_x264_label
total_frames 26967


100%|██████████| 300/300 [01:16<00:00,  4.48it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

capture done ./datasets/beetle-tracking/Detection_data/[CH01] 2016-10-28 19.20.00_x264.avi
./datasets/beetle-tracking/Detection_data/[CH01] 2016-11-05 20.20.00_x264.avi [CH01]2016-11-0520.20.00_x264_label
create out log  dir ./ds_train_small_classes/log/[CH01]2016-11-0520.20.00_x264_label
total_frames 26967


100%|██████████| 300/300 [02:47<00:00,  7.52s/it]


capture done ./datasets/beetle-tracking/Detection_data/[CH01] 2016-11-05 20.20.00_x264.avi
