Mini MARS: build a small subset of the MARS dataset

Reference: [dataset link](http://zheng-lab.cecs.anu.edu.au/Project/project_mars.html)

What's included in the directories?
- `bbox_train`: 625 ids and 8298 tracklets
- `bbox_test`: 636 ids and 12180 tracklets

In [1]:
# Change to the parent directory; the relative paths used below
# ("data/mars", "tests/data/mini_mars") are resolved against it.
# Gotcha: this is not idempotent - re-running the cell moves up one more level.
%cd ..

/home/ubuntu/dev/reid/pepper


In [7]:
# builtin
from collections import Counter, defaultdict
import json
import os.path as osp
import re
from shutil import copy2 as copy

# third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

# mm
import mmcv

In [5]:
# Location of the original dataset and the sub-directories read by this notebook
orig_root = "data/mars"
orig_train_dir, orig_test_dir, orig_gtPepper_dir = (
    osp.join(orig_root, _sub) for _sub in ("bbox_train", "bbox_test", "gtPepper")
)

# Fail fast when an expected input directory is missing
for _required_dir in (orig_train_dir, orig_test_dir, orig_gtPepper_dir):
    assert osp.exists(_required_dir)

In [6]:
# Output tree of the mini dataset; it mirrors the sub-directory names of the original
mini_root = "tests/data/mini_mars"
mini_train_dir, mini_test_dir, mini_gtPepper_dir = (
    osp.join(mini_root, _sub) for _sub in ("bbox_train", "bbox_test", "gtPepper")
)

# Create the output directories (no-op for those that already exist)
for _out_dir in (mini_train_dir, mini_test_dir, mini_gtPepper_dir):
    mmcv.mkdir_or_exist(_out_dir)

In [17]:
def _load_json(path):
    """Return the parsed content of the JSON file at `path`."""
    with open(path, 'r') as fp:
        return json.load(fp)


# Annotation files of the original dataset, one per split
train_json = osp.join(orig_gtPepper_dir, "train.json")
query_json = osp.join(orig_gtPepper_dir, "query.json")
gallery_json = osp.join(orig_gtPepper_dir, "gallery.json")

train_data = _load_json(train_json)
query_data = _load_json(query_json)
gallery_data = _load_json(gallery_json)

In [18]:
# Hard-coded size of the mini-MARS subset:
#   num_ids    - number of person ids sliced out of the eligible pids per split
#   num_inst   - presumably the number of tracklets kept per id (TODO confirm)
#   num_frames - minimum `tracklet_length` a tracklet needs to be considered
num_ids, num_inst, num_frames = 16, 2, 8

In [32]:
# train: build the mini training split.
#
# Steps:
#   1. keep tracklets with at least `num_frames` frames
#   2. keep pids that still own at least `num_inst` such tracklets
#   3. take `num_ids` of those pids and the first `num_inst` tracklets of each
#
# The thresholds are derived from `num_ids` / `num_inst` instead of being
# repeated as literals (2, 32), so editing the constants changes the subset.

filtered_train_data = [
    td for td in train_data if td['tracklet_length'] >= num_frames
]

print(len(train_data))
print(len(filtered_train_data))

# number of usable tracklets per pid (insertion order = first appearance)
counter = Counter(td['pid'] for td in filtered_train_data)

pids = [pid for pid, cnt in counter.items() if cnt >= num_inst]
# The slice skips the first eligible pid, as the original code did.
# NOTE(review): presumably intentional - confirm why index 0 is excluded.
use_pids = pids[1:num_ids + 1]
print(use_pids)

use_pid_set = set(use_pids)  # O(1) membership tests in the loop below
target_total = len(use_pids) * num_inst

counter = Counter()
new_train_data = []
for data in filtered_train_data:
    pid = data['pid']
    if pid not in use_pid_set or counter[pid] >= num_inst:
        continue
    new_train_data.append(data)
    counter[pid] += 1
    # stop scanning once every selected pid has its quota
    if len(new_train_data) == target_total:
        break

print(counter)
print(sum(counter.values()))

8298
8212
[1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
Counter({1: 2, 2: 2, 3: 2, 4: 2, 6: 2, 7: 2, 9: 2, 10: 2, 11: 2, 12: 2, 13: 2, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2})
32


In [36]:
# Copy every frame of the selected training tracklets from the original tree
# into the mini tree, keeping the same relative path.
for tracklet in new_train_data:
    print(">>>", tracklet['pid'])
    for rel_path in tracklet['img_paths']:
        target_dir = osp.join(mini_root, osp.dirname(rel_path))
        mmcv.mkdir_or_exist(target_dir)
        src_file = osp.join(orig_root, rel_path)
        dst_file = osp.join(mini_root, rel_path)
        copy(src_file, dst_file)

>>> 1
>>> 1
>>> 2
>>> 2
>>> 3
>>> 3
>>> 4
>>> 4
>>> 6
>>> 6
>>> 7
>>> 7
>>> 9
>>> 9
>>> 10
>>> 10
>>> 11
>>> 11
>>> 12
>>> 12
>>> 13
>>> 13
>>> 14
>>> 14
>>> 15
>>> 15
>>> 16
>>> 16
>>> 17
>>> 17
>>> 18
>>> 18


In [38]:
# Write the selected training tracklet records to train.json in the mini gtPepper directory
mini_train_json = osp.join(mini_gtPepper_dir, 'train.json')
with open(mini_train_json, 'w') as fp:
    fp.write(json.dumps(new_train_data, indent=4))

In [46]:
# query / gallery: build the mini evaluation split.
#
# Steps:
#   1. keep tracklets with at least `num_frames` frames in both splits
#   2. keep pids that own at least `num_inst` such tracklets in BOTH splits
#   3. take `num_ids` of those pids and the first `num_inst` tracklets of each,
#      separately for query and gallery
#
# The thresholds are derived from `num_ids` / `num_inst` instead of being
# repeated as literals (2, 32), so editing the constants changes the subset.


def _select_per_pid(tracklets, wanted_pids, per_pid):
    """Pick the first `per_pid` tracklets of every wanted pid, in input order.

    Args:
        tracklets: iterable of tracklet dicts, each with a 'pid' key.
        wanted_pids: collection of pids to keep.
        per_pid: maximum number of tracklets kept for each pid.

    Returns:
        (selected, counts): list of kept tracklet dicts and a Counter mapping
        each pid to the number of tracklets kept for it.
    """
    wanted = set(wanted_pids)
    target_total = len(wanted) * per_pid
    counts = Counter()
    selected = []
    for tracklet in tracklets:
        pid = tracklet['pid']
        if pid not in wanted or counts[pid] >= per_pid:
            continue
        selected.append(tracklet)
        counts[pid] += 1
        # stop scanning once every wanted pid has its quota
        if len(selected) == target_total:
            break
    return selected, counts


filtered_query_data = [
    td for td in query_data if td['tracklet_length'] >= num_frames
]

print(len(query_data))
print(len(filtered_query_data))

filtered_gallery_data = [
    td for td in gallery_data if td['tracklet_length'] >= num_frames
]

# number of usable tracklets per pid in each split
query_counter = Counter(td['pid'] for td in filtered_query_data)
gallery_counter = Counter(td['pid'] for td in filtered_gallery_data)

# pids with enough tracklets on both sides; membership is tested on the
# Counter directly instead of rebuilding a key list on every iteration
pids = [
    q_id
    for q_id, q_cnt in query_counter.items()
    if q_id in gallery_counter
    and q_cnt >= num_inst
    and gallery_counter[q_id] >= num_inst
]

# The slice skips the first eligible pid, as the original code did.
# NOTE(review): presumably intentional - confirm why index 0 is excluded.
use_pids = pids[1:num_ids + 1]
print(use_pids)

new_query_data, counter = _select_per_pid(filtered_query_data, use_pids, num_inst)
print(counter)
print(sum(counter.values()))

new_gallery_data, counter = _select_per_pid(filtered_gallery_data, use_pids, num_inst)
print(counter)
print(sum(counter.values()))

1980
1949
[4, 8, 16, 18, 22, 24, 26, 28, 32, 34, 36, 44, 46, 48, 50, 52]
Counter({4: 2, 8: 2, 16: 2, 18: 2, 22: 2, 24: 2, 26: 2, 28: 2, 32: 2, 34: 2, 36: 2, 44: 2, 46: 2, 48: 2, 50: 2, 52: 2})
32
Counter({4: 2, 8: 2, 16: 2, 18: 2, 22: 2, 24: 2, 26: 2, 28: 2, 32: 2, 34: 2, 36: 2, 44: 2, 46: 2, 48: 2, 50: 2, 52: 2})
32


In [47]:
# Copy every frame of the selected query tracklets, then of the selected
# gallery tracklets, from the original tree into the mini tree, keeping the
# same relative path.
for split_tracklets in (new_query_data, new_gallery_data):
    for tracklet in split_tracklets:
        print(">>>", tracklet['pid'])
        for rel_path in tracklet['img_paths']:
            target_dir = osp.join(mini_root, osp.dirname(rel_path))
            mmcv.mkdir_or_exist(target_dir)
            src_file = osp.join(orig_root, rel_path)
            dst_file = osp.join(mini_root, rel_path)
            copy(src_file, dst_file)

>>> 4
>>> 4
>>> 8
>>> 8
>>> 16
>>> 16
>>> 18
>>> 18
>>> 22
>>> 22
>>> 24
>>> 24
>>> 26
>>> 26
>>> 28
>>> 28
>>> 32
>>> 32
>>> 34
>>> 34
>>> 36
>>> 36
>>> 44
>>> 44
>>> 46
>>> 46
>>> 48
>>> 48
>>> 50
>>> 50
>>> 52
>>> 52
>>> 4
>>> 4
>>> 8
>>> 8
>>> 16
>>> 16
>>> 18
>>> 18
>>> 22
>>> 22
>>> 24
>>> 24
>>> 26
>>> 26
>>> 28
>>> 28
>>> 32
>>> 32
>>> 34
>>> 34
>>> 36
>>> 36
>>> 44
>>> 44
>>> 46
>>> 46
>>> 48
>>> 48
>>> 50
>>> 50
>>> 52
>>> 52


In [48]:
# Write the selected query and gallery tracklet records to the mini gtPepper directory
mini_query_json = osp.join(mini_gtPepper_dir, 'query.json')
mini_gallery_json = osp.join(mini_gtPepper_dir, 'gallery.json')

for out_path, records in (
    (mini_query_json, new_query_data),
    (mini_gallery_json, new_gallery_data),
):
    with open(out_path, 'w') as fp:
        json.dump(records, fp, indent=4)