In [7]:
from PIL import Image, ImageDraw
import numpy as np
import mmcv, cv2
import os, json, gc
import torch
from facenet_pytorch import MTCNN
import filetype

In [8]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [9]:
# Face detector shared by the image and video sections below; it runs on the
# device selected above.
# NOTE(review): keep_all=True presumably makes the detector report every face
# in a frame rather than a single one -- confirm against facenet_pytorch docs.
mtcnn = MTCNN(device=device, keep_all=True)

### Image

In [47]:
# Derive the JSON output name from the image file name.
# os.path.splitext strips only the final extension, so names containing extra
# dots ('my.photo.jpg') or relative paths ('../data/dog.jpg') keep their full
# stem; splitting on the first '.' would truncate them.
filename = 'dog.jpg'
stem, ext = os.path.splitext(filename)  # ext is the extension, e.g. '.jpg'
namer = stem + '.json'
namer

'dog.json'

In [42]:
# Run face detection on a single still image.
# The context manager closes the file handle even if detection raises,
# which the manual open()/close() pair did not guarantee.
with Image.open('dog.jpg') as img:
    boxes, probs = mtcnn.detect(img)

# In the recorded run for this image boxes is None and probs is [None];
# the video section below treats a None box result as "no faces".
print(boxes)

print(probs)

None
[None]


### Video

In [20]:
# Location of the input video and its bare file name (directory part removed).
vid_path = '../data/elon.mp4'
vid_name = os.path.split(vid_path)[1]
print(vid_name)

elon.mp4


In [21]:
# Decode the whole video up front into a list of PIL images.
# Each frame from the reader is converted BGR -> RGB before being wrapped.
video = mmcv.VideoReader(vid_path)
frames = []
for bgr_frame in video:
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    frames.append(Image.fromarray(rgb_frame))

In [22]:
# Run the detector on every frame and keep only the bounding boxes.
# frames_tracked[k] holds whatever mtcnn.detect returned as boxes for frame k.
frames_tracked = []
total_frames = len(frames)
for frame_no, frame in enumerate(frames, start=1):
    # '\r' with end='' rewrites one progress line instead of printing many.
    print('\rTracking frame: {}/{}'.format(frame_no, total_frames), end='')

    # First element of the result is the boxes; the second is discarded.
    boxes = mtcnn.detect(frame)[0]
    frames_tracked.append(boxes)
print('\nDone')

Tracking frame: 311/311
Done


In [24]:
# Map each frame index to the list of boxes detected in that frame.
# Frames whose detection result is None get an empty list.
face_dict = {}
for frame_idx, frame_boxes in enumerate(frames_tracked):
    print(frame_idx)
    detected = [] if frame_boxes is None else list(frame_boxes)
    for face_box in detected:
        print(face_box)
    face_dict[frame_idx] = detected

0
[ 872.1927   210.13211 1163.3075   612.4789 ]
1
[ 871.66046  209.80066 1163.5913   613.3041 ]
2
[ 872.1685   210.02463 1163.3002   612.3847 ]
3
[ 871.9495   209.17152 1163.4827   613.0178 ]
4
[ 872.06866  210.06238 1163.3947   612.5897 ]
5
[ 872.06696  210.05878 1163.3943   612.58685]
6
[ 872.0701   210.06163 1163.3954   612.58673]
7
[ 872.0701   210.06163 1163.3954   612.58673]
8
[ 872.1898   209.81075 1163.2458   612.39734]
9
[ 878.9331   231.29431 1140.8826   597.5287 ]
10
[ 877.9211  231.9748 1139.6221  597.1815]
11
[ 878.2493   231.99474 1139.6187   596.28925]
12
[ 878.30786  232.17398 1141.6085   599.73224]
13
[ 877.6448   232.06964 1141.3511   598.66547]
14
[ 876.8518   232.99149 1141.0894   597.8806 ]
15
[ 876.0941   232.83195 1140.4265   596.70087]
16
[ 874.4725   233.87839 1136.5305   596.9249 ]
17
[ 871.8136   232.13086 1138.9479   604.2512 ]
18
[ 872.40643  228.63495 1138.8982   600.3596 ]
19
[ 871.28     224.42575 1139.9456   607.8705 ]
20
[ 871.4439   212.02092 1142.363

In [27]:
face_dict[2]

[array([ 872.1685 ,  210.02463, 1163.3002 ,  612.3847 ], dtype=float32)]

In [28]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python values.

    Pass as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        NumPy booleans become ``bool``, NumPy integers become ``int``,
        NumPy floats become ``float`` and arrays become nested lists.
        Anything else is passed to the base class, which raises ``TypeError``.
        """
        # The abstract scalar base classes cover every concrete width and avoid
        # naming aliases such as np.float_, which NumPy 2.0 removed.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [None]:
# Name the output after the video, swapping only its final extension.
# splitext also works for non-.mp4 inputs; str.replace('.mp4', '.json') would
# leave such a name unchanged and the write below would hit the video itself.
json_name = os.path.splitext(vid_name)[0] + '.json'
print('Saving faces as:', json_name)

dumped = json.dumps(face_dict, cls=NumpyEncoder, indent=4)

# Release the detection results once serialized. This makes the cell
# single-shot: re-running it requires re-running the cells that build
# face_dict and boxes.
del face_dict, boxes

# 'w' rather than 'a': appending a second document on re-run would leave a
# file that is no longer valid JSON. The with-block closes the file itself.
with open(json_name, 'w') as f:
    f.write(dumped + '\n')

gc.collect()