# test.py
import numpy as np
import cv2
from segment_anything import sam_model_registry, SamPredictor
import mediapipe.python.solutions.face_mesh as mp_face_mesh
import mediapipe.python.solutions.drawing_utils as mp_drawing_utils
import time
# --- Segment Anything (SAM) settings ---------------------------------------
sam_checkpoint = "sam_vit_b_01ec64.pth"  # weights file passed to the model builder
model_type = "vit_b"                     # key looked up in sam_model_registry
device = "cpu"                           # device the SAM model is moved to

# --- MediaPipe face-mesh detector ------------------------------------------
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# --- Video input -----------------------------------------------------------
input_path = "output.avi"
video_capture = cv2.VideoCapture(input_path)
if not video_capture.isOpened():
    # Without this check a missing/unreadable file reports 0 frames and a
    # 0x0 size, and the script would silently produce an empty output.
    raise RuntimeError("could not open input video: " + input_path)

videoLenght = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
w = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

# --- Video output ----------------------------------------------------------
# Reuse the source frame rate so the result plays back at the same speed as
# the input; fall back to the previous fixed value when the container does
# not report a usable rate (0, negative or NaN).
fps = video_capture.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.3
videoWriter = cv2.VideoWriter(
    "output_seg2.avi",
    cv2.VideoWriter_fourcc(*"XVID"),
    fps,
    (w, h),
)

# --- Timing ----------------------------------------------------------------
sTime = time.time()  # start of the whole run
pTime = sTime        # timestamp of the previously finished frame
# Build the SAM predictor once, before the loop: constructing the model and
# loading its checkpoint for every frame repeats identical, expensive work.
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)

# Face-mesh landmark indices that bound the SAM prompt box: 61/291 set its
# horizontal extent and 0/17 its vertical extent.
# NOTE(review): presumably the mouth corners and the lip midpoints -- confirm
# against the MediaPipe face-mesh landmark map.
box_left_idx, box_right_idx, box_top_idx, box_bottom_idx = 61, 291, 0, 17

# Per-channel value (RGB order, matching the converted frame) added to every
# pixel that lies inside the predicted mask.
mask_tint = np.array([100, 50, 0])

try:
    for i in range(videoLenght):
        ret, frame = video_capture.read()
        if not ret:
            # The reported frame count is only an estimate; a failed read
            # means there is nothing left to decode.
            break

        # OpenCV decodes to BGR; the face mesh and SAM are fed RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # What gets written for this frame; stays the plain frame when no
        # usable face box is found.
        output = frame

        # facemesh
        detections = face_mesh.process(frame).multi_face_landmarks
        # multi_face_landmarks is None when no face is detected, so it must
        # be checked before indexing.
        if detections:
            landmarks = detections[0].landmark
            img_h, img_w = frame.shape[:2]  # numpy order is (rows, cols)
            # Convert only the four landmarks the box needs.  The helper
            # takes (x, y, image_width, image_height) and returns None for a
            # landmark that falls outside the image.
            corners = [
                mp_drawing_utils._normalized_to_pixel_coordinates(
                    landmarks[idx].x, landmarks[idx].y, img_w, img_h
                )
                for idx in (box_left_idx, box_right_idx, box_top_idx, box_bottom_idx)
            ]
            if all(pt is not None for pt in corners):
                (left_x, _), (right_x, _), (_, top_y), (_, bottom_y) = corners
                # Box expressed as centre +/- half-extent, truncated to ints.
                xc = int((right_x + left_x) / 2)
                yc = int((bottom_y + top_y) / 2)
                rx = int((right_x - left_x) * 0.5)
                ry = int((bottom_y - top_y) * 0.5)
                x1, y1, x2, y2 = xc - rx, yc - ry, xc + rx, yc + ry
                input_box = np.array([x1, y1, x2, y2])

                # segmentation
                predictor.set_image(frame)
                masks, _, _ = predictor.predict(
                    point_coords=None,
                    point_labels=None,
                    box=input_box,
                    multimask_output=False,
                )
                mask_h, mask_w = masks.shape[-2:]
                # Turn the single mask into a 3-channel tint image.
                mask_image = masks.reshape(mask_h, mask_w, 1) * mask_tint.reshape(1, 1, -1)
                mask_image = mask_image.astype(np.uint8)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Saturating add of the tint onto the annotated frame.
                output = cv2.add(frame, mask_image)

        # Write the composited result (not the bare frame), converted back
        # to the BGR order VideoWriter expects.  Every decoded frame is
        # written so the output keeps the same length as the input.
        videoWriter.write(cv2.cvtColor(output, cv2.COLOR_RGB2BGR))

        nTime = time.time()
        print("frame: " + str(i + 1) + "/" + str(videoLenght) + " duration:" + str(nTime - pTime))
        pTime = nTime

    print("total duration: " + str(pTime - sTime))
finally:
    # Always finalize the output file and free the reader, even if a frame
    # fails part-way through the run.
    videoWriter.release()
    video_capture.release()