<a href="https://colab.research.google.com/github/jiruneko/3Dpeg/blob/master/Facial_Expression_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the pinned package versions this notebook uses.
# NOTE(review): timm is not imported directly anywhere in this notebook; presumably it is
# required to unpickle the model checkpoint loaded later — confirm.
!pip install timm==0.6.7
# moviepy is used later to trim the uploaded video and to display the result.
# NOTE(review): imageio is pinned together with moviepy, presumably for compatibility with
# this moviepy release — confirm.
!pip install moviepy==0.2.3.5 imageio==2.4.1

In [None]:
# Clone the face-emotion-recognition repository into /content.
# Later cells change into its src/ directory before importing facial_analysis and load a
# model checkpoint from its models/ directory.
%cd /content

!git clone https://github.com/HSE-asavchenko/face-emotion-recognition.git

In [None]:
# Work from the cloned repository's src/ directory; the facial_analysis import below runs from here.
%cd /content/face-emotion-recognition/src

import os
import time
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import glob
from tqdm import tqdm
from google.colab import files

from moviepy.editor import *
from moviepy.video.fx.resize import resize

import torch
from torchvision import transforms

from facial_analysis import FacialImageProcessing

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  DEVICE = 'cuda'
else:
  DEVICE = 'cpu'
print('using device:', DEVICE)

In [None]:
# Re-enter src/ (already the working directory when the cells are run in order).
%cd /content/face-emotion-recognition/src

# Lookup table from the model's output index to the expression label.
# The position of each label in the list is its class index (0..7).
idx_to_class = dict(enumerate([
    'Anger',
    'Contempt',
    'Disgust',
    'Fear',
    'Happiness',
    'Neutral',
    'Sadness',
    'Surprise',
]))


# Side length, in pixels, that face crops are resized to before inference.
IMG_SIZE = 260
# Checkpoint file inside the cloned repository.
MODEL_PATH = '/content/face-emotion-recognition/models/affectnet_emotions/enet_b2_8.pt'

In [None]:
# Face detector provided by the cloned repository's facial_analysis module.
# NOTE(review): the meaning of the positional False argument is not visible in this
# notebook — confirm against facial_analysis.py.
imgProcessing = FacialImageProcessing(False)

# Preprocessing applied to every cropped face before it is passed to the model:
# resize to IMG_SIZE x IMG_SIZE, convert to a tensor, then normalize each channel
# (these mean/std values are the ones commonly used with ImageNet-pretrained models).
_face_preprocessing_steps = [
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
img_transforms = transforms.Compose(_face_preprocessing_steps)

In [None]:
# Load the serialized model object from MODEL_PATH.
# SECURITY: torch.load unpickles the file, which can execute arbitrary code; only load
# checkpoints from a source you trust (here, the repository cloned earlier in this notebook).
# map_location remaps the stored tensors onto DEVICE, so a checkpoint that was saved from a
# GPU can still be loaded when this runtime only has a CPU.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which
# rejects fully pickled models — confirm the PyTorch version of the runtime.
model = torch.load(MODEL_PATH, map_location=DEVICE)
model = model.to(DEVICE)
# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

In [None]:
%cd /content/face-emotion-recognition/src

# Download two sample photos into the repository's test_images directory
# (-c asks wget to resume a partial download, -O sets the output file name).
# First sample: saved as angry.jpg.
!wget -c https://www.pakutaso.com/shared/img/thumb/kuchikomi1134_TP_V4.jpg \
      -O ../test_images/angry.jpg

# Second sample: saved as two_persons.jpg.
!wget -c https://www.pakutaso.com/shared/img/thumb/AL003-ocyaitadakujyoushi20140722_TP_V4.jpg \
      -O ../test_images/two_persons.jpg

In [None]:
def predict(input_file_path, text_size=None, print_time=False):
  """Detect faces in an image file, classify each expression and annotate the image.

  Args:
    input_file_path: Path of the image to load with OpenCV.
    text_size: Font scale of the label text. When None it is derived from the
      image width (width / 800).
    print_time: When True, print the model inference time for every face.

  Returns:
    A tuple ``(frame, scores)``. ``frame`` is the RGB image with a rectangle and
    the predicted label drawn on every processed face. ``scores`` is the NumPy
    array of model outputs (indexed like ``idx_to_class``) for the LAST processed
    face, or None when no face was processed.

  Raises:
    IOError: If OpenCV cannot read ``input_file_path``.
  """
  # Load the image with OpenCV; imread signals failure by returning None.
  frame_bgr = cv2.imread(input_file_path)
  if frame_bgr is None:
    raise IOError('could not read image: %s' % input_file_path)
  # BGR -> RGB
  frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
  height, width = frame.shape[:2]

  # Face detection. The second return value (landmark points) is not used here.
  bounding_boxes, _ = imgProcessing.detect_faces(frame)

  # Derive the label font scale from the image width unless one was given.
  if text_size is None:
    text_size = width / 800

  # Stays None when no face is found, so the return below never hits an unbound name.
  scores = None
  for bbox in bounding_boxes:
    # The first four values of a box are used as x1, y1, x2, y2.
    x1, y1, x2, y2 = (int(v) for v in bbox.astype(np.int64)[0:4])
    # Clamp to the image: a negative start index would otherwise wrap around
    # when slicing and yield a wrong or empty crop.
    x1, x2 = max(x1, 0), min(x2, width)
    y1, y2 = max(y1, 0), min(y2, height)
    if x2 <= x1 or y2 <= y1:
      continue
    face_img = frame[y1:y2, x1:x2, :]

    # Preprocess the crop and add the batch dimension.
    img_tensor = img_transforms(Image.fromarray(face_img)).unsqueeze(0)

    # Facial Expression Recognition. Gradients are not needed for inference, so
    # autograd is disabled to avoid keeping the computation graph in memory.
    if print_time:
      start = time.time()
    with torch.no_grad():
      output = model(img_tensor.to(DEVICE))
    if print_time:
      end = time.time()
      print('processing time: %2f ms'% ((end - start)*1000.))

    scores = output[0].detach().cpu().numpy()

    # Draw the rectangle of the detected face.
    cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), thickness = 2)
    # Draw the recognized expression: a thick white pass first, then a thinner
    # black pass on top, so the label gets a white outline for readability.
    text = idx_to_class[int(np.argmax(scores))]
    cv2.putText(frame, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), thickness=9)
    cv2.putText(frame, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, text_size, (0, 0, 0), thickness=5)

  return frame, scores

In [None]:
%cd /content/face-emotion-recognition/src

input_imgs = glob.glob("../test_images/*.jpg")

for img_path in input_imgs:
  frame, scores = predict(img_path, text_size=None, print_time=True)

  plt.figure(figsize=(10, 10))
  plt.axis('off')
  plt.imshow(frame)

In [None]:
# Colab form parameters: the clip range, in seconds, used later to cut the uploaded video.
# English rendering of the form text below: "Specify the range of the video to cut out
# (seconds). Ranges of 30 seconds or more are likely to cause an out-of-memory error."
#@markdown 動画の切り抜き範囲(秒)を指定してください。\
#@markdown 30秒以上の場合OOM発生の可能性が高いため注意
start_sec =  1#@param {type:"integer"}
end_sec =  10#@param {type:"integer"}

In [None]:
%cd /content/face-emotion-recognition
!rm -rf test_video
!mkdir -p test_video/frames
!mkdir -p test_video/outputs
%cd test_video

# 動画アップロードは、このタイミングで下部のファイル選択から行い、100%になればアップロード完了
uploaded = files.upload()
uploaded = list(uploaded.keys())
file_name = uploaded[0]

upload_path = os.path.join("/content/face-emotion-recognition/test_video", file_name)
print("upload file here:", upload_path)

In [None]:
# Destination of the trimmed clip; the frame-extraction cell reads from this path.
subclip_path = os.path.join("/content/face-emotion-recognition/test_video", "subclip.mp4")

# Cut the [start_sec, end_sec] range out of the uploaded video and write it to
# subclip_path. The source clip is closed when the with-block exits.
with VideoFileClip(upload_path) as video:
    subclip = video.subclip(start_sec, end_sec)
    subclip.write_videofile(subclip_path)

In [None]:
!ffmpeg -i {subclip_path} frames/%06d.png

frames = glob.glob("/content/face-emotion-recognition/test_video/frames/*.png")
print("num of frames:", len(frames))

In [None]:
start = time.time()

# Run facial expression recognition on every extracted frame and save the annotated
# copy under the same file name in the outputs directory.
outputs_dir = "/content/face-emotion-recognition/test_video/outputs"
for img_path in tqdm(frames):
  frame, scores = predict(img_path, text_size=1.0)
  save_path = os.path.join(outputs_dir, os.path.basename(img_path))

  # predict() returns RGB; convert back to BGR, the channel order cv2.imwrite expects.
  frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
  cv2.imwrite(save_path, frame_bgr)

end = time.time()
print('processing time: %2f sec'% ((end - start)))

In [None]:
# Re-assemble the annotated frames into an H.264 MP4 (yuv420p pixel format).
# NOTE(review): no input frame rate is given, so ffmpeg applies its default rate for image
# sequences; if the source video used a different rate the result will play at a different
# speed — confirm, and consider passing -framerate.
# NOTE(review): there is no -y flag, so re-running this cell while result.mp4 already exists
# may not overwrite it — confirm.
!ffmpeg -i "/content/face-emotion-recognition/test_video/outputs/%06d.png" \
        -c:v libx264 -vf "format=yuv420p" "/content/face-emotion-recognition/test_video/outputs/result.mp4"

In [None]:
# Open the rendered result video.
clip = VideoFileClip("/content/face-emotion-recognition/test_video/outputs/result.mp4")
# Scale to a height of 640 (resize is moviepy.video.fx.resize.resize, imported at the top).
clip = resize(clip, height=640)
# Show the clip in the notebook output.
clip.ipython_display()