<a href="https://colab.research.google.com/github/eyaler/avatars4all/blob/master/facevidcrop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FaceVidCrop

### Notebook by [Eyal Gruss](https://eyalgruss.com), [@eyaler](https://twitter.com/eyaler)

Crop a video to center on a face

Optionally upscale/restore with [GFPGAN](https://github.com/TencentARC/GFPGAN)

More notebooks: [github.com/eyaler/avatars4all](https://github.com/eyaler/avatars4all)

Shortcut here: [tfi.la/face](https://tfi.la/face)

In [None]:
#@title Setup

import locale
locale.getpreferredencoding = lambda: "UTF-8"

!pip install git+https://github.com/ytdl-org/youtube-dl
!pip install imageio
!pip install git+https://github.com/1adrianb/face-alignment

%cd /content
!git clone --depth=1 https://github.com/TencentARC/GFPGAN
%cd /content/GFPGAN
!pip install basicsr
!pip install facexlib
!pip install -r requirements.txt
!python setup.py develop
!pip install realesrgan
%cd /content

In [None]:
#@title Crop video to face

# Form parameters for the cropping pipeline in this cell. The `#@param` and
# `#@markdown` annotations are read by Colab to build the form, so they are
# kept exactly as written; plain `#` comments like this one are documentation only.

# Source video; passed to youtube-dl below.
video_url = 'https://www.youtube.com/watch?v=B6-kcz8zZg4' #@param {type:"string"}
face_num = 0 #@param {type: 'integer'}
#@markdown (use face_num = 0 to select the largest face, face_num >= 1 to select a face from faces ordered top to bottom and left to right)
# Detections whose last bounding-box element (used below as the confidence
# score) is lower than min_conf are discarded; 0 disables the filter.
min_conf = .8 #@param {type: 'number'}
temporal_smoothing = 0.9 #@param {type: 'number'}
#@markdown use temporal_smoothing = 0 to track head without smoothing, temporal_smoothing = 1 will not track head movement
# Extra crop height above / below the face, as a fraction of the median
# face-box height (added on top of half the box height on each side).
top_extend_frac = .7 #@param {type: 'number'}
bottom_extend_frac = .7 #@param {type: 'number'}
# Crop width is crop height * aspect_ratio, capped at the frame width.
aspect_ratio = 1.333 #@param {type: 'number'}
# Passed to ffmpeg as -ss / -t when trimming the downloaded video.
start_seconds = 0 #@param {type: 'number'}
duration_seconds = 15 #@param {type: 'number'}
#@markdown (use duration_seconds = 0 for unrestricted duration)
# Passed to inference_gfpgan.py as -v (model version) and -s (scale factor).
GFPGAN_model = '1.4' #@param ['1', '1.2', '1.3', '1.4', 'RestoreFormer']
GFPGAN_factor = 0 #@param {type: 'integer'}
#@markdown (use GFPGAN_factor = 0 to skip GFPGAN, GFPGAN_factor = 1 for restore without upscaling, GFPGAN_factor >= 2 for upscaling)
# Upper bounds applied by the ffmpeg scale filter when encoding the result.
max_width = 0 #@param {type: 'integer'}
#@markdown (use max_width = 0 for unrestricted width)
max_height = 0 #@param {type: 'integer'}
#@markdown (use max_height = 0 for unrestricted height)
# Selects which encodes are produced: forward, reversed, or both concatenated.
temporal_mode = 'forward' #@param ['forward', 'reverse', 'forward + reverse']
# Written under /content/ and later offered by the Download cell.
output_filename = 'output.mp4' #@param {type: 'string'}

%cd /content

need_dl = True
try:
  if video_url == save_url:
    need_dl = False
except:
  pass
if need_dl:
  !rm -f /content/video.mp4
  !rm -f /content/full_video.mp4
  !youtube-dl -f "bestvideo[ext=mp4][vcodec!*=av01]+bestaudio[ext=m4a]/mp4[vcodec!*=av01]/mp4[vcodec!*=av01]/mp4" "$video_url" --merge-output-format mp4 -o /content/video.mp4
  !cp /content/video.mp4 /content/full_video.mp4

need_fix_duration = True
try:
  if video_url == save_url and start_seconds == save_start_seconds and duration_seconds == save_duration_seconds:
    need_fix_duration = False
except:
  pass
if need_fix_duration:
  if start_seconds or duration_seconds:
    !ffmpeg -y -ss $start_seconds -t $duration_seconds -i /content/full_video.mp4 -f mp4 /content/video.mp4
  else:
    !cp /content/full_video.mp4 video.mp4

save_url = video_url
save_start_seconds = start_seconds
save_duration_seconds = duration_seconds

import imageio.v3 as iio
from IPython.display import Video, clear_output
import face_alignment

# Face detector (landmarks_type=1 is passed through to face_alignment unchanged)
# and the frame rate of the trimmed clip, reused when re-encoding the frames.
fa = face_alignment.FaceAlignment(landmarks_type=1)
fps = iio.immeta('/content/video.mp4')['fps']

# faces:      one entry per frame - the selected face box without its last
#             element, or None when no face qualified in that frame.
# good_faces: only the frames' selected boxes, in frame order.
faces = []
good_faces = []
for im in iio.imiter('/content/video.mp4'):
  # Only the bounding boxes are needed; the other returned values are ignored.
  *_, bboxes = fa.get_landmarks_from_image(im, return_bboxes=True)
  if bboxes is None:
    bboxes = []
  if min_conf:
    # The last element of each box is compared against the confidence threshold.
    bboxes = [b for b in bboxes if b[-1] >= min_conf]
  if face_num:
    # face_num >= 1: order boxes by (b[1], b[3], b[0], b[2]) - top to bottom,
    # then left to right per the form help text - and keep the face_num-th.
    bboxes = sorted(bboxes, key=lambda p: (p[1], p[3], p[0], p[2]))[face_num - 1 : face_num]
  if bboxes:
    # Of the remaining candidates keep the one with the largest box area.
    bbox = sorted(bboxes, key=lambda p: (p[2]-p[0]) * (p[3]-p[1]))[-1]
    faces.append(bbox[:-1])
    good_faces.append(faces[-1])
  else:
    faces.append(None)

# Derive one fixed crop size (w x h) for the whole clip from the face box of
# median area. `top` is the distance from the face centre to the crop's top edge.
if not good_faces:
  raise RuntimeError('No face was detected in the video; try lowering min_conf or changing face_num')

# `im` is the last frame left over from the detection loop; only its shape is used.
frame_height, frame_width = im.shape[:2]

median = sorted(good_faces, key=lambda p: (p[2]-p[0]) * (p[3]-p[1]))[len(good_faces) // 2]
top = (median[3] - median[1]) * (.5 + top_extend_frac)
h = top + (median[3] - median[1]) * (.5 + bottom_extend_frac)
if h > frame_height:
  # Crop taller than the frame: shrink it to the frame height, keeping the
  # same proportion of the crop above the face centre.
  top = top / h * frame_height
  h = frame_height
w = min(round(h * aspect_ratio), frame_width)
# Make the height an integer as well. With a fractional h the crop loop's
# int(y1 - h) truncation gives floor(h) rows for crops clamped at the top of
# the frame and ceil(h) rows otherwise, so frames would differ in size.
h = round(h)

!rm -rf /content/in_frames
!rm -rf /content/out_frames
!mkdir -p /content/in_frames
!mkdir -p /content/out_frames

prev_face = good_faces[0]
for i, (im, face) in enumerate(zip(iio.imiter('/content/video.mp4'), faces)):
  x0, y0, x1, y1 = prev_face = prev_face if face is None else face

  if i:
    x = x*temporal_smoothing + (x0+x1)/2*(1-temporal_smoothing)
    y = y*temporal_smoothing + (y0+y1)/2*(1-temporal_smoothing)
  else:
    x = (x0+x1) / 2
    y = (y0+y1) / 2

  x0 = max(x - w/2, 0)
  x1 = int(min(x0 + w, im.shape[1]))
  x0 = int(x1 - w)

  y0 = max(y - top, 0)
  y1 = int(min(y0 + h, im.shape[0]))
  y0 = int(y1 - h)

  im = im[y0 : y1, x0 : x1]
  iio.imwrite(f'/content/in_frames/frame_{i:06d}.png', im, compress_level=1)

reverse_filename = 'reverse_' + output_filename
if GFPGAN_factor:
  %cd /content/GFPGAN
  !python inference_gfpgan.py -i /content/in_frames -o /content/out_frames -v $GFPGAN_model -s $GFPGAN_factor
  %cd /content
  im_folder = 'out_frames/restored_imgs'
else:
  im_folder = 'in_frames'

if 'forward' in temporal_mode:
  !ffmpeg -y -framerate $fps -thread_queue_size 0 -i /content/$im_folder/frame_%06d.png -i /content/video.mp4 -c:v libx264 -c:a aac -map 0:v -map 1:a? -vf "scale=min(iw\,$max_width):min(ih\,$max_height):force_original_aspect_ratio=decrease:force_divisible_by=2" -pix_fmt yuv420p -profile:v baseline -movflags +faststart "/content/$output_filename"
if 'reverse' in temporal_mode:
  !ffmpeg -y -framerate $fps -thread_queue_size 0 -i /content/$im_folder/frame_%06d.png -i /content/video.mp4 -c:v libx264 -c:a aac -map 0:v -map 1:a? -vf "scale=min(iw\,$max_width):min(ih\,$max_height):force_original_aspect_ratio=decrease:force_divisible_by=2,reverse" -af areverse -pix_fmt yuv420p -profile:v baseline -movflags +faststart "/content/$reverse_filename"

if temporal_mode == 'reverse':
  !mv "/content/$reverse_filename" "/content/$output_filename"
elif temporal_mode == 'forward + reverse':
  !echo file "/content/$output_filename" > list.txt
  !echo file "/content/$reverse_filename" >> list.txt
  concat_filename = 'concat_' + output_filename
  !ffmpeg -y -f concat -safe 0 -i list.txt -c copy "/content/$concat_filename"
  !mv "/content/$concat_filename" "/content/$output_filename"
clear_output()

# Print the encoded file's size, duration and frame rate, then embed a player.
# The Video(...) expression must stay last so the cell displays it.
output_path = '/content/' + output_filename
meta = iio.immeta(output_path)
out_width, out_height = meta["size"][0], meta["size"][1]
print(f'w={out_width} h={out_height} t={meta["duration"]}, fps={meta["fps"]}')
Video(output_path, embed=True, html_attributes="autoplay controls loop")

In [None]:
#@title Download

# Offer the file produced by the crop cell as a browser download.
# `output_filename` is set in that cell's form, so run it first.
from google.colab import files
files.download(f'/content/{output_filename}')