<a href="https://colab.research.google.com/github/eyaler/avatars4all/blob/master/facevidcrop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FaceVidCrop

### Notebook by [Eyal Gruss](https://eyalgruss.com), [@eyaler](https://twitter.com/eyaler)

Crop a video to center on a face + optionally upscale/restore with [GFPGAN](https://github.com/TencentARC/GFPGAN)

More notebooks: [github.com/eyaler/avatars4all](https://github.com/eyaler/avatars4all)

Shortcut here: [tfi.la/face](https://tfi.la/face)

Something not working? Open an [issue](https://github.com/eyaler/avatars4all/issues)

If you find my work useful please consider supporting me via [GitHub Sponsors](https://github.com/sponsors/eyaler) or [PayPal](https://www.paypal.com/donate/?hosted_button_id=LNJ6F3FR79ARE)

In [None]:
#@title Setup

import locale
locale.getpreferredencoding = lambda: 'UTF-8'

!pip install git+https://github.com/ytdl-org/youtube-dl
!pip install git+https://github.com/1adrianb/face-alignment

%cd /content
!git clone --depth=1 https://github.com/TencentARC/GFPGAN
%cd /content/GFPGAN
!pip install basicsr
!pip install facexlib
!pip install -r requirements.txt
!python setup.py develop
!pip install realesrgan
%cd /content

In [None]:
#@title Optionally mount Google Drive (MARK CHECKBOX) { run: "auto" }
# Nothing happens unless the checkbox below is set.
mount_google_drive = False #@param {type:"boolean"}
if mount_google_drive:
  # Imported only when needed, so the cell stays a no-op otherwise.
  import google.colab.drive as drive
  drive.mount('/content/drive')
  print('path is /content/drive/MyDrive')

In [None]:
#@title Crop video to face

# Form parameters for this cell. Each assignment carries a `#@param` annotation
# and the `#@markdown` lines hold the user-facing help text; the values are
# read by the code further down in this same cell.

# --- source video ---
video_url = 'https://www.youtube.com/watch?v=vfIHsurwTMo' #@param {type: 'string'}
#@markdown (leave empty to upload file - A BUTTON WILL APPEAR BELOW, or link to youtube / vimeo / video url / path to video on mounted drive [/content/drive/MyDrive/...] / path to video on colab)
i_just_uploaded_a_file_and_i_want_to_reuse_that_instead_of_uploading_a_new_one = False  #@param {type: 'boolean'}
# --- face selection ---
face_order = 'left-to-right then top-to-bottom' #@param ['left-to-right then top-to-bottom', 'top-to-bottom then left-to-right']
#@markdown (one of these orderings may be more stable for your video, as order is recalculated per frame)
face_num = 0 #@param {type: 'integer'}
#@markdown (use face_num = 0 to select the largest face, face_num >= 1 to select a face from all the faces ordered as above)
min_conf = 0.9 #@param {type: 'number'}
#@markdown (lower min_conf if faces are not detected, raise min_conf if there are false detections)
# --- crop tracking and geometry ---
temporal_smoothing = 0.9 #@param {type: 'number'}
#@markdown (use temporal_smoothing = 0 to track head without smoothing, temporal_smoothing = 1 will not track head movement)
# Extra crop height above / below the face, as a fraction of the reference face height.
top_extend_frac = 0.33 #@param {type: 'number'}
bottom_extend_frac = 0.33 #@param {type: 'number'}
# Crop width is derived as height * aspect_ratio.
aspect_ratio = 1.333 #@param {type: 'number'}
# --- trimming ---
start_seconds = 0 #@param {type: 'number'}
duration_seconds = 0 #@param {type: 'number'}
#@markdown (use duration_seconds = 0 for unrestricted duration)
# --- optional GFPGAN restoration ---
GFPGAN_model = '1.4' #@param ['1', '1.2', '1.3', '1.4', 'RestoreFormer']
GFPGAN_factor = 0 #@param {type: 'integer'}
#@markdown (use GFPGAN_factor = 0 to skip GFPGAN, GFPGAN_factor = 1 for restore without upscaling, GFPGAN_factor >= 2 for upscaling)
# --- output ---
max_width = 0 #@param {type: 'integer'}
#@markdown (use max_width = 0 for unrestricted width)
max_height = 0 #@param {type: 'integer'}
#@markdown (use max_height = 0 for unrestricted height)
temporal_mode = 'forward' #@param ['forward', 'reverse', 'forward + reverse']
output_filename = 'output.mp4' #@param {type: 'string'}

from time import time
start_time = time()

%cd /content

import os
from google.colab import files

need_dl = True
try:
  if video_url == save_url and (video_url or i_just_uploaded_a_file_and_i_want_to_reuse_that_instead_of_uploading_a_new_one):
    need_dl = False
except:
  pass
if need_dl:
  if not video_url:
    %cd /content/sample_data
    try:
      uploaded = files.upload()
    except Exception:
      %cd /content
      raise
    for fn in uploaded:
      orig_video = os.path.abspath(fn)
      break
    %cd /content
  elif os.path.isfile(video_url):
    orig_video = os.path.abspath(video_url)
  elif os.path.isfile('/content/drive/MyDrive/' + video_url):
    orig_video = '/content/drive/MyDrive/' + video_url
  else:
    orig_video = '/content/orig_video.mp4'
    !rm -f $orig_video
    !youtube-dl --no-playlist -f "bestvideo[ext=mp4][vcodec!*=av01]+bestaudio[ext=m4a]/mp4[vcodec!*=av01]/bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo[ext=mp4]+bestaudio/mp4" "$video_url" --merge-output-format mp4 -o $orig_video
    if not os.path.exists(orig_video):
      orig_video = '/content/orig_video.' + video_url.rsplit('.', 1)[1]
      !wget "$video_url" -O $orig_video
    assert os.path.exists(orig_video)
  input_video = '/content/input_video' + os.path.splitext(orig_video)[-1]

need_fix_duration = True
try:
  if not need_dl and start_seconds == save_start_seconds and duration_seconds == save_duration_seconds:
    need_fix_duration = False
except:
  pass
if need_fix_duration:
  if start_seconds or duration_seconds:
    !ffmpeg -y -ss $start_seconds -t $duration_seconds -i "$orig_video" -f mp4 $input_video
  else:
    !cp "$orig_video" $input_video

save_url = video_url
save_start_seconds = start_seconds
save_duration_seconds = duration_seconds

import imageio.v3 as iio
from IPython.display import display, Image, Video, clear_output
import face_alignment
import cv2

# First pass over the video: detect faces in every frame and record, per
# frame, the bounding box of the selected face (or None when there is none).
fa = face_alignment.FaceAlignment(landmarks_type=1)
fps = iio.immeta(input_video)['fps']

# Confidence statistics reported at the end of the cell.
max_face_conf, max_face_conf_below, min_face_conf_above = 0, 0, 1
have_faces = False              # becomes True once a numbered multi-face preview was shown
faces, good_faces = [], []      # one entry per frame / only the frames with a selected face

horizontal_first = face_order == 'left-to-right then top-to-bottom'

for im in iio.imiter(input_video):
  # Each detection is indexed as (x0, y0, x1, y1, confidence) below.
  *_, detections = fa.get_landmarks_from_image(im, return_bboxes=True)
  if detections is None:
    detections = []

  scores = [d[-1] for d in detections]
  if scores:
    max_face_conf = max(max_face_conf, *scores)

  if min_conf:
    # Track the best rejected confidence, then drop detections under the threshold.
    rejected = [s for s in scores if s < min_conf]
    if rejected:
      max_face_conf_below = max(max_face_conf_below, *rejected)
    detections = [d for d in detections if d[-1] >= min_conf]
  if len(detections):
    min_face_conf_above = min(min_face_conf_above, *[d[-1] for d in detections])

  # Order the faces according to the chosen reading direction.
  if horizontal_first:
    detections = sorted(detections, key=lambda d: (d[0], d[2], d[1], d[3]))
  else:
    detections = sorted(detections, key=lambda d: (d[1], d[3], d[0], d[2]))

  if not have_faces and len(detections) > 1:
    # First frame with several faces: show it with 1-based face numbers so the
    # user can pick a value for face_num.
    have_faces = True
    for label, (x0, y0, x1, y1, _) in enumerate(detections, start=1):
      cv2.putText(im, str(label), (min(int(x1), im.shape[1]) - 20, max(int(y1), 20)), 0, .7, (0, 255, 0), 2)
    iio.imwrite('/content/numbers.png', im, compress_level=1)
    clear_output()
    display(Image('/content/numbers.png'))

  if face_num:
    # Keep only the requested face (empty when fewer faces were found).
    detections = detections[face_num - 1 : face_num]

  if detections:
    # Among the remaining candidates take the one with the largest area,
    # and store its box without the confidence value.
    by_area = sorted(detections, key=lambda d: (d[2]-d[0]) * (d[3]-d[1]))
    selected = by_area[-1][:-1]
    faces.append(selected)
    good_faces.append(selected)
  else:
    faces.append(None)

print(f'{orig_video=} {max_face_conf=:.2f}')
assert good_faces, f'No faces found. Consider decreasing min_conf below {max_face_conf:.2f}'
median = sorted(good_faces, key=lambda p: (p[2]-p[0]) * (p[3]-p[1]))[len(good_faces) // 2]
top = (median[3] - median[1]) * (.5 + top_extend_frac)
h = top + (median[3] - median[1]) * (.5 + bottom_extend_frac)
if h > im.shape[0]:
  top = top / h * im.shape[0]
  h = im.shape[0]
w = min(round(h * aspect_ratio), im.shape[1])

!rm -rf /content/in_frames
!rm -rf /content/out_frames
!mkdir -p /content/in_frames
!mkdir -p /content/out_frames

prev_face = good_faces[0]
for i, (im, face) in enumerate(zip(iio.imiter(input_video), faces)):
  x0, y0, x1, y1 = prev_face = prev_face if face is None else face

  if i:
    x = x*temporal_smoothing + (x0+x1)/2*(1-temporal_smoothing)
    y = y*temporal_smoothing + (y0+y1)/2*(1-temporal_smoothing)
  else:
    x = (x0+x1) / 2
    y = (y0+y1) / 2

  x0 = max(x - w/2, 0)
  x1 = int(min(x0 + w, im.shape[1]))
  x0 = int(x1 - w)

  y0 = max(y - top, 0)
  y1 = int(min(y0 + h, im.shape[0]))
  y0 = int(y1 - h)

  im = im[y0:y1, x0:x1]
  iio.imwrite(f'/content/in_frames/frame_{i:06d}.png', im, compress_level=1)

reverse_filename = 'reverse_' + output_filename
if GFPGAN_factor:
  %cd /content/GFPGAN
  !python inference_gfpgan.py -i /content/in_frames -o /content/out_frames -v $GFPGAN_model -s $GFPGAN_factor
  %cd /content
  im_folder = 'out_frames/restored_imgs'
else:
  im_folder = 'in_frames'

if 'forward' in temporal_mode:
  !ffmpeg -y -framerate $fps -thread_queue_size 0 -i /content/$im_folder/frame_%06d.png -i $input_video -c:v libx264 -c:a aac -map 0:v -map 1:a? -vf "scale=min(iw\,$max_width):min(ih\,$max_height):force_original_aspect_ratio=decrease:force_divisible_by=2" -pix_fmt yuv420p -profile:v baseline -movflags +faststart "/content/$output_filename"
if 'reverse' in temporal_mode:
  !ffmpeg -y -framerate $fps -thread_queue_size 0 -i /content/$im_folder/frame_%06d.png -i $input_video -c:v libx264 -c:a aac -map 0:v -map 1:a? -vf "scale=min(iw\,$max_width):min(ih\,$max_height):force_original_aspect_ratio=decrease:force_divisible_by=2,reverse" -af areverse -pix_fmt yuv420p -profile:v baseline -movflags +faststart "/content/$reverse_filename"

if temporal_mode == 'reverse':
  !mv "/content/$reverse_filename" "/content/$output_filename"
elif temporal_mode == 'forward + reverse':
  !echo file "/content/$output_filename" > list.txt
  !echo file "/content/$reverse_filename" >> list.txt
  concat_filename = 'concat_' + output_filename
  !ffmpeg -y -f concat -safe 0 -i list.txt -c copy "/content/$concat_filename"
  !mv "/content/$concat_filename" "/content/$output_filename"

# Final report: replace the encoder logs with a summary line and an inline player.
result_path = '/content/' + output_filename

clear_output()
# clear_output() also removed the numbered-faces preview, so show it again.
if have_faces:
  display(Image('/content/numbers.png'))

meta = iio.immeta(result_path)
print(f'took {(time()-start_time) / 60 :.1f} min. {orig_video=} output_video=/content/{output_filename} w={meta["size"][0]} h={meta["size"][1]} t={meta["duration"]} fps={meta["fps"]} {max_face_conf_below=:.2f} {min_face_conf_above=:.2f} {max_face_conf=:.2f}')
print('if video does not show below, you can still download it!')

# embed=True stores the video data in the cell output itself.
player = Video(result_path, embed=True, html_attributes="autoplay controls loop")
display(player)

In [None]:
#@title Download

# Send the video produced by the crop cell to the browser as a download.
from google.colab import files

download_path = '/content/' + output_filename
files.download(download_path)