## 1. Installation

Run this block to install the necessary dependencies.

In [1]:
!git clone https://github.com/indianajson/wav2lip-HD.git
basePath = "/content/wav2lip-HD"
%cd {basePath}

wav2lipFolderName = 'Wav2Lip-master'
gfpganFolderName = 'GFPGAN-master'
wav2lipPath = basePath + '/' + wav2lipFolderName
gfpganPath = basePath + '/' + gfpganFolderName

!wget 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth' -O {wav2lipPath}'/face_detection/detection/sfd/s3fd.pth'

!wget 'https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA' -O {wav2lipPath}'/checkpoints/wav2lip_gan.pth'
#!wget 'https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/Eb3LEzbfuKlJiR600lQWRxgBIY27JZg80f7V9jtMfbNDaQ?e=TBFBVW' -O {wav2lipPath}'/checkpoints/wav2lip.pth'

!gdown https://drive.google.com/uc?id=1fQtBSYEyuai9MjBOF8j7zZ4oQ9W2N64q --output {wav2lipPath}'/checkpoints/'

!pip install -r requirements.txt
!pip install -U librosa==0.8.1 # The process will fail without downgrading librosa
!mkdir inputs

!cd $gfpganFolderName && python setup.py develop
!wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth -P {gfpganFolderName}'/experiments/pretrained_models'

%cd {basePath}

from IPython.display import clear_output
clear_output()

print("Installation complete.")

Installation complete.


In [1]:
# Mount Google Drive at /content/gdrive. The later cells read their input
# audio/video from, and write their results to, paths under
# /content/gdrive/MyDrive, so this cell has to run before them.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## 2. Synchronize Video and Speech

In [13]:


import os
outputPath = basePath+'/outputs'
inputAudioPath = '/content/gdrive/MyDrive/AudioFiles/clip7.wav'
#inputAudioPath
inputVideoPath = '/content/gdrive/MyDrive/Face/low7.mp4'
#inputVideoPath
lipSyncedOutputPath = basePath + '/outputs/result.mp4'
model = "wav2lip"


if not os.path.exists(outputPath):
  os.makedirs(outputPath)

from IPython.display import clear_output
clear_output()

!cd $wav2lipFolderName && python inference.py \
--checkpoint_path checkpoints/{model}.pth \
--face {inputVideoPath} \
--audio {inputAudioPath} \
--outfile {lipSyncedOutputPath}



#print("Video synthesis complete.")

Using cuda for inference.
Reading video frames...
Number of frames available for inference: 397
(80, 1280)
Length of mel chunks: 397
  0% 0/4 [00:00<?, ?it/s]
  0% 0/25 [00:00<?, ?it/s][A
  4% 1/25 [00:38<15:32, 38.86s/it][A
  8% 2/25 [00:41<10:45, 28.08s/it][A
 12% 3/25 [00:44<07:28, 20.38s/it][A
 16% 4/25 [00:46<05:16, 15.09s/it][A
 20% 5/25 [00:49<03:46, 11.33s/it][A
 24% 6/25 [00:51<02:43,  8.62s/it][A
 28% 7/25 [00:54<02:01,  6.73s/it][A
 32% 8/25 [00:56<01:31,  5.41s/it][A
 36% 9/25 [00:58<01:11,  4.50s/it][A
 40% 10/25 [01:01<00:58,  3.92s/it][A
 44% 11/25 [01:03<00:48,  3.44s/it][A
 48% 12/25 [01:05<00:40,  3.10s/it][A
 52% 13/25 [01:08<00:34,  2.85s/it][A
 56% 14/25 [01:10<00:29,  2.70s/it][A
 60% 15/25 [01:13<00:26,  2.61s/it][A
 64% 16/25 [01:15<00:23,  2.60s/it][A
 68% 17/25 [01:17<00:20,  2.53s/it][A
 72% 18/25 [01:20<00:17,  2.47s/it][A
 76% 19/25 [01:22<00:14,  2.42s/it][A
 80% 20/25 [01:24<00:12,  2.41s/it][A
 84% 21/25 [01:27<00:09,  2.48s/it][A
 

## 3. Boost the Resolution of the Synthesized Video



In [15]:
import cv2
from tqdm import tqdm
from os import path

import os

inputVideoPath = outputPath+'/result.mp4'
unProcessedFramesFolderPath = outputPath+'/frames'

if not os.path.exists(unProcessedFramesFolderPath):
  os.makedirs(unProcessedFramesFolderPath)

vidcap = cv2.VideoCapture(inputVideoPath)
numberOfFrames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = vidcap.get(cv2.CAP_PROP_FPS)
print("FPS: ", fps, "Frames: ", numberOfFrames)

for frameNumber in tqdm(range(numberOfFrames)):
    _,image = vidcap.read()
    cv2.imwrite(path.join(unProcessedFramesFolderPath, str(frameNumber).zfill(4)+'.jpg'), image)


!cd $gfpganFolderName && \
  python inference_gfpgan.py -i $unProcessedFramesFolderPath -o $outputPath -v 1.3 -s 2 --only_center_face --bg_upsampler None

import os
restoredFramesPath = outputPath + '/restored_imgs/'
processedVideoOutputPath = outputPath

dir_list = os.listdir(restoredFramesPath)
dir_list.sort()

import cv2
import numpy as np

#Get FPS of original video for writer
inputVideoPath = outputPath+'/result.mp4'
vidcap = cv2.VideoCapture(inputVideoPath)
fps = vidcap.get(cv2.CAP_PROP_FPS)
print("The video is "+str(fps)+" FPS.")

batch = 0
batchSize = 1300
from tqdm import tqdm
for i in tqdm(range(0, len(dir_list), batchSize)):
  img_array = []
  start, end = i, i+batchSize
  print("processing ", start, end, end="\r")
  for filename in  tqdm(dir_list[start:end]):
      filename = restoredFramesPath+filename;
      img = cv2.imread(filename)
      if img is None:
        continue
      height, width, layers = img.shape
      size = (width,height)
      img_array.append(img)
  out = cv2.VideoWriter(processedVideoOutputPath+'/output_'+str(batch).zfill(4)+'.mp4',cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
  batch = batch + 1

  for i in range(len(img_array)):
    out.write(img_array[i])
  out.release()

from IPython.display import clear_output
clear_output()

print("Video upscaling complete.")

Video upscaling complete.


## 4. Clear Cached Files

Run this block once you've downloaded your final video file. This will empty /inputs and /outputs, so you can start again, fresh.


In [8]:
%cd /content/wav2lip-HD/

#@markdown Choose whether to remove both inputs and outputs, or just one of the two. You may want to preserve inputs if you are only changing one of the two inputs.

removeInputs = True #@param {type:"boolean"}
removeOutputs = True #@param {type:"boolean"}

if removeInputs == True:
  %rm inputs/*
if removeOutputs == True:
  %rm outputs/frames/*
  %rm outputs/restored_imgs/*
  %rm outputs/*


from IPython.display import clear_output
clear_output()

print("Cleared cached files.")


Cleared cached files.


## 5. Adding the Audio and Stitching the Individual Videos Together


In [26]:
# Add-audio cell: attaches one audio track to one video clip and writes the
# result to Drive as output_video<clip_number>.mp4.
from moviepy.editor import VideoFileClip, AudioFileClip

# Change only this number to process another clip; all three paths follow it.
clip_number = 6

video_file_path = f'/content/gdrive/MyDrive/Faceno/low{clip_number}.mp4'
audio_file_path = f'/content/gdrive/MyDrive/AudioFiles/clip{clip_number}.wav'
output_file_path = f'/content/gdrive/MyDrive/output_video{clip_number}.mp4'

video_clip = None
audio_clip = None
try:
    video_clip = VideoFileClip(video_file_path)
    audio_clip = AudioFileClip(audio_file_path)

    # set_audio returns a new clip carrying the given audio track.
    video_clip = video_clip.set_audio(audio_clip)

    video_clip.write_videofile(output_file_path, codec='libx264')
finally:
    # Release the readers even when loading or encoding fails part-way.
    for opened_clip in (video_clip, audio_clip):
        if opened_clip is not None:
            opened_clip.close()


t:   0%|          | 0/226 [00:00<?, ?it/s, now=None]

Moviepy - Building video /content/gdrive/MyDrive/output_video6.mp4.
MoviePy - Writing audio in output_video6TEMP_MPY_wvf_snd.mp3
MoviePy - Done.
Moviepy - Writing video /content/gdrive/MyDrive/output_video6.mp4





Moviepy - Done !
Moviepy - video ready /content/gdrive/MyDrive/output_video6.mp4


In [2]:
# Concatenation cell: loads output_video<start_number>.mp4 through
# output_video<end_number>.mp4 from Drive, joins them in numeric order and
# writes the combined video back to Drive.
from moviepy.editor import VideoFileClip, concatenate_videoclips

directory = '/content/gdrive/MyDrive/'
output_file_path = '/content/gdrive/MyDrive/output_combined_video.mp4'

# Inclusive range of clip numbers to join.
start_number = 1
end_number = 7

video_clips = []
final_video = None
try:
    for number in range(start_number, end_number + 1):
        file_path = directory + f'output_video{number}.mp4'
        video_clips.append(VideoFileClip(file_path))

    final_video = concatenate_videoclips(video_clips, method="compose")
    final_video.write_videofile(output_file_path, codec='libx264')
finally:
    # Close the combined clip and every source clip, including the ones
    # already opened when a later file fails to load or encoding aborts.
    if final_video is not None:
        final_video.close()
    for clip in video_clips:
        clip.close()



Moviepy - Building video /content/gdrive/MyDrive/output_combined_video.mp4.
MoviePy - Writing audio in output_combined_videoTEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/gdrive/MyDrive/output_combined_video.mp4






Moviepy - Done !
Moviepy - video ready /content/gdrive/MyDrive/output_combined_video.mp4
