In [1]:
import sys
%load_ext autoreload
%autoreload 2
# Prepend the directory two levels above the notebook's working directory to the
# import path; presumably this is the repository root that contains the `src`
# package imported by later cells (confirm against the repo layout).
sys.path.insert(0, '../..')

In [46]:
import os
from datetime import datetime

import librosa
import pandas as pd
import torch

from src.utils.engine import yaml_search, get_recording_filename, read_frames, get_recording_paths, predict, \
    predict_with_files
from src.utils.model import get_audio_model, get_visual_model, find_ckpt
from src.utils.preprocess import preprocess_video, split_video, split_audio
from src.utils.transforms import get_video_transforms

In [43]:
# Root of the resource tree (experiment definitions, checkpoints, results),
# resolved relative to the notebook's working directory.
cwd = f"{os.getcwd()}/../../src/resources"
# Identifier of the video experiment to look up with yaml_search.
run_model = 48
# Recording id handed to preprocess_video later on.
# 3434 - "ambil sepuluh telepon merah di kantor" ("take ten red telephones at
# the office"); presumably the sentence spoken in recording 3434.
rid = 3435
experiment = yaml_search(f"{cwd}/experiments/video", run_model)

# Each section of the experiment definition is read exactly once.
hparams = experiment["hyperparameters"]
config = experiment["config"]
data = experiment["data"]
model_conf = experiment["model"]

# learning_rate is coerced with float(); presumably the YAML may store it as a
# string such as "1e-4" (confirm against the experiment files).
seed, batch_size, learning_rate = hparams["seed"], hparams["batch_size"], float(hparams["learning_rate"])
transforms = get_video_transforms(data["transform"], data["color"])
# Frame size passed as `vid_size` to preprocess_video.
arr_size = [150, 150]

# Build both networks; trained weights are loaded from checkpoints in a later cell.
visual_model = get_visual_model(model_conf["version"], learning_rate, model_conf["name"], experiment)
audio_model = get_audio_model(data["audio_version"], learning_rate, data["audio_run"])

In [48]:
def load_model(model, cwd, _type, frames, run_name):
    """Load trained weights for ``model`` from its run directory and move it to the GPU.

    The checkpoint is looked up in
    ``{cwd}/models/ilmsg-{_type}/f{frames}/{run_name}/``; the file name inside
    that directory is chosen by ``find_ckpt``.

    Args:
        model: An instance whose class provides ``load_from_checkpoint``.
        cwd: Root directory of the resource tree.
        _type: Model family used in the directory name (the notebook passes
            ``"audio"`` or ``"video"``).
        frames: Frame count used in the ``f{frames}`` directory name.
        run_name: Name of the run directory that holds the checkpoint.

    Returns:
        The model restored from the checkpoint, after ``.cuda()``.
    """
    default_ckpt_path = f"{cwd}/models/ilmsg-{_type}/f{frames}"
    ckpt_filename = find_ckpt(f"{default_ckpt_path}/{run_name}/")
    ckpt_path = f"{default_ckpt_path}/{run_name}/{ckpt_filename}"
    # load_from_checkpoint is a classmethod. Lightning >= 2.0 raises a TypeError
    # when it is called on an instance, so call it on the class; on older
    # versions this is equivalent to the instance call.
    model = type(model).load_from_checkpoint(ckpt_path).cuda()
    return model

# Swap the freshly constructed networks for their trained, GPU-resident versions.
# Audio checkpoint: stored under the run named by data["audio_run"].
audio_model = load_model(
    model=audio_model,
    cwd=cwd,
    _type="audio",
    frames=data["frames"],
    run_name=data["audio_run"],
)
# Video checkpoint: stored under the run named by model_conf["name"].
visual_model = load_model(
    model=visual_model,
    cwd=cwd,
    _type="video",
    frames=data["frames"],
    run_name=model_conf["name"],
)

In [4]:
from moviepy.video.io.VideoFileClip import VideoFileClip
def cvt_25fps(input_file, output_file, target_fps=25):
    """Re-encode a video at a fixed frame rate.

    Args:
        input_file: Path of the video to read.
        output_file: Path the re-encoded video is written to (libx264 codec).
        target_fps: Frame rate of the output; defaults to 25.

    Raises:
        Whatever ``VideoFileClip`` or ``write_videofile`` raise; the source clip
        is closed in either case.
    """
    clip = VideoFileClip(input_file)
    try:
        clip_converted = clip.set_fps(target_fps)
        clip_converted.write_videofile(output_file, codec="libx264")
    finally:
        # Release the reader even when encoding fails, so a failed conversion
        # does not leave the source file open.
        clip.close()

def extract_audio(input_file, output_file):
    """Write the audio track of a video to ``output_file`` as 16-bit PCM.

    Args:
        input_file: Path of the video to read.
        output_file: Path of the audio file to write (``pcm_s16le`` codec).

    Raises:
        ValueError: If the video has no audio track (``clip.audio`` is None).
    """
    clip = VideoFileClip(input_file)
    try:
        audio = clip.audio
        if audio is None:
            # Without this check the failure is an AttributeError on None,
            # which hides the actual problem.
            raise ValueError(f"{input_file} has no audio track to extract")
        audio.write_audiofile(output_file, codec="pcm_s16le")
    finally:
        # Release the reader even when extraction fails.
        clip.close()

In [6]:
# Early Preprocessing Video
cvt_25fps(
    input_file="../resources/data/YouTube/Jokowi/Jokowi.mp4",
    output_file="../resources/data/YouTube/Jokowi/Jokowi_25FPS.mp4"
)
extract_audio(
    input_file="../resources/data/YouTube/Jokowi/Jokowi.mp4",
    output_file="../resources/data/YouTube/Jokowi/Jokowi.WAV"
)


Moviepy - Building video ../resources/data/YouTube/Jokowi/Jokowi_25FPS.mp4.
MoviePy - Writing audio in Jokowi_25FPSTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/Jokowi_25FPS.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/Jokowi_25FPS.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/Jokowi.WAV


                                                                      

MoviePy - Done.




In [8]:
from pathlib import Path
# Directory layout for one speaker: hand-cut clips are read from `cut/`,
# converted clips and extracted audio are written to `processed/`.
person = "Jokowi"
base_dir = f"../resources/data/YouTube/{person}"
cut_dir, processed_dir = f"{base_dir}/cut", f"{base_dir}/processed"
# Create the output directory (and any missing parents); a no-op when it exists.
Path(processed_dir).mkdir(parents=True, exist_ok=True)

In [10]:
# Convert every cut clip to 25 FPS and extract its audio into processed_dir.
# - sorted(): os.listdir returns entries in arbitrary order, so sort for a
#   reproducible (lexicographic) processing order.
# - os.path.splitext(): strips only the final extension, so a name such as
#   "clip.v2.mp4" keeps its full stem instead of being cut at the first dot.
# - Entries that are not .mp4 files (any letter case) are skipped rather than
#   being treated as videos.
for file in sorted(os.listdir(cut_dir)):
    basename, extension = os.path.splitext(file)
    if extension.lower() != ".mp4":
        continue
    cvt_25fps(
        input_file=f"{cut_dir}/{file}",
        output_file=f"{processed_dir}/{basename}.mp4"
    )
    extract_audio(
        input_file=f"{cut_dir}/{file}",
        output_file=f"{processed_dir}/{basename}.wav"
    )

Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/1.mp4.
MoviePy - Writing audio in 1TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/1.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/1.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/1.wav


                                                                  

MoviePy - Done.




Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/10.mp4.
MoviePy - Writing audio in 10TEMP_MPY_wvf_snd.mp3


                                                                   

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/10.mp4



                                                               

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/10.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/10.wav


                                                                  

MoviePy - Done.




Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/2.mp4.
MoviePy - Writing audio in 2TEMP_MPY_wvf_snd.mp3


                                                                 

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/2.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/2.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/2.wav


                                                               

MoviePy - Done.




Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/3.mp4.
MoviePy - Writing audio in 3TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/3.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/3.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/3.wav


                                                                  

MoviePy - Done.




Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/4.mp4.
MoviePy - Writing audio in 4TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/4.mp4



                                                               

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/4.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/4.wav


                                                                  

MoviePy - Done.




Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/5.mp4.
MoviePy - Writing audio in 5TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/5.mp4



                                                               

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/5.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/5.wav


                                                                   

MoviePy - Done.




Moviepy - Building video ../resources/data/YouTube/Jokowi/processed/9.mp4.
MoviePy - Writing audio in 9TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ../resources/data/YouTube/Jokowi/processed/9.mp4



                                                               

Moviepy - Done !
Moviepy - video ready ../resources/data/YouTube/Jokowi/processed/9.mp4
MoviePy - Writing audio in ../resources/data/YouTube/Jokowi/processed/9.wav


                                                                   

MoviePy - Done.




In [39]:
# Load every processed clip into memory: videos[i] and audios[i] belong to the
# same clip. The clip names come from cut_dir, the data from processed_dir.
videos = []
audios = []
# sorted() fixes the clip order so that the indices used by later cells are
# reproducible; os.listdir alone gives no ordering guarantee.
for file in sorted(os.listdir(cut_dir)):
    # splitext keeps stems that contain dots intact; non-mp4 entries are skipped.
    basename, extension = os.path.splitext(file)
    if extension.lower() != ".mp4":
        continue
    # Load Video & Audio
    # read_frames also returns a 2-tuple that is unpacked as (w, h) here;
    # presumably the frame width and height. It is not used in this cell.
    ori_video, (w, h) = read_frames(f"{processed_dir}/{basename}.mp4")
    # Resample to 16 kHz, the rate passed to split_audio and the metrics later on.
    ori_audio, _ = librosa.load(f"{processed_dir}/{basename}.wav", sr=16000)
    videos.append(ori_video)
    audios.append(ori_audio)

In [16]:
def generate_filenames():
    """Build timestamped output paths for one prediction run.

    All three paths live in ``{cwd}/results`` and share one
    ``YYYYmmddHHMMSS`` timestamp taken at call time.

    Returns:
        A ``(prediction, latent, original)`` tuple of ``.MP4`` file paths.
    """
    # Take the timestamp once so the three files of a run share the same prefix.
    stamp = int(datetime.now().strftime("%Y%m%d%H%M%S"))
    results_dir = f"{cwd}/results"
    return (
        f"{results_dir}/{stamp}_Prediction.MP4",
        f"{results_dir}/{stamp}_Latent.MP4",
        f"{results_dir}/{stamp}_Original.MP4",
    )

In [17]:
from torchmetrics.functional.audio.stoi import short_time_objective_intelligibility
from torchmetrics.functional.audio.pesq import perceptual_evaluation_speech_quality
from tqdm import tqdm

In [49]:
# Objective speech-quality metrics per evaluated clip. Every metric needs its
# OWN list: a chained assignment (`a = b = c = []`) binds all three keys to one
# shared list, so the PESQ/STOI/ESTOI values would end up interleaved.
scores = {"PESQ": [], "STOI": [], "ESTOI": []}
# NOTE(review): only clip index 1 is evaluated here. Use
# `range(1, len(videos))` to score the remaining clips; index 0 is skipped in
# both variants, so confirm whether that is intentional.
for i in tqdm(range(1, 2)):
    filepath_prediction, filepath_latent, filepath_ori = generate_filenames()
    ori_video = videos[i]
    ori_audio = audios[i]
    preprocessed_frames = preprocess_video(
        rid=rid,
        transforms=transforms,
        frames=ori_video,
        vid_size=arr_size,
        local=False,
        to_gray=True
    )
    # Split Video
    video_batch = split_video(
        frames=preprocessed_frames,
        split_frames=data["frames"],
        stride=data["frames"],
        total_frames=len(preprocessed_frames)
    ).cuda()

    # Predict Target for Validation
    label_batch = split_audio(
        y=ori_audio,
        split_frames=data["frames"],
        stride=data["frames"],
        fps=25,
        sr=16000,
        total_frames = len(preprocessed_frames)
    ).cuda()
    label_batch = label_batch.unsqueeze(1)

    # This is inference only, so disable autograd: otherwise activations are
    # retained for a backward pass that never happens. That wastes GPU memory
    # and is a likely contributor to the CUDA out-of-memory error this cell hit.
    with torch.no_grad():
        label_batch = audio_model.encoder(label_batch)

        # `ori_audio` is rebound to the value returned by predict and used as
        # the reference signal for the metrics below.
        (ori_mels, ori_latent, ori_audio), (target_mels, target_latent, target_wav) = predict(
            visual_model=visual_model,
            filepaths=(filepath_prediction, filepath_latent, filepath_ori),
            ori_video=ori_video,
            video_batch=video_batch,
            audio_model=audio_model,
            ori_audio=ori_audio,
            label_batch=label_batch
        )
    # Metrics take (prediction, reference, sample rate); "wb" selects wide-band PESQ.
    pesq = perceptual_evaluation_speech_quality(target_wav, ori_audio, 16000, "wb")
    stoi = short_time_objective_intelligibility(target_wav, ori_audio, 16000).float()
    estoi = short_time_objective_intelligibility(target_wav, ori_audio, 16000, extended=True).float()
    scores["PESQ"].append(pesq)
    scores["STOI"].append(stoi)
    scores["ESTOI"].append(estoi)
    print("------------------")
    print("PESQ:", pesq)
    print("STOI:", stoi)
    print("ESTOI:", estoi)
    print("------------------")
    

  0%|          | 0/1 [00:00<?, ?it/s]

<_dlib_pybind11.fhog_object_detector object at 0x000002CAC6FDCCF0> <_dlib_pybind11.shape_predictor object at 0x000002CC7B17AD70>
Calculate Centroid


100%|██████████| 206/206 [02:22<00:00,  1.44it/s]


[574.22330097 243.00970874]
{'cx': 574.2233009708738, 'cy': 243.0097087378641}


  0%|          | 0/1 [02:32<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 6.00 GiB total capacity; 5.28 GiB already allocated; 0 bytes free; 5.30 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [38]:
import matplotlib.pyplot as plt
import numpy as np

def save_array_as_image(array, filename):
    """Render ``array`` with ``imshow`` and save the figure to ``filename``.

    Args:
        array: Image data accepted by ``Axes.imshow``.
        filename: Output path; passed to ``Figure.savefig`` unchanged.

    Raises:
        Whatever ``imshow`` or ``savefig`` raise; the figure is closed in
        either case.
    """
    fig, ax = plt.subplots()
    try:
        ax.imshow(array)

        # Hide the axis ticks so only the image is visible.
        ax.set_xticks([])
        ax.set_yticks([])

        # Save through the figure created above rather than pyplot's implicit
        # "current figure".
        fig.savefig(filename)
    finally:
        # Always release the figure; open figures accumulate in the kernel.
        plt.close(fig)

# Sanity check: write the first frame of the first loaded clip to disk.
filename = "output_image.png"
array = videos[0][0]
save_array_as_image(array=array, filename=filename)

In [None]:
# Display the first frame of the first loaded clip (the cell's last expression
# is rendered as its output). NOTE(review): this prints the whole frame; a
# summary may be easier to read if the frame is large.
videos[0][0]