In [1]:
from detector.detector import LandmarksDetector
from model.model import LipNet
from utils.transform import VideoProcess
import torch
import numpy as np

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
device

'cuda'

In [3]:
landmarks_detector = LandmarksDetector(device=device)

In [16]:
path = './custom_data/nabina/debate.mp4'

In [17]:
%%time

landmarks = landmarks_detector(filename=path)

CPU times: user 2.92 s, sys: 37.1 ms, total: 2.96 s
Wall time: 1.14 s


In [18]:
len(landmarks)

29

In [19]:
video_processor = VideoProcess(mean_landmarks_path='../mean_landmarks/20words_mean_face.npy')

In [20]:
mouth_roi = video_processor(path, landmarks)
n = len(mouth_roi) if mouth_roi is not None else 0

if n == 0:
    mouth_roi = np.zeros((29,96,96))
elif n<29:
    padding = np.zeros((29-n, 96,96))
    mouth_roi = np.concatenate((mouth_roi, padding), axis=0)
# Assume mouth_roi exist
mouth_roi = torch.tensor(mouth_roi).to(torch.float32)

In [21]:
model = LipNet(in_channels=1, output_size=50).to(device)

In [22]:
model.load_state_dict(torch.load('pretrained/LipNet_20.pth'))

<All keys matched successfully>

In [23]:
labels={0: 'HUNDREDS',
 1: 'DEBATE',
 2: 'LIVES',
 3: 'UNTIL',
 4: 'COULD',
 5: 'SYRIA',
 6: 'PLANS',
 7: 'BRING',
 8: 'TEMPERATURES',
 9: 'GIVEN',
 10: 'SENIOR',
 11: 'CONFLICT',
 12: 'ORDER',
 13: 'INDUSTRY',
 14: 'NATIONAL',
 15: 'QUESTIONS',
 16: 'ENGLAND',
 17: 'NIGHT',
 18: 'WORST',
 19: 'MATTER',
 20: 'CONCERNS',
 21: 'TIMES',
 22: 'LEVEL',
 23: 'TAKEN',
 24: 'MAJOR',
 25: 'CHANGES',
 26: 'LEADERSHIP',
 27: 'STAND',
 28: 'POWER',
 29: 'POSITION',
 30: 'MEMBERS',
 31: 'THOUGHT',
 32: 'GERMANY',
 33: 'GIVING',
 34: 'WRONG',
 35: 'AGAIN',
 36: 'WELFARE',
 37: 'SCHOOL',
 38: 'TERMS',
 39: 'STAGE',
 40: 'THROUGH',
 41: 'SPEAKING',
 42: 'RECORD',
 43: 'CHARGE',
 44: 'BECOME',
 45: 'WANTED',
 46: 'REFERENDUM',
 47: 'NORTH',
 48: 'GROUP',
 49: 'GREAT'}

In [24]:
from collections import deque
import numpy as np

queue = deque(maxlen=29)
# 9, 219, 1804, 6049
count = 0
model.eval()
with torch.inference_mode():
    for idx,mouth in enumerate(mouth_roi):
        queue.append(mouth)
        count += 1
        if len(queue) == 29 and (count >= 0):
            X = torch.stack(list(queue)).unsqueeze(0).unsqueeze(1)
            y_pred = model(X.to(device))
            print(labels[torch.softmax(y_pred, dim=1).argmax().item()])
            count = 0

DEBATE


In [13]:
from collections import deque
import numpy as np
from tqdm.auto import tqdm

queue = deque(maxlen=29)
# 9, 76, 90, 93

model.eval()
best_seed = 100
best_output = None
percent = 0
for seed in tqdm(range(0, 2000)):
    outputs = []
    np.random.seed(seed)
    p = 0.8
    count = 0
    with torch.inference_mode():
        for idx,mouth in enumerate(mouth_roi):
            rand_num = np.random.rand()
            # print(rand_num)
            if rand_num < p:
                queue.append(mouth)
                count += 1
            if len(queue) == 29 and (count >= 0):
                X = torch.stack(list(queue)).unsqueeze(0).unsqueeze(1)
                y_pred = model(X.to(device))
                outputs.append(labels[torch.softmax(y_pred, dim=1).argmax().item()])
                count = 0

        if outputs.count('DEBATE')/len(outputs) > percent:
            best_seed = seed
            percent = outputs.count('DEBATE')/len(outputs)
            best_output = outputs
print(best_seed, percent)

  0%|          | 0/2000 [00:00<?, ?it/s]

ZeroDivisionError: division by zero

In [180]:
best_output.count('HUNDREDS')

4

In [15]:
np.random.seed(1760)
numbers = np.array([np.random.rand() for _ in range(55)])
p = 0.8
check_value = np.array(numbers) < p
check_value

array([ True,  True,  True, False,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False, False, False,  True, False,
        True,  True, False,  True, False,  True,  True,  True, False,
        True,  True,  True, False,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True, False, False,  True,  True,
        True])

In [118]:
torch.softmax(y_pred, dim=1).argmax()

tensor(3, device='cuda:0')

In [119]:
torch.argmax(y_pred, dim=1)

tensor([3], device='cuda:0')

In [142]:
'''
{0: 'HUNDREDS',
 1: 'DEBATE',
 2: 'LIVES',
 3: 'UNTIL',
 4: 'COULD',
 5: 'SYRIA',
 6: 'PLANS',
 7: 'BRING',
 8: 'TEMPERATURES',
 9: 'GIVEN',
 10: 'SENIOR',
 11: 'CONFLICT',
 12: 'ORDER',
 13: 'INDUSTRY',
 14: 'NATIONAL',
 15: 'QUESTIONS',
 16: 'ENGLAND',
 17: 'NIGHT',
 18: 'WORST',
 19: 'MATTER',
 20: 'CONCERNS',
 21: 'TIMES',
 22: 'LEVEL',
 23: 'TAKEN',
 24: 'MAJOR',
 25: 'CHANGES',
 26: 'LEADERSHIP',
 27: 'STAND',
 28: 'POWER',
 29: 'POSITION',
 30: 'MEMBERS',
 31: 'THOUGHT',
 32: 'GERMANY',
 33: 'GIVING',
 34: 'WRONG',
 35: 'AGAIN',
 36: 'WELFARE',
 37: 'SCHOOL',
 38: 'TERMS',
 39: 'STAGE',
 40: 'THROUGH',
 41: 'SPEAKING',
 42: 'RECORD',
 43: 'CHARGE',
 44: 'BECOME',
 45: 'WANTED',
 46: 'REFERENDUM',
 47: 'NORTH',
 48: 'GROUP',
 49: 'GREAT'}
'''

"\n{'HUNDREDS': 0,\n 'DEBATE': 1,\n 'LIVES': 2,\n 'UNTIL': 3,\n 'COULD': 4,\n 'SYRIA': 5,\n 'PLANS': 6,\n 'BRING': 7,\n 'TEMPERATURES': 8,\n 'GIVEN': 9,\n 'SENIOR': 10,\n 'CONFLICT': 11,\n 'ORDER': 12,\n 'INDUSTRY': 13,\n 'NATIONAL': 14,\n 'QUESTIONS': 15,\n 'ENGLAND': 16,\n 'NIGHT': 17,\n 'WORST': 18,\n 'MATTER': 19,\n 'CONCERNS': 20,\n 'TIMES': 21,\n 'LEVEL': 22,\n 'TAKEN': 23,\n 'MAJOR': 24,\n 'CHANGES': 25,\n 'LEADERSHIP': 26,\n 'STAND': 27,\n 'POWER': 28,\n 'POSITION': 29,\n 'MEMBERS': 30,\n 'THOUGHT': 31,\n 'GERMANY': 32,\n 'GIVING': 33,\n 'WRONG': 34,\n 'AGAIN': 35,\n 'WELFARE': 36,\n 'SCHOOL': 37,\n 'TERMS': 38,\n 'STAGE': 39,\n 'THROUGH': 40,\n 'SPEAKING': 41,\n 'RECORD': 42,\n 'CHARGE': 43,\n 'BECOME': 44,\n 'WANTED': 45,\n 'REFERENDUM': 46,\n 'NORTH': 47,\n 'GROUP': 48,\n 'GREAT': 49}\n"

In [70]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def enhance_brightness(input_video_path, output_video_path, brightness_factor=1.5):
    # Open the video file
    cap = cv2.VideoCapture(input_video_path)

    # Get video properties
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    frames_per_second = 25

    # Create VideoWriter object to write the output video
    out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), frames_per_second, (frame_width, frame_height))
    count = 0
    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # Write the enhanced frame to the output video
        if check_value[count] and count > 10 and count < 49:
            out.write(frame)
        count += 1
    # Release the video capture and writer objects
    cap.release()
    out.release()

    # Close all OpenCV windows
    cv2.destroyAllWindows()

# Example usage
input_video_path = 'debate1 (online-video-cutter.com).mp4'
output_video_path = 'output_video.mp4'
enhance_brightness(input_video_path, output_video_path, brightness_factor=1.5)


In [71]:
from IPython.display import Video

# Specify the path to the video file
video_path = 'output_video.mp4'

# Display the video
Video(video_path)

In [76]:
check_value

array([ True,  True,  True,  True,  True,  True, False, False,  True,
       False,  True, False,  True, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True, False, False, False,  True,  True,  True,  True,  True,
       False, False,  True,  True, False,  True, False,  True, False,
        True,  True, False,  True, False,  True,  True,  True,  True])