In [1]:
import numpy as np

import torchvision.transforms as transforms
from torchvision.utils import make_grid

from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable

import torch.nn as nn
import torch.nn.functional as F
import torch

In [2]:
class ResidualBlock(nn.Module):
    def __init__(self, in_features):
        super(ResidualBlock, self).__init__()
        
        self.block = nn.Sequential(
            nn.ReflectionPad2d(1), # Pads the input tensor using the reflection of the input boundary
            nn.Conv2d(in_features, in_features, 3),
            nn.InstanceNorm2d(in_features), 
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(in_features, in_features, 3),
            nn.InstanceNorm2d(in_features)
        )

    def forward(self, x):
        return x + self.block(x)


class GeneratorResNet(nn.Module):
    def __init__(self, input_shape, num_residual_block):
        super(GeneratorResNet, self).__init__()
        
        channels = input_shape[0]
        
        # Initial Convolution Block
        out_features = 64
        model = [
            nn.ReflectionPad2d(channels),
            nn.Conv2d(channels, out_features, 7),
            nn.InstanceNorm2d(out_features),
            nn.ReLU(inplace=True)
        ]
        in_features = out_features
        
        # Downsampling
        for _ in range(2):
            out_features *= 2
            model += [
                nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
        
        # Residual blocks
        for _ in range(num_residual_block):
            model += [ResidualBlock(out_features)]
            
        # Upsampling
        for _ in range(2):
            out_features //= 2
            model += [
                nn.Upsample(scale_factor=2), # --> width*2, heigh*2
                nn.Conv2d(in_features, out_features, 3, stride=1, padding=1),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            
        # Output Layer
        model += [nn.ReflectionPad2d(channels),
                  nn.Conv2d(out_features, channels, 7),
                  nn.Tanh()
                 ]
        
        # Unpacking
        self.model = nn.Sequential(*model) 
        
    def forward(self, x):
        return self.model(x)

In [3]:
input_shape = (3, 240, 200)
n_residual_blocks = 9 

G_AB = GeneratorResNet(input_shape, n_residual_blocks)
# G_BA = GeneratorResNet(input_shape, n_residual_blocks)
# D_A = Discriminator(input_shape)
# D_B = Discriminator(input_shape)

G_AB.load_state_dict(torch.load('model/G_AB1.pth', map_location=torch.device('cpu')))

G_AB.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
G_AB.to(device)

GeneratorResNet(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1))
    (2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (9): ReLU(inplace=True)
    (10): ResidualBlock(
      (block): Sequential(
        (0): ReflectionPad2d((1, 1, 1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
        (2): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        (3): ReLU(inplace=True)
        (4): ReflectionPad2d((1, 1, 1, 1))
        

In [None]:
import cv2
from torchvision.transforms import ToTensor
# live test from camera
cap = cv2.VideoCapture(0)
width  = cap.get(3)  # float `width`
height = cap.get(4)  # float `height`


while True:
    ret, frame = cap.read()
    if not ret:
        break

    # preprocess
    fr = cv2.resize(frame, (240,200))
    fr = cv2.cvtColor(fr, cv2.COLOR_BGR2RGB)
    frameTensor = ToTensor()(fr).unsqueeze(0).to(device)

    # predict
    with torch.no_grad():
        gen_img = G_AB(frameTensor)

    # postprocess
    gen_img = gen_img.squeeze(0).cpu().numpy()
    gen_img = (gen_img.transpose(1,2,0)+1)/2
    gen_img = (gen_img * 255).astype(np.uint8)

    # resize to original size
    gen_img = cv2.cvtColor(gen_img, cv2.COLOR_RGB2BGR)
    gen_img = cv2.resize(gen_img,(int(width),int(height)))

    cv2.imshow('frame', frame)
    cv2.imshow('gen_frame', gen_img)

    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()

In [None]:
import cv2
import os

def split_video_into_frames(video_path, output_directory):
    """function that loads a video from a given path and splits it into frames
    and saves it in the output directory"""
    
    video = cv2.VideoCapture(video_path) # loads the video

    frame_count = 0
    success = True

    while success:
        success, frame = video.read()# splits the video into frames
        print(success)

        if success:
            frame_path = os.path.join(output_directory, f"frame_{frame_count}.png") # saves in directory
            cv2.imwrite(frame_path, frame)
            print(frame_path)

            # Increment frame count
            frame_count += 1

    video.release()

    print(f"Split {frame_count} frames and saved them in '{output_directory}'")

    return frame_count

video_path = r"C:\Users\Tzach\Desktop\Computer Science\Computer Vision using Deep Learning\final project\Video\Tzach_test.mp4"
output_directory = r"C:\Users\Tzach\Desktop\Computer Science\Computer Vision using Deep Learning\final project\Video"

frame_count = split_video_into_frames(video_path, output_directory)

In [6]:
import cv2
import os

def merge_images_to_video(image_directory, output_video_path, fps):
    """takes images and merges them into one video"""
    
    frames = [f"frame_{i}.png" for i in range(len(os.listdir(image_directory)))] # loads all images
    print(len(frames))
    first_image_path = os.path.join(image_directory, frames[0])
    print(first_image_path)

    first_image = cv2.imread(first_image_path)
    height, width, _ = first_image.shape

    fourcc = cv2.VideoWriter_fourcc(*"mp4v") #defines video format
    video = cv2.VideoWriter(output_video_path, fourcc, fps, (240, 200)) #constructs the video class

    # Merge images into video
    for image_file in frames:
        image_path = os.path.join(image_directory, image_file)
        image = cv2.imread(image_path)
        image = TransformImageThroughModel(G_AB, image) # runs image through model
        video.write(image)

    # Release the VideoWriter
    video.release()

    print(f"Merged {len(frames)} images into '{output_video_path}'")

merge_images_to_video(image_directory=r"C:\Users\Tzach\Desktop\Computer Science\Computer Vision using Deep Learning\final project\Video\extraction",
                      output_video_path=r"C:\Users\Tzach\Desktop\Computer Science\Computer Vision using Deep Learning\final project\Video\vid.mp4", fps=30)

264
C:\Users\Tzach\Desktop\Computer Science\Computer Vision using Deep Learning\final project\Video\extraction\frame_0.png
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through resha

after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through reshaping
after model
after sizing back
through re

In [5]:
from torchvision.transforms import ToTensor
import cv2

def TransformImageThroughModel(model, img):
    """takes image transforms it, sends to model, resizes and transforms back the image and returns it"""
    img = cv2.resize(img,(240,200))
    #faces = extract_faces(face_detector, img)
    
        # preprocess face
    #original_size = (coordinates[3] - coordinates[2], coordinates[1] - coordinates[0])
    face = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    #face = cv2.resize(face,(256,256))
    face_tensor = ToTensor()(face).unsqueeze(0).to(device)
    print("through reshaping")
        
        # predict
    with torch.no_grad():
        gen_face = model(face_tensor)
    print("after model")

        # postprocess face
    gen_face = gen_face.squeeze(0).cpu().numpy()
    gen_face = (gen_face.transpose(1,2,0)+1)/2
    gen_face = (gen_face * 255).astype(np.uint8)

        # resize to original size
    #gen_face = cv2.resize(gen_face, original_size)
    gen_face = cv2.cvtColor(gen_face, cv2.COLOR_RGB2BGR)
    print("after sizing back")
        
        # put back in frame
    #img[coordinates[0]:coordinates[1], coordinates[2]:coordinates[3]] = gen_face
    return gen_face