In [1]:
# Mount Google Drive at /content/drive; the cells below read and write under /content/drive/MyDrive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#download watchyourwelsh.org data to drive
import os

import requests

# Base URL
base_url = "https://www.astro.cf.ac.uk/~spxap/aow"

# Directory in Google Drive to save the videos
save_dir = "/content/drive/MyDrive/welsh-data"  # Replace with your folder path

# open() does not create missing directories, so make sure the target exists
os.makedirs(save_dir, exist_ok=True)

# Loop over participants and sets.
# The inner loop variable is deliberately NOT called `set`: all cells share one
# namespace, so that name would replace the builtin and break every later
# `set(...)` call in the notebook.
for participant in range(1, 21):
    for set_number in range(1, 11):
        # Skip participant 1, set 1
        if participant == 1 and set_number == 1:
            continue

        participant_id = str(participant).zfill(2)
        set_id = str(set_number).zfill(2)

        # Form the complete URL. The former "#t=0.1" suffix was a media fragment;
        # fragments are handled client-side and are not part of the HTTP request.
        url = f"{base_url}/participants/{participant_id}/sets/{set_id}.mp4"

        # Define the local path to save the file. Data is first written to a
        # ".part" file so an interrupted download never leaves a truncated
        # ".mp4" for the later cells to pick up.
        local_path = f"{save_dir}/participant_{participant_id}_set_{set_id}.mp4"
        partial_path = local_path + ".part"

        try:
            # stream=True avoids holding a whole video in memory; the timeout
            # keeps a stalled connection from hanging the cell forever.
            with requests.get(url, stream=True, timeout=60) as r:
                # Check if the request was successful
                if r.status_code != 200:
                    print(f"Skipped {url}: HTTP {r.status_code}")
                    continue

                # Write the content to the file in chunks
                with open(partial_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
        except requests.RequestException as error:
            print(f"Failed to download {url}: {error}")
            continue

        # Only a completely written file gets the final name
        os.replace(partial_path, local_path)


In [5]:
# GPU information: print the nvidia-smi report (GPU model, driver/CUDA version, memory use) for this runtime
!nvidia-smi

Thu Dec  7 16:35:28 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    24W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
#install dependencies
!pip install torch torchvision
!pip install numpy opencv-python



In [7]:
import torch
import numpy as np
import cv2

# Report the library versions (and CUDA visibility) of this session
version_report = (
    ("PyTorch Version:", torch.__version__),
    ("CUDA Available:", torch.cuda.is_available()),
    ("NumPy Version:", np.__version__),
    ("OpenCV Version:", cv2.__version__),
)
for label, value in version_report:
    print(label, value)


PyTorch Version: 2.1.0+cu118
CUDA Available: True
NumPy Version: 1.23.5
OpenCV Version: 4.8.0


In [8]:
%cd /content/drive/MyDrive/
!git clone https://github.com/xinntao/Real-ESRGAN.git
!cd Real-ESRGAN
!git clone https://github.com/xinntao/BasicSR
# Install basicsr - https://github.com/xinntao/BasicSR
# We use BasicSR for both training and inference
!pip install basicsr
# facexlib and gfpgan are for face enhancement
!pip install facexlib
!pip install gfpgan
!python setup.py develop

/content/drive/MyDrive
Cloning into 'Real-ESRGAN'...
remote: Enumerating objects: 755, done.[K
remote: Total 755 (delta 0), reused 0 (delta 0), pack-reused 755[K
Receiving objects: 100% (755/755), 5.37 MiB | 6.32 MiB/s, done.
Resolving deltas: 100% (410/410), done.
Cloning into 'BasicSR'...
remote: Enumerating objects: 5919, done.[K
remote: Counting objects: 100% (5919/5919), done.[K
remote: Compressing objects: 100% (1998/1998), done.[K
remote: Total 5919 (delta 3780), reused 5676 (delta 3729), pack-reused 0[K
Receiving objects: 100% (5919/5919), 4.12 MiB | 15.50 MiB/s, done.
Resolving deltas: 100% (3780/3780), done.
Updating files: 100% (303/303), done.
Collecting basicsr
  Downloading basicsr-1.4.2.tar.gz (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.5/172.5 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting addict (from basicsr)
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)


In [10]:
!python3 /content/drive/MyDrive/Real-ESRGAN/setup.py

Traceback (most recent call last):
  File "/content/drive/MyDrive/Real-ESRGAN/setup.py", line 83, in <module>
    write_version_py()
  File "/content/drive/MyDrive/Real-ESRGAN/setup.py", line 60, in write_version_py
    with open('VERSION', 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: 'VERSION'


In [11]:
# Install the packages listed in Real-ESRGAN's requirements file.
# The path is relative: it resolves against /content/drive/MyDrive, set by the earlier `%cd`.
!pip install -r Real-ESRGAN/requirements.txt



In [12]:
#split videos of welsh data to images frame by frame

import cv2
import os
import glob

# Directory containing the videos
video_dir = '/content/drive/MyDrive/welsh-data'

# Directory where the images will be saved.
# exist_ok=True makes re-running the cell safe without a separate existence
# check, matching how the split cells create their directories.
image_dir = '/content/drive/MyDrive/welsh-data-images'
os.makedirs(image_dir, exist_ok=True)

# Function to extract all frames
def extract_all_frames(video_path, target_dir):
    """
    Save every frame of one video as a numbered PNG file in target_dir.

    Files are named '<video file name without extension>_frame<NNNN>.png',
    where NNNN is the zero-based frame index padded to at least four digits.

    Parameters:
    video_path (str): Path of the video file to read.
    target_dir (str): Directory that receives the PNG files.

    Returns:
    int: Number of frames read from the video (0 if no frame could be read).
    """
    # splitext removes only the final extension, so a dot elsewhere in the file
    # name cannot truncate the stem and make two videos share frame names.
    # Computed once here rather than for every frame.
    stem = os.path.splitext(os.path.basename(video_path))[0]

    # Load the video
    video = cv2.VideoCapture(video_path)

    frame_count = 0
    try:
        while True:
            success, frame = video.read()
            if not success:
                break

            frame_name = f'{stem}_frame{frame_count:04d}.png'
            cv2.imwrite(os.path.join(target_dir, frame_name), frame)
            frame_count += 1
    finally:
        # Release the capture even if the loop is interrupted or raises
        video.release()

    return frame_count

# Iterate over all MP4 files in the directory.
# glob returns matches in arbitrary order; sorting gives a stable processing
# order, so the progress log is comparable between runs.
total_frame_count = 0
for video_file in sorted(glob.glob(os.path.join(video_dir, '*.mp4'))):
    count = extract_all_frames(video_file, image_dir)
    total_frame_count += count
    print(f"Extracted {count} frames from {os.path.basename(video_file)}")

print(f"Total frames extracted from all videos: {total_frame_count}")


Extracted 1770 frames from participant_01_set_02.mp4
Extracted 2321 frames from participant_01_set_03.mp4
Extracted 3399 frames from participant_01_set_04.mp4
Extracted 4468 frames from participant_01_set_05.mp4
Extracted 3946 frames from participant_01_set_06.mp4
Extracted 2323 frames from participant_01_set_07.mp4
Extracted 1081 frames from participant_01_set_08.mp4
Extracted 1260 frames from participant_01_set_09.mp4
Extracted 1424 frames from participant_01_set_10.mp4
Extracted 1565 frames from participant_02_set_01.mp4
Extracted 1667 frames from participant_02_set_02.mp4
Extracted 2328 frames from participant_02_set_03.mp4
Extracted 3435 frames from participant_02_set_04.mp4
Extracted 4459 frames from participant_02_set_05.mp4
Extracted 3968 frames from participant_02_set_06.mp4
Extracted 2503 frames from participant_02_set_07.mp4
Extracted 1187 frames from participant_02_set_08.mp4
Extracted 1268 frames from participant_02_set_09.mp4
Extracted 1414 frames from participant_02_set_

KeyboardInterrupt: ignored

In [None]:
#length of videos in seconds

import cv2
import os
import glob

# Directory containing the videos (same path the download cell saves to)
video_dir = '/content/drive/MyDrive/welsh-data'

# Function to get the length of the video in seconds
def get_video_length(video_path):
    """
    Return the duration of a video in seconds, computed as frame count / fps.

    Parameters:
    video_path (str): Path of the video file to inspect.

    Returns:
    float: Duration in seconds, or 0.0 when no positive frame rate is reported.
    """
    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)  # Frames per second
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture even if a property query raises
        video.release()

    # A frame rate of 0 would otherwise raise ZeroDivisionError and abort the
    # whole listing loop because of a single bad file.
    if fps <= 0:
        return 0.0
    return frame_count / fps

# Report the duration of every MP4 found in the video directory
mp4_pattern = os.path.join(video_dir, '*.mp4')
for video_file in glob.glob(mp4_pattern):
    video_length = get_video_length(video_file)
    file_name = os.path.basename(video_file)
    print(f"{file_name}: {video_length:.2f} seconds")


In [None]:
#split frames into train and test mixing all frames
#training and test can contain images from the same video or consecutive frames

import os
import glob
import shutil
import random

# Seed for random generator to ensure consistency
random.seed(42)

# Directories
image_dir = '/content/drive/MyDrive/welsh-data-images'
train_dir = '/content/drive/MyDrive/welsh-data-train-byimage'
test_dir = '/content/drive/MyDrive/welsh-data-test-byimage'

# Create train and test directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get all frame file paths.
# glob returns matches in arbitrary order; without sorting, the fixed seed
# would not select the same frames on every run.
all_frames = sorted(glob.glob(os.path.join(image_dir, '*.png')))

# Define split ratio (e.g., 80% for training, 20% for testing)
split_ratio = 0.8
split_index = int(len(all_frames) * split_ratio)

# Randomly select frames for the training set
train_frames = random.sample(all_frames, split_index)

# The rest of the frames will be for testing.
# Membership is tested against a hash set instead of the list, which turns a
# quadratic scan into a linear one. It is written as a set comprehension so it
# does not rely on the name `set`, which an earlier cell may have rebound.
train_lookup = {frame for frame in train_frames}
test_frames = [frame for frame in all_frames if frame not in train_lookup]

# Helper that relocates a batch of files
def move_files(files, target_dir):
    """
    Move every path in files into target_dir.

    Parameters:
    files (iterable of str): Paths of the files to move.
    target_dir (str): Destination passed to shutil.move for each file.
    """
    for source_path in files:
        shutil.move(source_path, target_dir)

# Relocate each split into its own directory (training first, then testing)
for split_frames, split_dir in ((train_frames, train_dir), (test_frames, test_dir)):
    move_files(split_frames, split_dir)

# Summarise the split sizes
print(f"Total frames: {len(all_frames)}")
print(f"Training frames: {len(train_frames)}")
print(f"Testing frames: {len(test_frames)}")


In [None]:
#split images of videos of participants into train and test
#training and test canNOT contain images from the same video or consecutive frames

import os
import glob
import shutil
import random

# Seed for random generator to ensure consistency
random.seed(42)

# Directories
image_dir = '/content/drive/MyDrive/welsh-data-images'
train_dir = '/content/drive/MyDrive/welsh-data-train-byvideo'
test_dir = '/content/drive/MyDrive/welsh-data-test-byvideo'

# Create train and test directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get all frame file paths
all_frames = glob.glob(os.path.join(image_dir, '*.png'))

# Extract participant numbers: the second '_'-separated field of the file name,
# e.g. 'participant_01_set_02_frame0000.png' -> '01'.
# Set comprehensions/displays are used instead of set(...) so this cell does not
# rely on the name `set`, which an earlier cell may have rebound.
participants = {os.path.basename(frame).split('_')[1] for frame in all_frames}

# Randomly select 16 participants for training.
# random.sample requires a sequence (sets are rejected since Python 3.11), and
# sorting gives the seeded draw a fixed population order, so the same
# participants are chosen on every run.
train_participants = {*random.sample(sorted(participants), 16)}

# The rest of the participants will be for testing
test_participants = participants - train_participants

# Function to move files
def move_files(participant_set, target_dir, source_dir=None):
    """
    Move all frames belonging to the given participants into target_dir.

    Frames are matched by the file-name prefix 'participant_<id>_'. Matching the
    id anywhere in the name (as '*<id>*' did) also catches set and frame numbers
    that contain the same digits, which puts other participants' frames into
    the wrong split.

    Parameters:
    participant_set (iterable of str): Participant ids as they appear in the file names (e.g. '01').
    target_dir (str): Destination passed to shutil.move for each frame.
    source_dir (str, optional): Directory to search; defaults to the notebook-level image_dir.
    """
    if source_dir is None:
        source_dir = image_dir

    for participant in participant_set:
        pattern = os.path.join(source_dir, f'participant_{participant}_*')
        for frame in glob.glob(pattern):
            shutil.move(frame, target_dir)

# Relocate each participant group's frames (training first, then testing)
for participant_group, group_dir in ((train_participants, train_dir), (test_participants, test_dir)):
    move_files(participant_group, group_dir)

# Show which participants ended up in which split
print(f"Training participants: {train_participants}")
print(f"Testing participants: {test_participants}")


In [None]:
#meta info creator
import os

def create_meta_info_txt(directory, output_file):
    """
    Create a text file containing the paths of all files in the specified directory.

    One path is written per line, each formed by joining `directory` and the
    file name, in sorted file-name order. Subdirectories are not descended
    into. The output file itself is never listed, even when it is located
    inside `directory`.

    Parameters:
    directory (str): The directory to scan for files.
    output_file (str): The path to the output text file.
    """
    output_abspath = os.path.abspath(output_file)

    # Get a list of file paths in the directory.
    # os.listdir returns names in arbitrary order; sorting makes the generated
    # file identical for identical directory contents.
    file_paths = []
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue
        # A meta file left over from an earlier run must not be listed as input
        if os.path.abspath(path) == output_abspath:
            continue
        file_paths.append(path)

    # Write the file paths to the output file
    with open(output_file, 'w') as file:
        file.writelines(path + '\n' for path in file_paths)

    print(f"Meta information written to {output_file}")


In [None]:
# Write the paths of the files located directly in the current directory ("./") to Real-ESRGAN/meta_info.txt.
# NOTE(review): "./" is the working directory at this point (the earlier `%cd` sets /content/drive/MyDrive),
# not one of the welsh-data-train-* frame directories - confirm this is the directory that should be listed.
create_meta_info_txt("./", "Real-ESRGAN/meta_info.txt")

In [None]:
#create the training options file (Real-ESRGAN/options/mri-wyw-model.yml) and edit its parameters

In [None]:
#train command
# Runs Real-ESRGAN's train.py with the options file given via -opt. Both paths are relative, so the
# working directory must be the folder that contains Real-ESRGAN/.
# NOTE(review): --auto_resume presumably continues from the latest saved training state - confirm against the Real-ESRGAN docs.

!python3 Real-ESRGAN/realesrgan/train.py -opt Real-ESRGAN/options/mri-wyw-model.yml --auto_resume
