In [1]:
import os
from PIL import Image
from pathlib import Path
import pandas as pd

from config_file import config
from dataset import RetroGames

In [2]:
# Resolve the dataset root and the per-split caption files from the project config.
dataset_path = config.DATASET_PATH
test_captions_path = dataset_path / "test_captions.txt"
train_captions_path = dataset_path / "train_captions.txt"

# One RetroGames instance per split, each built from the split's directory and
# its captions file.
retro_test = RetroGames(dataset_path / "test", test_captions_path)
retro_train = RetroGames(dataset_path / "train", train_captions_path)

# Report how many frames each game has, test split first, with a blank line
# separating the two reports.
for position, (banner, split) in enumerate(
    ((" === TEST === ", retro_test), (" === TRAIN === ", retro_train))
):
    if position:
        print()
    print(banner)
    for game in split.get_games():
        frame_total = len(split.get_frames(game))
        print(f"{game}: {frame_total} frames")

 === TEST === 
Bayonetta: 630 frames
Call of Juarez_ Bound In Blood: 681 frames
Demon's Souls: 392 frames
F.E.A.R_: 563 frames
Kingdom Hearts II Final Mix: 849 frames
Ninja Gaiden Black: 745 frames
Prototype: 378 frames
The Lord of the Rings_ War in the North: 435 frames
The Saboteur: 702 frames
Trine: 361 frames
Yakuza 3: 780 frames

 === TRAIN === 
Assassin_s Creed: 327 frames
Assassin_s Creed II: 779 frames
Batman_ Arkham Asylum: 296 frames
BioShock: 141 frames
Burnout 3_ Takedown: 631 frames
Call of Duty 4_ Modern Warfare: 547 frames
Company of Heroes: 89 frames
Crysis: 417 frames
Dead Space: 193 frames
Deus Ex_ Invisible War: 298 frames
Devil May Cry 2: 83 frames
Devil May Cry 3_ Dante_s Awakening: 193 frames
Devil May Cry 4: 630 frames
Dragon Age_ Origins: 557 frames
Final Fantasy X HD Remaster: 321 frames
God of War: 145 frames
Grand Theft Auto_ San Andreas: 570 frames
Grand Theft Auto_ Vice City: 548 frames
Guild Wars: 140 frames
Half-Life: 92 frames
Half-Life 2: 632 frames
Hal

In [3]:
# Root of the reduced ("shorts") copy of the dataset, under the configured images directory.
output_dir = config.IMAGES_PATH / "retro-games-shorts"

# Per-split sub-directories of the reduced dataset.
test_dir, train_dir = output_dir / "test", output_dir / "train"

# Metadata table path, placed at the root of the reduced dataset.
metadata_filepath_csv = output_dir / "metadata.csv"

In [4]:
# Games copied into the reduced dataset, per split.
test_games = ["Bayonetta"]
train_games = ["Devil May Cry 2", "Prince of Persia_ Warrior Within", "BioShock"]

# Upper bound on the number of frames kept per game.
max_size = 200

# Rows of [relative frame path, caption]; reset on every run so re-executing
# the cell does not append duplicates.
metadata = []

# Both splits share one export loop. Test is processed before train, so rows in
# `metadata` keep the order test games first, then train games.
for split_name, dataset, games in (
    ("test", retro_test, test_games),
    ("train", retro_train, train_games),
):
    for game in games:
        frames = dataset.get_frames(game)[:max_size]

        game_dir = output_dir / split_name / game
        os.makedirs(game_dir, exist_ok=True)

        for frame in frames:
            frame_name = frame.parts[-1]

            caption = dataset.get_caption(frame)
            # Keep only the last three components of the source frame path.
            # NOTE(review): presumably <split>/<game>/<frame file>, i.e. the
            # same layout the image is saved under below - confirm against
            # the RetroGames directory structure.
            rel_frame_path = str(Path(*frame.parts[-3:]))
            metadata.append([rel_frame_path, caption])

            # `exists` has to be *called*: a bare bound method is always
            # truthy, which made the original guard skip every save. The
            # check runs per frame so every selected frame is exported, and
            # already-exported frames are not rewritten on a re-run.
            target_path = game_dir / frame_name
            if not target_path.exists():
                img = dataset.get_image_frame(frame)
                img.save(target_path)

In [None]:
# df = pd.DataFrame(metadata, columns=["file_name", "caption"])
# df.to_csv(metadata_filepath_csv, sep="\t", index=False)

In [8]:
# Rows of [relative frame path, caption] for the full dataset, test split first.
big_metadata = []

for captions_name in ("test_captions.txt", "train_captions.txt"):
    with open(dataset_path / captions_name) as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue

            # Each line is "<file name>\t<caption>". Split on the first tab
            # only, so a caption that itself contains a tab stays intact.
            file_name, caption = line.split("\t", 1)

            # Derive the relative path from the file name on THIS line. The
            # previous code used `frame`, a loop variable left over from an
            # earlier cell, which gave every row the same path.
            # NOTE(review): assumes the first column is a frame path whose
            # last three components are the ones wanted - confirm against
            # the captions file format.
            file_name = str(Path(*Path(file_name).parts[-3:]))

            big_metadata.append([file_name, caption.strip()])

In [None]:
# df = pd.DataFrame(big_metadata, columns=["file_name", "caption"])
# df.to_csv(dataset_path / "metadata.csv", sep="\t", index=False)