In [10]:
import os
import subprocess
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# import imageio.v3 as iio
# from pymediainfo import MediaInfo

In [88]:
anime = 'fma'
ep_path = f"mta_data/episodes/{anime}/"
abs_ep_path = os.path.abspath(f"mta_data/episodes/{anime}/")
episodes = [os.path.join(ep_path, file) for file in os.listdir(ep_path)][2]

# media_info = MediaInfo.parse(episodes[0])
# duration_sec = media_info.tracks[0].duration / 1000
# frame_rate = media_info.video_tracks[0].frame_rate

In [46]:
episodes[2]

'mta_data/episodes/fma/fma_04.mkv'

In [91]:
# Iframes and Pframes
filename = episodes

def get_frame_types(video_fn):
	command = 'ffprobe -v error -show_entries frame=pict_type -of default=noprint_wrappers=1'.split()
	out = subprocess.check_output(command + [video_fn]).decode()
	frame_types = out.replace('pict_type=','').split()
	return zip(range(len(frame_types)), frame_types)

def save_i_keyframes(video_fn):
	frame_types = get_frame_types(video_fn)
	i_frames = [x[0] for x in frame_types if x[1]=='I']
	if i_frames:
		basename = os.path.splitext(os.path.basename(video_fn))[0]
		cap = cv2.VideoCapture(video_fn)
		for frame_no in i_frames:
			cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
			ret, frame = cap.read()
			outname = os.path.join(f"mta_data/frames/{anime}/Iframes_greys", f"{basename}_i_frame_{str(frame_no)}.jpg")
			frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
			cv2.imwrite(outname, frame)
		cap.release()
	else:
		print ('No I-frames in '+video_fn)

if __name__ == '__main__':
	save_i_keyframes(filename)
# Pframes: 8m, Iframes: 1m

In [89]:
from decord import VideoReader

vr = VideoReader(episodes)
key_indices = vr.get_key_indices()
key_frames = vr.get_batch(key_indices)
for index, frame in enumerate(key_frames.asnumpy()):
	save_path = os.path.join(f"mta_data/frames/{anime}/Iframes_greys", f"fma_key_frame_{index}.jpg")
	cv2.imwrite(save_path, cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))

In [16]:
import av

content = episodes
with av.open(content) as container:
	# Signal that we only want to look at keyframes.
	stream = container.streams.video[0]
	stream.codec_context.skip_frame = "NONREF"

	for frame in container.decode(stream):

		# We use `frame.pts` as `frame.index` won't make must sense with the `skip_frame`.
		save_path = os.path.join(f"mta_data/frames/{anime}/av_NONREF_greys", f"fma_key_frame_{frame.pts}.jpg")
		# frame.to_image().save(save_path)
		
		 # Convert the frame to grayscale.
		gray_frame = frame.to_image().convert("L")
		gray_frame.save(save_path)


In [5]:
import imagehash
from PIL import Image

def with_ztransform_preprocess(hashfunc, hash_size=8):
	def function(path):
		image = Image.open(path)
		image = image.convert("L").resize((hash_size, hash_size), Image.Resampling.LANCZOS)
		data = image.getdata()
		quantiles = np.arange(100)
		quantiles_values = np.percentile(data, quantiles)
		zdata = (np.interp(data, quantiles_values, quantiles) / 100 * 255).astype(np.uint8)
		image.putdata(zdata)
		return hashfunc(image)
	return function

dhash_z_transformed = with_ztransform_preprocess(imagehash.dhash, hash_size = 8)
out_folder = f"mta_data/frames/{anime}/Pframes_filtered"
pics = [os.path.join(out_folder, file) for file in os.listdir(out_folder)]
hashes = [dhash_z_transformed(pic) for pic in pics]

df = pd.DataFrame({"image_ids": pics, "hash_values": hashes, })
df_clean = df[df.duplicated(['hash_values'], keep=False)]
for image in df_clean["image_ids"].values:
	os.remove(image)

### Panel segmentation

In [3]:
anime = 'fma'
manga_path = f"mta_data/manga/{anime}/"
manga = [os.path.join(manga_path, file) for file in os.listdir(manga_path)]
test = manga[6:11]

In [18]:
import numpy as np
import cv2

def alternative_extractor(im_path, save_path, panel_percent=0.05):
	img = cv2.imread(im_path)
	imgrey = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
	im_y = imgrey.shape[0]
	im_x = imgrey.shape[1]

	ret,thresh1 = cv2.threshold(imgrey,127,255,cv2.THRESH_BINARY_INV)
	contours,heirarchy = cv2.findContours(thresh1,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)

	for contour in contours:
		hull = cv2.convexHull(contour)
		cv2.drawContours(imgrey,[hull],-1,0,-1)

	ret,thresh1 = cv2.threshold(imgrey,127,255,cv2.THRESH_BINARY_INV)
	contours,heirarchy = cv2.findContours(thresh1,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)

	# Sort the contours by their y coordinate
	sorted_contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[1])

	panel_index = 0
	for contour in sorted_contours:
		# Find the bounding rectangle of the contour
		x, y, w, h = cv2.boundingRect(contour)

		# Crop the image using Numpy slicing
		panel = img[y:y+h, x:x+w]
		panel_y = panel.shape[0]
		panel_x = panel.shape[1]

		pp = panel_percent
		if (panel_y > pp * im_y) and (panel_x > pp * im_x):
			# Save the cropped panel image, using the current panel index
			output_path = os.path.join(save_path, f'panel_{panel_index}.png')
			cv2.imwrite(output_path, panel)
			panel_index += 1

In [19]:
from kumiko import kumikolib

def kumiko_extractor(im_path, save_path, white_thresh=95):
	k = kumikolib.Kumiko()
	# info = k.parse_images(test)
	info = k.parse_image(im_path)
	panels = info["panels"]
	img = cv2.imread(im_path, cv2.IMREAD_GRAYSCALE)

	for index, panel in enumerate(panels):
		crop_img = img[panel[1]:panel[1]+panel[3], panel[0]:panel[0]+panel[2]]

		white = np.sum(crop_img >= 254)
		total_pix = crop_img.shape[0] * crop_img.shape[1]
		white_percent = (white / total_pix) * 100
		if (white_percent < white_thresh):
			output_path = os.path.join(save_path, f'panel_{index}.png')
			cv2.imwrite(output_path, crop_img)
		else:
			return 0
	return 1


In [None]:
im_path = test[3]
save_path = f"mta_data/panels/{anime}"

if not kumiko_extractor(im_path, save_path, white_thresh=95):
	alternative_extractor(im_path, save_path, panel_percent=0.05)

### panel-anime correlation

In [16]:
anime = 'fma'
frame_path = f"mta_data/frames/{anime}/Pframes_greys"
anime_frames = [os.path.join(frame_path, file) for file in os.listdir(frame_path)]

In [None]:
import imagehash
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def with_ztransform_preprocess(hashfunc, image, hash_size=8):
	image = image.convert("L").resize((hash_size, hash_size), Image.Resampling.LANCZOS)
	data = image.getdata()
	quantiles = np.arange(100)
	quantiles_values = np.percentile(data, quantiles)
	zdata = (np.interp(data, quantiles_values, quantiles) / 100 * 255).astype(np.uint8)
	image.putdata(zdata)

	hash = hashfunc(image)

	return hash

def compare_panel_to_frames(hashfunc, panel, frame_hashes, anime_frames, hash_size=8):
	panel_hash = with_ztransform_preprocess(hashfunc, panel, hash_size)

	panel_distances = []
	# Compare the manga panel to each anime frame
	for frame_hash in frame_hashes:
		distance = panel_hash - frame_hash
		panel_distances.append(distance)
	# Find the index of the minimum distance for the current manga panel
	min_index = panel_distances.index(min(panel_distances))
	# The min_index will be the index of the anime frame that is most similar to the current manga panel
	most_similar_frame = anime_frames[min_index]
	# Return the most similar frame and the minimum distance
	return (most_similar_frame, min(panel_distances))

panels = ['mta_data/panels/fma/panel_4.jpg']

# Calculate the hash function for the anime frames using pHash
hashfunc = imagehash.phash

# Calculate the hashes of the anime frames
frame_hashes = []
for frame_path in anime_frames:
	frame = Image.open(frame_path)
	frame_hash = with_ztransform_preprocess(hashfunc, frame)
	frame_hashes.append(frame_hash)

# Initialize an empty list to store the distances
distances = []

# Iterate over the manga panels
for panel_path in panels:
	panel = Image.open(panel_path)
	# Compare the manga panel to the anime frames
	result = compare_panel_to_frames(hashfunc, panel, frame_hashes, anime_frames)
	# Append the result to the distances list
	distances.append(result)

img = mpimg.imread(distances[0][0])
imgplot = plt.imshow(img, cmap='gray')
plt.show()
distances

In [None]:
anime_frame = cv2.imread(anime_frame_path)
frame_height, frame_width, _ = anime_frame.shape
manga_panel_gray = cv2.resize(manga_panel_gray, (frame_width, frame_height))

In [17]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from PIL import Image

def calculate_mean_std(image_paths, my_batch_size=32):
	# Define a dataset that reads the images one at a time
	class ImageDataset(torch.utils.data.Dataset):
		def __init__(self, image_paths, transform=None):
			self.image_paths = image_paths
			self.transform = transform

		def __getitem__(self, index):
			image = Image.open(self.image_paths[index])
			if self.transform:
				image = self.transform(image)
			return image

		def __len__(self):
			return len(self.image_paths)

	# Define a transform that converts the images to tensors
	transform = transforms.Compose([
		transforms.ToTensor(),
	])

	# Create a dataset and a dataloader
	dataset = ImageDataset(image_paths, transform=transform)
	dataloader = DataLoader(dataset, batch_size=my_batch_size, num_workers=4)

	# Calculate mean and std
	mean = 0.0
	std = 0.0
	count = 0
	for images in dataloader:
		images = images.view(images.size(0), -1)
		mean += images.mean(dim=1).sum()
		std += images.std(dim=1).sum()
		count += images.size(0)
	mean /= count
	std /= count

	return mean, std

image_paths = anime_frames
mean, std = calculate_mean_std(image_paths)

In [18]:
import cv2
import os
import spatial
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

def resize(img, size):
	# Check if the image is a tensor
	if isinstance(img, torch.Tensor):
		# Remove the extra dimensions if the image is 4D
		if img.ndim == 4:
			img = img.squeeze(0)
		# Convert the tensor to a PIL image
		img = transforms.functional.to_pil_image(img)
	# Resize the image
	img = img.resize(size)
	return img

def get_feature_vector(img, model, mean_std):
	# Resize the image to the input size of the model
	img = resize(img, (224, 224))
	# Convert the image to a tensor and normalize it using the model's mean and std
	img_tensor = transforms.functional.to_tensor(img)
	if img_tensor.ndim == 4:
		# Remove the extra dimensions
		img_tensor = img_tensor.squeeze(0)
	img_tensor = img_tensor.view(1, 3, 224, 224)
	img_tensor = transforms.functional.normalize(img_tensor, mean=mean_std[0], std=mean_std[1])
	# Extract the features from the image using the model
	features = model(img_tensor).detach().numpy()
	return features

def calculate_similarity(vector1, vector2):
	# Convert the vectors to PyTorch tensors
	vector1 = torch.tensor(vector1)
	vector2 = torch.tensor(vector2)
	
	# Flatten the tensors to 1D
	vector1 = vector1.view(-1)
	vector2 = vector2.view(-1)
	
	# Calculate the dot product and norms of the two feature vectors
	dot_product = torch.dot(vector1, vector2)
	norm1 = torch.norm(vector1)
	norm2 = torch.norm(vector2)
	
	# Calculate the cosine similarity between the two feature vectors
	similarity = dot_product / (norm1 * norm2)
	return similarity.tolist()


# Define the mean and standard deviation values for ImageNet
# mean = [0.485, 0.456, 0.406]
# std = [0.229, 0.224, 0.225]
# Define the mean and standard deviation values from my images
mean_std = (mean, std)

# Create the normalization transform
transform = transforms.Compose([
	transforms.ToTensor(),
	transforms.Normalize(mean=mean, std=std)
])

# Load the ResNet50 model from PyTorch's model zoo
model = torchvision.models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
model.eval()

# Extract the features from the model's second-to-last fully-connected layer
model = torch.nn.Sequential(*list(model.children())[:-1])

# Read the anime frame images
frame_paths = anime_frames

# Extract the features for all anime frames
frame_features_list = []
for i, frame_path in enumerate(frame_paths):
	frame = cv2.imread(frame_path)
	frame = transform(frame).unsqueeze(0)
	frame_features = get_feature_vector(frame, model, mean_std)
	frame_features_list.append(frame_features)
	# print(f"{i} frames.")

frame_features_list_squeezed = [np.squeeze(f) for f in frame_features_list]

# 11 min
# change my normalization technique using this MinMaxScaler

In [19]:
# Read the manga panel image
panel_path = 'mta_data/panels/fma/panel_4.png'
panel = cv2.imread(panel_path)
panel = transform(panel).unsqueeze(0)
panel_features = get_feature_vector(panel, model, mean_std)

# Initialize the most similar frame and its similarity score
most_similar_frame = None
highest_similarity = -1

# Compare the manga panel with all anime frames
for i, frame_features in enumerate(frame_features_list):
	similarity = calculate_similarity(panel_features, frame_features)
	if similarity > highest_similarity:
		highest_similarity = similarity
		most_similar_frame = frame_paths[i]

# Print the most similar frame and its similarity score
print(f"Most similar frame for panel {panel_path}, {most_similar_frame} (similarity: {highest_similarity:.2f})")

Most similar frame for panel mta_data/panels/fma/panel_4.png, mta_data/frames/fma/Pframes_greys/fma_04_i_frame_8888.jpg (similarity: 0.58)
