In [24]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 
import shutil
import json
import re
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.utils.data as data_utils
from torch.nn.modules import MSELoss, L1Loss

import sklearn.preprocessing
from sklearn.preprocessing import MultiLabelBinarizer
import glob
import csv
import cv2
import random
from PIL import Image
from itertools import product

In [24]:
# Ground-truth metadata directories, all under ./Movie_Poster_Metadata:
#   path1     - raw files read by reconstruct_metadata()
#   temp_path - scratch directory it creates and deletes again
#   path2     - destination for the rewritten JSON-array files
path1, temp_path, path2 = (
    f'./Movie_Poster_Metadata/{leaf}'
    for leaf in ('groundtruth', 'temp_groundtruth', 'updated_groundtruth')
)

In [25]:
def reconstruct_metadata(src_dir=None, tmp_dir=None, dst_dir=None):
    """Rewrite every raw ground-truth file as a single JSON-array file.

    Each file in the source directory is decoded as UTF-16-LE and processed
    in two passes:

    1. Every line ending in ``}`` gets a trailing comma, and lines that begin
       with a space followed by ``'_id'`` are dropped. The result is written
       as UTF-8 to the scratch directory.
    2. The first and last line of each scratch file are discarded and the
       remainder is wrapped in ``[{`` ... ``}]`` and written to the
       destination directory under the same file name.
       NOTE(review): this presumably strips the outermost ``{`` and the final
       ``},`` of the raw file - confirm against the raw data format.

    If the destination directory already exists, nothing is touched and a
    notice is printed instead.

    Args:
        src_dir: Directory with the raw files. Defaults to the notebook-level
            ``path1``.
        tmp_dir: Scratch directory; always removed before returning.
            Defaults to the notebook-level ``temp_path``.
        dst_dir: Output directory; must not exist yet for work to happen.
            Defaults to the notebook-level ``path2``.

    Returns:
        None.

    Raises:
        Any error raised while reading or writing files is re-raised after the
        partially written destination directory has been removed, so that the
        next call is not mistaken for an already-completed run.
    """
    src_dir = path1 if src_dir is None else src_dir
    tmp_dir = temp_path if tmp_dir is None else tmp_dir
    dst_dir = path2 if dst_dir is None else dst_dir

    # Check the output first so a skipped run does not leave an empty scratch
    # directory behind.
    if os.path.exists(dst_dir):
        print('directories already exists. not cleaning metadata')
        return None

    os.makedirs(dst_dir)
    os.makedirs(tmp_dir, exist_ok=True)

    try:
        # Pass 1: add a comma between consecutive JSON objects and drop the
        # '_id' lines.
        for file_name in os.listdir(src_dir):
            raw_path = os.path.join(src_dir, file_name)
            scratch_path = os.path.join(tmp_dir, file_name)
            with open(raw_path, 'r', encoding='utf-16-le') as raw_file, \
                    open(scratch_path, 'w', encoding='utf-8') as scratch_file:
                for line in raw_file:
                    line = line.replace('}\n', '},\n')
                    if not re.findall('^ \'_id\'', line):
                        scratch_file.write(line)

        # Pass 2: turn each scratch file into one JSON array.
        for file_name in os.listdir(tmp_dir):
            with open(os.path.join(tmp_dir, file_name), 'r', encoding='utf-8') as scratch_file:
                lines = scratch_file.readlines()
            with open(os.path.join(dst_dir, file_name), 'w', encoding='utf-8') as out_file:
                out_file.write('[{')
                out_file.writelines(lines[1:-1])
                out_file.write('}]')
    except Exception:
        # dst_dir was created by this call (it did not exist above), so it is
        # safe to discard; leaving it would make every later call a no-op.
        shutil.rmtree(dst_dir, ignore_errors=True)
        raise
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    return None
                 

In [26]:
# Build the JSON-array metadata files; this is skipped (with a printed notice)
# when the output directory already exists.
reconstruct_metadata()

directories already exists. not cleaning metadata


In [17]:
# Root of the poster image dataset and a one-time listing of its top-level
# entries (os.listdir returns files as well as sub-directories).
folder_directory = './Movie_Poster_Dataset'
folders = os.listdir(folder_directory)

In [115]:
# Salt-and-pepper augmentation: for every '.jpg' under the dataset root, write
# a noisy copy named '<name>_salty.jpg' into the augmented folder.
augmented_folder = './Movie_Poster_Dataset_Augmented/'
os.makedirs(augmented_folder, exist_ok=True)

path3 = './Movie_Poster_Dataset/'

for dirname in os.listdir(path3):
    source_dir = os.path.join(path3, dirname)
    # Plain files in the dataset root would make the inner os.listdir raise.
    if not os.path.isdir(source_dir):
        continue

    for filename in os.listdir(source_dir):
        name, ext = os.path.splitext(filename)
        if ext != '.jpg':
            continue

        # The context manager releases the file handle once the pixels have
        # been copied into the array.
        with Image.open(os.path.join(source_dir, filename)) as image:
            img_array = np.array(image)

        # Fraction of pixels to corrupt, split evenly between salt and pepper.
        noise_intensity = random.uniform(0.05, 0.2)

        # Draw one label per pixel (first two axes only). Drawing one label
        # per channel value would corrupt channels independently and produce
        # coloured speckles instead of white/black pixels.
        salt_and_pepper = np.random.choice([0, 1, 2],
                                           size=img_array.shape[:2],
                                           p=[1 - noise_intensity,
                                              noise_intensity / 2.0,
                                              noise_intensity / 2.0])

        noisy_image_array = img_array.copy()
        # A 2-D boolean mask on a 3-D array selects whole pixels, so every
        # channel of a hit pixel is overwritten.
        noisy_image_array[salt_and_pepper == 1] = 255  # salt: channel maximum (white for RGB/grayscale)
        noisy_image_array[salt_and_pepper == 2] = 0    # pepper: channel minimum (black for RGB/grayscale)

        # Save the augmented image
        new_filename = os.path.join(augmented_folder, f'{name}_salty.jpg')
        Image.fromarray(noisy_image_array).save(new_filename)


In [103]:
# Block-dropout augmentation: for every '.jpg' under the dataset root, zero
# some channels inside 200-300 random rectangles and save the result as
# '<name>_dropped.jpg' in the augmented folder.
path3 = './Movie_Poster_Dataset/'
augmented_folder = './Movie_Poster_Dataset_Augmented/'
os.makedirs(augmented_folder, exist_ok=True)

for dirname in os.listdir(path3):
    source_dir = os.path.join(path3, dirname)
    # Plain files in the dataset root would make the inner os.listdir raise.
    if not os.path.isdir(source_dir):
        continue

    for filename in os.listdir(source_dir):
        name, ext = os.path.splitext(filename)
        if ext != '.jpg':
            continue

        with Image.open(os.path.join(source_dir, filename)) as image:
            img_array = np.array(image)
            # Single-band (grayscale) images decode to a 2-D array, which has
            # no channel axis to drop from; expand them to RGB first.
            if img_array.ndim == 2:
                img_array = np.array(image.convert('RGB'))

        # The shape is the same for every block, so read it once.
        height, width, channels = img_array.shape
        num_blocks = random.randint(200, 300)

        for _ in range(num_blocks):
            # Clamp the block to the image so the start-offset range below
            # never becomes negative on very small images.
            block_size_x = min(random.randint(5, 20), width)
            block_size_y = min(random.randint(5, 20), height)
            start_x = random.randint(0, width - block_size_x)
            start_y = random.randint(0, height - block_size_y)

            # Randomly choose channels to set to 0; at least one channel is
            # always left untouched.
            channels_to_drop = random.sample(range(channels), random.randint(1, channels - 1))

            img_array[start_y: start_y + block_size_y, start_x: start_x + block_size_x, channels_to_drop] = 0

        dropped_image = Image.fromarray(img_array.astype(np.uint8))

        #save augmented image
        new_filename = os.path.join(augmented_folder, f'{name}_dropped.jpg')
        dropped_image.save(new_filename)

In [119]:
# Colour-distortion augmentation: make sure the output folder exists and point
# path3 at the dataset root that is walked for source images.

augmented_folder = './Movie_Poster_Dataset_Augmented/'
os.makedirs(augmented_folder, exist_ok=True)
path3 = './Movie_Poster_Dataset/'

def apply_color_distortion(img_array, hue_shift=None):
    """Add one integer offset to every channel value, wrapping modulo 256.

    Despite the ``hue_shift`` name this is not a hue rotation in HSV space:
    the same offset is added to every element, so values pushed past 255 wrap
    round to small numbers and values pushed below 0 wrap round to large ones.

    Args:
        img_array: Array of integer pixel values.
            Assumes 8-bit image data (0-255) - TODO confirm for non-JPEG input.
        hue_shift: Offset to add. When omitted, a random integer in
            [-50, 50] is drawn, as before.

    Returns:
        A new ``uint8`` array of the same shape holding
        ``(img_array + hue_shift) % 256``. The input is not modified.
    """
    if hue_shift is None:
        hue_shift = random.randint(-50, 50)

    # Widen before the arithmetic. Mixing a uint8 array with a Python int
    # depends on NumPy's promotion rules, and under NumPy 2 both a negative
    # offset and the literal 256 raise OverflowError for uint8.
    widened = np.asarray(img_array).astype(np.int32)
    return ((widened + hue_shift) % 256).astype(np.uint8)

# Write a colour-distorted copy of every '.jpg' under the dataset root into
# the augmented folder as '<name>_distorted.jpg'.
for dirname in os.listdir(path3):
    source_dir = os.path.join(path3, dirname)
    # Plain files in the dataset root would make the inner os.listdir raise.
    if not os.path.isdir(source_dir):
        continue

    for filename in os.listdir(source_dir):
        name, ext = os.path.splitext(filename)
        if ext != '.jpg':
            continue

        # The context manager releases the file handle once the pixels have
        # been copied into the array.
        with Image.open(os.path.join(source_dir, filename)) as image:
            img_array = np.array(image)

        #apply color distortion
        img_array = apply_color_distortion(img_array)

        # The helper may hand back a wider integer dtype; cast back to 8-bit
        # before building the image.
        distorted_image = Image.fromarray(img_array.astype(np.uint8))

        #save augmented image
        new_filename = os.path.join(augmented_folder, f'{name}_distorted.jpg')
        distorted_image.save(new_filename)

In [120]:
# Rotation augmentation: for every '.jpg' under the dataset root, save a copy
# rotated by a random angle in [-180, 180] degrees as '<name>_rotated.jpg' in
# the augmented folder.
path3 = './Movie_Poster_Dataset/'
augmented_folder = './Movie_Poster_Dataset_Augmented/'
os.makedirs(augmented_folder, exist_ok=True)

for dirname in os.listdir(path3):
    source_dir = os.path.join(path3, dirname)
    # Plain files in the dataset root would make the inner os.listdir raise.
    if not os.path.isdir(source_dir):
        continue

    for filename in os.listdir(source_dir):
        name, ext = os.path.splitext(filename)
        if ext != '.jpg':
            continue

        rotation_angle = random.uniform(-180, 180)

        # rotate() returns a new image, so the source handle can be released
        # as soon as the rotated copy exists.
        with Image.open(os.path.join(source_dir, filename)) as image:
            rotated_image = image.rotate(rotation_angle)

        #save augmented image
        new_filename = os.path.join(augmented_folder, f'{name}_rotated.jpg')
        rotated_image.save(new_filename)