In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
data_dir = '../Video Location Identification/Data/'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'Val')
test_dir = os.path.join(data_dir, 'test')

cities = ['paris', 'moscow', 'cairo']

In [7]:
def make_frame_arrays(train_dir, frame_list):
    
    ##Function to store images as arrays given directory and filenames of images 
    
    shape = (224,224)
    
    frame_arr = []
    
    for images in frame_list:
        
        img = Image.open(os.path.join(train_dir, images))
        resized_image = np.array(img.resize(shape, Image.ANTIALIAS))
        
        frame_arr.append(resized_image)
        
    return np.array(frame_arr)

In [4]:
def sequencify(arr, sequence_size):
    if sequence_size <= 0:
        raise ValueError("Sequence size must be a positive integer.")

    if len(arr) < sequence_size:
        raise ValueError("Array size should be greater than or equal to the sequence size.")

    sequences = [arr[i:i + sequence_size] for i in range(len(arr) - sequence_size + 1)]
    return sequences

In [45]:
def make_dataset(path_dir, cities, data_seg):
    
    paris_dir = os.path.join(path_dir, cities[0])
    moscow_dir = os.path.join(path_dir, cities[1])
    cairo_dir = os.path.join(path_dir, cities[2])
    
    if data_seg == 'train':
        frame_nums = [x for x in range(0,18000,30)]
    
    elif data_seg == 'val':
        frame_nums = [x for x in range(0, 3000, 30)]
    
    elif data_seg == 'test':
        frame_nums = [x for x in range(0, 6000, 30)]
    else:
        raise Exception("data_seg argument must be either 'train', 'val', or 'test'")

    
    paris_frames = [cities[0]+'_frame'+str(x)+'.jpg' for x in frame_nums]
    moscow_frames = [cities[1]+'_frame'+str(x)+'.jpg' for x in frame_nums]
    cairo_frames = [cities[2]+'_frame'+str(x)+'.jpg' for x in frame_nums]
    
    paris = make_frame_arrays(paris_dir, paris_frames)
    moscow = make_frame_arrays(moscow_dir, moscow_frames)
    cairo = make_frame_arrays(cairo_dir, cairo_frames)
    
    paris_seq = np.array(sequencify(paris, 10))
    moscow_seq = np.array(sequencify(moscow, 10))
    cairo_seq = np.array(sequencify(cairo, 10))
    
    paris_labels = np.zeros(len(paris_seq))
    moscow_labels = np.ones(len(moscow_seq))
    cairo_labels = np.dot(np.ones(len(cairo_seq)), 2)
    
    features = np.vstack((np.vstack((paris_seq, moscow_seq)), cairo_seq))
    labels = np.hstack((np.hstack((paris_labels, moscow_labels)), cairo_labels))
    
    return features, labels

In [46]:
x_train, y_train = make_dataset(train_dir, cities, 'train')

  resized_image = np.array(img.resize(shape, Image.ANTIALIAS))


In [47]:
x_val, y_val = make_dataset(val_dir, cities, 'val')
x_test, y_test = make_dataset(test_dir, cities, 'test')

  resized_image = np.array(img.resize(shape, Image.ANTIALIAS))


In [49]:
np.save('x_train.npy', x_train)
np.save('y_train.npy', y_train)

np.save('x_val.npy', x_val)
np.save('y_val.npy', y_val)

np.save('x_test.npy', x_test)
np.save('y_test.npy', y_test)