# DATASET

## Raw image data checking

In [84]:
import numpy as np
from numpy import asarray
from numpy import save
from numpy import load

from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

from PIL import Image

import io

import os
from os import makedirs
from os import listdir

import shutil
from shutil import copyfile


import random
from random import seed
from random import random

from matplotlib import pyplot
from matplotlib.image import imread

In [None]:
# Preview the first nine river images of the training folder in a 3x3 grid.
folder = 'data/train/'
for i in range(9):
    # slot i + 1 of a figure with 3 rows and 3 columns
    pyplot.subplot(3, 3, i + 1)
    # the files read here are river_0.png through river_8.png
    filename = f'{folder}river_{i}.png'
    # read the pixel data and draw it into the current slot
    image = imread(filename)
    pyplot.imshow(image)
# render the assembled figure
pyplot.show()

In [None]:
# Build the training arrays: every image in data/train/ resized to 224x224,
# plus one numeric class label per image derived from the filename prefix.
folder = 'data/train/'
# filename prefix -> numeric class label
class_ids = {'city': 0.0, 'park': 1.0, 'river': 2.0, 'seaside': 3.0}
tr_images, tr_labels = [], []
# enumerate files in the directory
for file in listdir(folder):
    # determine class; None means the file belongs to no known class
    output = next(
        (label for prefix, label in class_ids.items() if file.startswith(prefix)),
        None,
    )
    if output is None:
        # skip stray files instead of reusing a stale or undefined label
        continue
    # load and convert for EVERY class (previously this only ran inside the
    # 'seaside' branch, so the other three classes were silently dropped)
    tr_image = load_img(folder + file, target_size=(224, 224))
    # list.append keeps each image as its own array; np.append without an
    # axis would flatten everything into a single 1-D vector
    tr_images.append(img_to_array(tr_image))
    tr_labels.append(output)

# convert to numpy arrays; images are stacked along a new leading axis
tr_images = asarray(tr_images)
tr_labels = asarray(tr_labels)
print(tr_images.shape, tr_labels.shape)
# save images and labels to separate files so the second save does not
# overwrite the first
save('train_running_routes.npy', tr_images)
save('train_running_routes_labels.npy', tr_labels)

In [None]:
# Build the test arrays: every image in data/test/ resized to 224x224,
# plus one numeric class label per image derived from the filename prefix.
folder = 'data/test/'
# filename prefix -> numeric class label
class_ids = {'city': 0.0, 'park': 1.0, 'river': 2.0, 'seaside': 3.0}
te_images, te_labels = [], []

for file in listdir(folder):
    # determine class; None means the file belongs to no known class
    output = next(
        (label for prefix, label in class_ids.items() if file.startswith(prefix)),
        None,
    )
    if output is None:
        # skip stray files instead of reusing a stale or undefined label
        continue
    # load and convert for EVERY class (previously this only ran inside the
    # 'seaside' branch, so the other three classes were silently dropped)
    te_image = load_img(folder + file, target_size=(224, 224))
    # list.append keeps each image as its own array; np.append without an
    # axis would flatten everything into a single 1-D vector
    te_images.append(img_to_array(te_image))
    te_labels.append(output)

# convert to numpy arrays; images are stacked along a new leading axis
te_images = asarray(te_images)
te_labels = asarray(te_labels)
print(te_images.shape, te_labels.shape)
# save images and labels to separate files so the second save does not
# overwrite the first
save('test_running_routes.npy', te_images)
save('test_running_routes_labels.npy', te_labels)

In [None]:
# Reload the saved training data and print the array shapes as a sanity check.
# NOTE(review): both loads below read the same file, so `images` and `labels`
# come back identical -- confirm which file the labels should be read from.
train_npy = 'train_running_routes.npy'
images, labels = load(train_npy), load(train_npy)
print(images.shape, labels.shape)

In [None]:
# Reload the saved test data and print the array shapes as a sanity check.
# NOTE(review): both loads below read the same file, so `images` and `labels`
# come back identical -- confirm which file the labels should be read from.
test_npy = 'test_running_routes.npy'
images, labels = load(test_npy), load(test_npy)
print(images.shape, labels.shape)

### Creating Dataset Directories 

In [None]:
# Create the directory tree <dataset_home><split><class> for every split.
dataset_home = 'running_routes/'
subdirs = ['train/', 'val/', 'test/']
# one sub-directory per route class inside each split
labeldirs = ['city_run/','park_run/', 'riverside_run/', 'seaside_run/']
for split in subdirs:
    for label in labeldirs:
        # exist_ok=True lets the cell be re-run when the folders already exist
        makedirs(dataset_home + split + label, exist_ok=True)

### Train & Validation Dataset

In [None]:
# Split data/train/ into train and val folders, one sub-folder per route class.
# Import the module explicitly: the file's earlier `from random import random`
# rebinds the name `random` to the function, and random.random() is used below.
import random
from random import seed

# fixed seed so the same files are drawn for validation on every run
seed(1)
# define ratio of pictures to use for validation
val_ratio = 0.2
# copy training dataset images into subdirectories
src_directory = 'data/train/'
# filename prefix -> class sub-directory under each split
class_dirs = {
    'city': 'city_run/',
    'park': 'park_run/',
    'river': 'riverside_run/',
    'seaside': 'seaside_run/',
}
# listdir returns names in arbitrary order; sorting makes the seeded split
# reproducible across runs and machines
for file in sorted(listdir(src_directory)):
    label_dir = next(
        (d for prefix, d in class_dirs.items() if file.startswith(prefix)),
        None,
    )
    if label_dir is None:
        # not one of the four known classes: nothing to copy
        continue
    # one draw per copied file decides whether it is held out for validation
    dst_dir = 'val/' if random.random() < val_ratio else 'train/'
    # src_directory already ends with '/', so no extra separator is needed
    copyfile(src_directory + file, dataset_home + dst_dir + label_dir + file)

### Test Dataset

In [None]:
# Copy each test image into <dataset_home>test/<class>/ according to the
# prefix of its filename; files matching no prefix are left alone.
src_directory = 'data/test/'
# (filename prefix, class sub-directory) pairs, checked in this order
prefix_to_dir = (
    ('city', 'city_run/'),
    ('park', 'park_run/'),
    ('river', 'riverside_run/'),
    ('seaside', 'seaside_run/'),
)
for file in listdir(src_directory):
    src = src_directory + '/' + file
    dst_dir = 'test/'
    for prefix, label_dir in prefix_to_dir:
        if file.startswith(prefix):
            # full destination path, including the filename
            dst_dir = dataset_home + dst_dir + label_dir + file
            copyfile(src, dst_dir)
            # at most one class per file
            break

### Final Train (train + validation) Dataset

In [None]:
# Create the final_train split with one sub-directory per route class.
dataset_home = 'running_routes/'
subdirs = ['final_train/']
# class sub-directories to create under each split
labeldirs = ['city_run/','park_run/', 'riverside_run/', 'seaside_run/']
for split in subdirs:
    for label in labeldirs:
        # exist_ok=True lets the cell be re-run when the folders already exist
        makedirs(dataset_home + split + label, exist_ok=True)

In [None]:
# Copy every file from running_routes/train/<class> into
# running_routes/final_train/<class>.
import os
import shutil

# Relative to the working directory, like the other cells, instead of an
# absolute path that only exists on one machine.
dataset_home = 'running_routes/'
for label in ('city_run', 'seaside_run', 'park_run', 'riverside_run'):
    src_root = os.path.join(dataset_home, 'train', label)
    dst_dir = os.path.join(dataset_home, 'final_train', label)
    # make sure the target is a directory; otherwise copy2 would write a
    # single file named after the class
    os.makedirs(dst_dir, exist_ok=True)
    # os.walk also descends into nested folders; everything found is copied
    # flat into dst_dir
    for root, dirs, files in os.walk(src_root):
        for file in files:
            # copy2 keeps file metadata such as timestamps
            shutil.copy2(os.path.join(root, file), dst_dir)



In [None]:
# Copy every file from running_routes/val/<class> into
# running_routes/final_train/<class>.
import os
import shutil

# Relative to the working directory, like the other cells, instead of an
# absolute path that only exists on one machine.
dataset_home = 'running_routes/'
for label in ('city_run', 'seaside_run', 'park_run', 'riverside_run'):
    src_root = os.path.join(dataset_home, 'val', label)
    dst_dir = os.path.join(dataset_home, 'final_train', label)
    # make sure the target is a directory; otherwise copy2 would write a
    # single file named after the class
    os.makedirs(dst_dir, exist_ok=True)
    # os.walk also descends into nested folders; everything found is copied
    # flat into dst_dir
    for root, dirs, files in os.walk(src_root):
        for file in files:
            # copy2 keeps file metadata such as timestamps
            shutil.copy2(os.path.join(root, file), dst_dir)

In [None]:
# Count the regular files in final_train/city_run as a check on the copies.
# Relative to the working directory, like the other cells, instead of an
# absolute path that only exists on one machine.
final_train = os.path.join('running_routes', 'final_train', 'city_run')

# isfile filters out sub-directories; summing the booleans counts the files
count = sum(
    os.path.isfile(os.path.join(final_train, path))
    for path in os.listdir(final_train)
)
# bare expression so the notebook displays the value
count