# Set up folders to train CNNs from the data in `imagedir`

In [70]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os

import numpy as np

import PIL

import scipy

import pickle

import pandas as pd

from shutil import copyfile

from skimage import transform

from PIL import Image

import matplotlib.pyplot as plt

import openpyxl

from random import random, shuffle

import time

Configure directory paths:

In [None]:
# Source directory of the frame images. The copy cells build paths as
# imagedir + <frame name> + '.png', so the trailing slash is required.
imagedir = '/tf/notebooks/collision_avoidance/data/2020-03-10/images/' # set the path of the images

In [3]:
def path_join(dirname, filenames):
    """Prefix every entry of ``filenames`` with ``dirname``.

    Args:
        dirname: Directory part passed as first argument to ``os.path.join``.
        filenames: Iterable of file names.

    Returns:
        A list with one joined path per file name, in input order.
    """
    joined = []
    for name in filenames:
        joined.append(os.path.join(dirname, name))
    return joined

In [4]:
def load_images(image_paths):
    """Read every image in ``image_paths`` and stack the results.

    Args:
        image_paths: Iterable of paths, each handed to ``plt.imread``.

    Returns:
        ``np.asarray`` of the list of loaded images, in input order.
    """
    loaded = []
    for image_path in image_paths:
        # Each file is read from disk with matplotlib's reader.
        loaded.append(plt.imread(image_path))

    return np.asarray(loaded)

In [5]:
def load(filename):
    """Load one image as a scaled, resized array with a leading axis of 1.

    The pixel data is converted to float32 and divided by 255, resized to
    ``IMG_SHAPE`` with ``skimage.transform.resize``, and finally given a new
    leading axis through ``np.expand_dims``.

    NOTE(review): ``IMG_SHAPE`` is not defined in the visible part of this
    notebook; it has to exist in the kernel before this function is called.

    Args:
        filename: Path of the image file, passed to ``PIL.Image.open``.

    Returns:
        The resized image array with an extra axis inserted at position 0.
    """
    # Image.open is lazy and keeps the underlying file open. The context
    # manager releases the handle once the pixels are copied into the array.
    with Image.open(filename) as image:
        pixels = np.array(image).astype('float32') / 255
    resized = transform.resize(pixels, IMG_SHAPE)
    return np.expand_dims(resized, axis=0)

In [6]:
def generateDFprediction(filename, pred):
    """Build a prediction table whose first column is the file name.

    Args:
        filename: Value stored in the ``file`` column (broadcast to all rows).
        pred: Prediction data accepted by ``pd.DataFrame(data=...)``.

    Returns:
        A DataFrame made from ``pred`` with the ``file`` column assigned and
        the last column rotated to the front.
    """
    frame = pd.DataFrame(data=pred)
    frame['file'] = filename

    # Rotate the column order by one so the trailing column comes first.
    names = frame.columns.tolist()
    rotated = names[-1:] + names[:-1]
    return frame[rotated]

In [7]:
def loadOutputVectoresFile(checkfiledir, vectordir, vectorfile):
    """Read a space-separated vector file and attach file/collision columns.

    The file at ``vectordir + vectorfile`` is read without a header row into
    the columns xmin, xmax, ymin, ymax, zmin, zmax. A ``file`` column is
    computed per row with ``framename`` and a ``collision`` column with
    ``hascollision`` (both helpers are defined outside this cell). Rows whose
    ``file`` value equals "error" are dropped.

    Args:
        checkfiledir: Forwarded as first argument to ``framename``.
        vectordir: Directory prefix; concatenated directly with ``vectorfile``,
            so it must already end with a path separator.
        vectorfile: Name of the vector file; also forwarded to ``framename``.

    Returns:
        The filtered DataFrame with its last two columns (``file`` and
        ``collision``) moved to the front.
    """
    bound_names = ['xmin','xmax', 'ymin', 'ymax', 'zmin', 'zmax']
    table = pd.read_csv(vectordir + vectorfile, sep=" ", header=None, names=bound_names)

    # Derived columns are computed row by row; 'collision' is computed after
    # 'file' has been added, so hascollision sees the row including 'file'.
    table['file'] = table.apply(lambda row: framename(checkfiledir, row, vectorfile), axis=1)
    table['collision'] = table.apply(lambda row: hascollision(row), axis=1)

    kept = table[table.file != "error"]

    # Bring the two derived columns to the front of the column order.
    names = kept.columns.tolist()
    return kept[names[-2:] + names[:-2]]
    
    


# Delete images from folder

In [79]:
import os, shutil

def del_img_in_folder(folder):
    """Empty ``folder`` without removing the folder itself.

    Despite the name, every entry is removed, not only images: regular files
    and symbolic links are unlinked, real sub-directories are deleted
    recursively. A failure on one entry is printed and the loop continues.

    Args:
        folder: Path of the directory to empty; it must exist, because
            ``os.listdir`` is called on it outside the try block.
    """
    for entry in os.listdir(folder):
        target = os.path.join(folder, entry)
        try:
            is_link = os.path.islink(target)
            if os.path.isdir(target) and not is_link:
                # Real directory (not a link to one): remove the whole tree.
                shutil.rmtree(target)
            elif is_link or os.path.isfile(target):
                os.unlink(target)
        except Exception as err:
            print('Failed to delete %s. Reason: %s' % (target, err))

In [80]:
# Output folders of the dataset, one per split (train/test) and label.
folder_train_collision = '/tf/notebooks/collision_avoidance/data/images/train/collision'
folder_train_nocollision = '/tf/notebooks/collision_avoidance/data/images/train/no_collision'
folder_test_collision = '/tf/notebooks/collision_avoidance/data/images/test/collision'
folder_test_nocollision = '/tf/notebooks/collision_avoidance/data/images/test/no_collision'

# Empty all four folders (train first, then test) so a new split starts clean.
for split_folder in (
    folder_train_collision,
    folder_train_nocollision,
    folder_test_collision,
    folder_test_nocollision,
):
    del_img_in_folder(split_folder)

## Load Excel and Copy files

In [9]:
dataframes_path = '/tf/notebooks/collision_avoidance/data/2020-03-10/dataframes/' # set the path of the Excel files with labels

# Read every spreadsheet in the directory once and concatenate them in a
# single call. Concatenating inside the loop re-copies the accumulated frame
# on every iteration. The listing is sorted so the read order is deterministic.
label_frames = [
    pd.read_excel(os.path.join(dataframes_path, os.fsdecode(entry)))
    for entry in sorted(os.listdir(dataframes_path))
]

# Fail with a clear message instead of a NameError on df2 further down.
if not label_frames:
    raise FileNotFoundError(f'No label spreadsheets found in {dataframes_path}')

# df2 holds the labels of all frames, ordered by and indexed on the 'file' column.
df2 = (
    pd.concat(label_frames)
    .sort_values(by=['file'], ascending=True)
    .set_index('file')
)

## Copy frames: a random % of all frames goes to the test set

In [43]:
# Random split: each frame independently lands in test/ with probability
# PERCENTAGE, otherwise in train/. The label sub-folder comes from the
# frame's 'collision' value in df2.
PERCENTAGE = 0.05
target_path = '/tf/notebooks/collision_avoidance/data/images/' # + test or train  + collision or no collision
total_images = df2.shape[0] # number of rows in df2, i.e. the number of images to process

for img_name in df2.index: # one index entry per image in the dataset
    src = imagedir + img_name + '.png' # path of the image to copy
    colision = df2.at[img_name, 'collision']

    test_or_train = 'test/' if random() < PERCENTAGE else 'train/'
    collision_or_not = 'collision/' if colision else 'no_collision/'

    dst = target_path + test_or_train + collision_or_not + img_name + '.png'
    copyfile(src, dst)
        


## Copy frames: the frames of the last videos go to the test set

In [43]:
# Video-based split: frames whose video number is greater than
# LAST_VIDEO_NUMBER go to test/, all other frames go to train/. The label
# sub-folder comes from the frame's 'collision' value in df2.
LAST_VIDEO_NUMBER = 86
target_path = '/tf/notebooks/collision_avoidance/data/images/' # + test or train  + collision or no collision
total_images = df2.shape[0] # number of rows in df2, i.e. the number of images to process

for i in range(total_images): # iterate over all the images in the dataset

    img_name = df2.index[i]
    src = imagedir + img_name + '.png' # path of the image to copy
    colision = df2.at[img_name, 'collision']

    # The video number is the second '-'-separated field of the frame name
    # (presumably names look like 'video-00093-...' — TODO confirm).
    # int() accepts leading zeros, so the field is parsed as-is; stripping the
    # zeros first turned an all-zero field into '' and made int() raise.
    video_numb = img_name.split('-')[1]

    if int(video_numb) > LAST_VIDEO_NUMBER:
        test_or_train = 'test/'
    else:
        test_or_train = 'train/'

    if colision:
        collision_or_not = 'collision/'
    else:
        collision_or_not = 'no_collision/'

    dst = target_path + test_or_train + collision_or_not + img_name + '.png'

    copyfile(src, dst)
           


## Copy the SAME NUMBER of collisions and no collisions - % of frames

In [73]:
collision = []     # frame names whose first column value equals 1
no_collision = []  # all remaining frame names

# Walk index and row values in lockstep; the class is decided from the first
# column of each row (assumed to be the collision flag — TODO confirm).
for frame_name, row_values in zip(df2.index, df2.values):
    bucket = collision if row_values[0] == 1 else no_collision
    bucket.append(frame_name)

# Shuffle both classes, then cut each one down to the size of the smaller
# class so that both lists end up with the same number of frames.
shuffle(collision)
shuffle(no_collision)
lower = min(len(collision), len(no_collision))

collision = collision[:lower]
no_collision = no_collision[:lower]

print(f'arrays collision and no_collision ready with {lower} files')

arrays collision and no_collision ready with 3082 files


In [72]:
PERCENTAGE = 0.05  # probability that a frame is sent to test/ instead of train/
target_path = '/tf/notebooks/collision_avoidance/data/images/' # + test or train  + collision or no collision

def copy_from_array(images_names_array, isCollision):
    """Copy the named frames into the split/label folder tree.

    Each frame is read from ``imagedir + name + '.png'`` and copied to
    ``target_path + <split> + <label> + name + '.png'``. The split is drawn
    per frame: 'test/' when ``random() < PERCENTAGE``, otherwise 'train/'.

    Args:
        images_names_array: Iterable of frame names (without extension).
        isCollision: Truthy to use the 'collision/' folder, falsy for
            'no_collision/'.
    """
    for frame in images_names_array:
        source = imagedir + frame + '.png'

        split_dir = 'test/' if random() < PERCENTAGE else 'train/'
        label_dir = 'collision/' if isCollision else 'no_collision/'

        copyfile(source, target_path + split_dir + label_dir + frame + '.png')
        
## run copy_from_array on the balanced lists: collision first, then no_collision
for frame_names, is_collision in ((collision, True), (no_collision, False)):
    copy_from_array(frame_names, is_collision)

print('Filed copied with success')

## Copy the SAME NUMBER of collisions and no collisions - % of videos

In [81]:
train_collision    = []  # TRAIN frames labelled as collision
train_no_collision = []  # TRAIN frames labelled as no collision

test_collision    = []  # TEST frames labelled as collision
test_no_collision = []  # TEST frames labelled as no collision

LAST_VIDEO_NUMBER = 86  # frames of videos numbered above this go to the test split

for frame_name, row_values in zip(df2.index, df2.values):
    # The video number is the second '-'-separated field of the frame name.
    # int() accepts leading zeros, so the field is parsed as-is; stripping the
    # zeros first turned an all-zero field into '' and made int() raise.
    video_numb = frame_name.split('-')[1]
    is_test = int(video_numb) > LAST_VIDEO_NUMBER

    # The class is decided from the first column of the row
    # (assumed to be the collision flag — TODO confirm).
    if row_values[0] == 1:
        if is_test:
            test_collision.append(frame_name)
        else:
            train_collision.append(frame_name)
    else:
        if is_test:
            test_no_collision.append(frame_name)
        else:
            train_no_collision.append(frame_name)


## PROCESS TRAIN
shuffle(train_collision)  # shuffle the train collision frames
shuffle(train_no_collision)  # shuffle the train no_collision frames
train_lower = min(len(train_collision), len(train_no_collision))  # size of the smaller train class

# Cut both train lists down to the size of the smaller class.
train_collision = train_collision[:train_lower]
train_no_collision = train_no_collision[:train_lower]


## PROCESS TEST
shuffle(test_collision)  # shuffle the test collision frames
shuffle(test_no_collision)  # shuffle the test no_collision frames
test_lower = min(len(test_collision), len(test_no_collision))  # size of the smaller test class

# Cut both test lists down to the size of the smaller class.
test_collision = test_collision[:test_lower]
test_no_collision = test_no_collision[:test_lower]

print(f'TRAIN| arrays collision and no_collision ready with {train_lower} files')
print(f'TEST| arrays collision and no_collision ready with {test_lower} files')

TRAIN| arrays collision and no_collision ready with 3004 files
TEST| arrays collision and no_collision ready with 78 files


In [82]:
target_path = '/tf/notebooks/collision_avoidance/data/images/' # + test or train  + collision or no collision

def copy_from_array_vid(images_names_array, isCollision, isTest):
    """Copy the named frames into one fixed split/label folder.

    Each frame is read from ``imagedir + name + '.png'`` and copied to
    ``target_path + <split> + <label> + name + '.png'``.

    Args:
        images_names_array: Iterable of frame names (without extension).
        isCollision: Truthy to use the 'collision/' folder, falsy for
            'no_collision/'.
        isTest: Truthy to use the 'test/' folder, falsy for 'train/'.
    """
    for frame in images_names_array:
        source = imagedir + frame + '.png'

        split_dir = 'test/' if isTest else 'train/'
        label_dir = 'collision/' if isCollision else 'no_collision/'

        copyfile(source, target_path + split_dir + label_dir + frame + '.png')
        
## run copy_from_array_vid on the four balanced lists: train lists first, then test lists
for frame_names, is_collision, is_test in (
    (train_collision, True, False),
    (train_no_collision, False, False),
    (test_collision, True, True),
    (test_no_collision, False, True),
):
    copy_from_array_vid(frame_names, isCollision=is_collision, isTest=is_test)

print('Filed copied with success')

Filed copied with success
