In [4]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization#, CuDNNLSTM
from tensorflow.keras.callbacks import TensorBoard

import matplotlib.pyplot as plt
import numpy as np
from collections import deque
import random
import time
import pandas as pd
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

keras = tf.keras

# Set up this notebook variables
## Load your data from csv OR xlsx

In [5]:
IS_CSV = True

if IS_CSV:
    main_df_file = 'features_csv/MBV2-TrainedMB-1586455938.csv' # file name string
    loaded_df = pd.read_csv(main_df_file) # load fle sorted by filename
    loaded_df.dropna(inplace=True) # always dropna
else:
    main_df_file = 'result-merge.xlsx' # file name string
    loaded_df = pd.read_excel(main_df_file).sort_values(by=['file']) # load fle sorted by filename
    loaded_df.fillna(method="ffill", inplace=True)  # if there are gaps in data, use previously known values
    loaded_df.dropna(inplace=True) # always dropna


## results will be saved with the printed name

In [6]:
SEQ_LEN = 25  # how long of a preceeding sequence to collect for RNN - around 1 second
FUTURE_PERIOD_PREDICT = 1  # how far into the future are we trying to predict?
NET_TO_PREDICT = "TrainnedMNLSTM" # Feature extractor used
EPOCHS = 20  # how many passes through our data
BATCH_SIZE = 32  # how many batches? Try smaller batch if you're getting OOM (out of memory) errors.

### Preprocess data frames. Input must be of the structure bellow

col 0        - file

col 1        - collision

col 2        - xmin

col 3        - xmax

col 4        - ymin

col 5        - ymax

col 6        - zmin

col 7        - zmax

col 8...1287 - feature values == col[8:-1]

col 1288     - target 

In [7]:
# col 0        - file
# col 1        - collision
# col 2        - xmin
# col 3        - xmax
# col 4        - ymin
# col 5        - ymax
# col 6        - zmin
# col 7        - zmax
# col 8...1287 - feature values == col[8:-1]
# col 1288     - target 
def preprocess_df(df):

    df.dropna(inplace=True)  # cleanup again... jic.

    sequential_data = []  # this is a list that will CONTAIN the sequences
    prev_frames = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. They are made with deque, which keeps the maximum length by popping out older values as new ones come in
    last_vid_id = '-1'
    
    for i in df.values:  # iterate over the values

        curr_video_id = i[0].split('-')[1]    
        if(curr_video_id != last_vid_id):
            prev_frames = deque(maxlen=SEQ_LEN) # reset prev_frame, because we started a new video     
        
        prev_frames.append([n for n in i[8:-1]])  # store all but the target
        if len(prev_frames) == SEQ_LEN:  # make sure we have the defined SEQ_LEN sequences!
            sequential_data.append([np.array(prev_frames), i[-1]])  # append those bad boys!
            
        last_vid_id = curr_video_id

    random.shuffle(sequential_data)  # shuffle for good measure.

    collision    = []  # list that will store collision sequences and targets
    no_collision = []  # list that will store NO collision sequences and targets

    for seq, target in sequential_data:  # iterate over the sequential data
        if target == 0:  # if it's a "no collision"
            no_collision.append([seq, target])  # append to sells list
        elif target == 1:  # otherwise if the target is a 1...
            collision.append([seq, target])  # it's a collision!

    random.shuffle(collision)  # shuffle the collision
    random.shuffle(no_collision)  # shuffle the no_collision!

    lower = min(len(collision), len(no_collision))  # what's the shorter length?

    collision = collision[:lower]  # make sure both lists are only up to the shortest length.
    no_collision = no_collision[:lower]  # make sure both lists are only up to the shortest length.

    sequential_data = collision+no_collision  # add them together
    random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

    X = []
    y = []

    for seq, target in sequential_data:  # going over our new sequential data
        X.append(seq)  # X is the sequences
        y.append(target)  # y is the targets/labels (collision vs no_collision)

    return np.array(X), np.array(y)  # return X and y...and make X a numpy array!

# Load the Panda dataframe
### If you're playing with data, probably you just want to do this once because it may take some time

In [8]:
loaded_df

Unnamed: 0,file,collision,xmin,xmax,ymin,ymax,zmin,zmax,0,1,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,video-00001-frame-00001,0,0.0,0,0.0,0,0,0.0,0.000000,1.277455,...,0.084901,2.089694,0.000000,1.464460,0.066770,0.000000,1.162432,0.038809,0.000000,0.199287
1,video-00001-frame-00002,0,0.0,0,0.0,0,0,0.0,0.000000,1.279236,...,0.087343,2.106670,0.000000,1.482725,0.091790,0.000000,1.170342,0.042953,0.000000,0.157721
2,video-00001-frame-00003,0,0.0,0,0.0,0,0,0.0,0.006308,1.379721,...,0.019968,1.971704,0.000000,1.478081,0.060229,0.000000,1.038643,0.032718,0.000000,0.145420
3,video-00001-frame-00004,0,0.0,0,0.0,0,0,0.0,0.000000,1.531438,...,0.019317,1.825757,0.000000,1.364859,0.046398,0.000000,0.932802,0.030138,0.000000,0.218516
4,video-00001-frame-00005,0,0.0,0,0.0,0,0,0.0,0.000775,1.594032,...,0.276330,1.764018,0.100490,1.467184,0.007454,0.004712,1.383240,0.047734,0.000000,0.498166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18821,video-00093-frame-00270,1,1.0,0,0.0,0,0,1.0,0.135931,0.057399,...,1.814514,0.098220,0.071962,0.535567,0.176611,0.049225,0.000000,0.000000,0.000000,0.327920
18822,video-00093-frame-00271,1,1.0,0,0.0,0,0,1.0,0.424963,0.231844,...,2.448182,0.035836,0.771066,0.313978,0.010759,0.304636,0.015674,0.000000,0.000000,0.706399
18823,video-00093-frame-00272,1,1.0,0,0.0,0,0,1.0,0.098345,0.399402,...,1.670298,0.000000,0.366020,0.734022,0.035202,0.076421,0.000000,0.000000,0.000000,0.295819
18824,video-00093-frame-00273,1,1.0,0,0.0,0,0,1.0,0.254651,0.301734,...,1.347202,0.000000,0.124113,0.420140,0.118618,0.000000,0.000000,0.000000,0.001419,1.027279


# Prepare training and validation data

In [9]:
loaded_df['target'] = loaded_df['collision'].shift(-FUTURE_PERIOD_PREDICT) ## target is a future collision
loaded_df.dropna(inplace=True) # always dropna

## here, split away some slice of the future data from the main main_df.
times = sorted(loaded_df.index.values)
 # TODO : Get a better way to calculate the last 5 percent (maybe by file name??)
last_5pct = sorted(loaded_df.index.values)[-int(0.05*len(times))]

## TO FIND THE LAST_5PCT the loaded_df was analyzed and it was the found at 17635; in the following version, code should be done to find this value
last_5pct = 17635

validation_df = loaded_df[(loaded_df.index >= last_5pct)]
main_df = loaded_df[(loaded_df.index < last_5pct)]
#main_df = loaded_df

In [13]:
train_x[1:].shape

(5849, 25, 1280)

In [11]:
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_df)

print(f"TRAIN No collision: {np.count_nonzero(train_y == 0)}, collision: {np.count_nonzero(train_y == 1)}")
print(f"VALIDATION No collision: {np.count_nonzero(validation_y == 0)}, collision: {np.count_nonzero(validation_y == 1)}")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


TRAIN No collision: 2925, collision: 2925
VALIDATION No collision: 70, collision: 70


In [39]:
def plot_results(history):
    acc = history.history['sparse_categorical_accuracy']
    val_acc = history.history['val_sparse_categorical_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    fig = plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1).grid()
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')

    plt.ylim([0, 1])
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')

    plt.subplot(2, 1, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.ylim([0, 1.0])
    plt.legend(loc='upper right')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.grid()
    pdffilename = f'/tf/notebooks/collision_avoidance/Figures/trainned/{NAME}-50.50-pervideo-ud.pdf'
    fig.savefig(pdffilename, bbox_inches='tight')
    #plt.show()
    plt.close(fig) # so the figure is not displayed

In [40]:
L1s = [16] #[16,8] # 64,32, 16, 8, 4, 2, 1
L2s = [16] #[16,8,4]
L3s = [16] #[16,8,4]
D1s = [16] #[16,8]
# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
results = []
run_opts = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom = True)

for L1 in L1s:
    for L2 in L2s:
        for L3 in L3s:
            for D1 in D1s:

                NAME = f"{NET_TO_PREDICT}-L1-{L1}-L2-{L2}-L3-{L3}-D1-{D1}-SEQ-{SEQ_LEN}-PRED-{FUTURE_PERIOD_PREDICT}-{int(time.time())}"
                #print(NAME)

                model = Sequential()
                model.add(LSTM(L1, input_shape=(train_x.shape[1:]), activation='relu', return_sequences=True))
                model.add(Dropout(0.5))
                model.add(BatchNormalization())

                model.add(LSTM(L2, activation='relu', return_sequences=True))
                model.add(Dropout(0.5))
                model.add(BatchNormalization())

                model.add(LSTM(L3))
                model.add(Dropout(0.4))
                model.add(BatchNormalization())

                model.add(Dense(D1, activation='relu'))
                model.add(Dropout(0.2))

                model.add(Dense(2, activation='softmax'))


                opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

                # Compile model
                model.compile(
                    loss='sparse_categorical_crossentropy',
                    optimizer=opt,
                    metrics=['sparse_categorical_accuracy']
                )

                # tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

                #filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
                # checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

                # Train model
                history = model.fit(
                    train_x, train_y,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(validation_x, validation_y),
                    verbose = 0,
                    options = run_opts,
                    #callbacks=[tensorboard],
                )

                # Score model
                score = model.evaluate(validation_x, validation_y, verbose=0)
                #print('Test loss:', score[0])
                #print('Test accuracy:', score[1])
                acc = history.history['sparse_categorical_accuracy']
                val_acc = score[1]
                loss = history.history['loss']
                val_loss = score[0]
                # NAME - L1 - L2 - L3 - D1 - acc - val_acc - loss - val_loss
                resultline = f'{NAME},{L1},{L2},{L3},{D1},{acc[19]},{val_acc},{loss[19]},{val_loss}'
                print(resultline)
                results.append(resultline)
                # Save model
                model.save("models/{}".format(NAME))
                # plot results and save movel
                plot_results(history)

                
# SAVE RESULTS
file = open(f'results_vars/{NAME}.txt', 'w')
file.write('\n'.join(results))
file.close()

TrainnedMNLSTM-L1-16-L2-16-L3-16-D1-16-SEQ-25-PRED-1-1589555744,16,16,16,16,0.9834188222885132,0.7571428418159485,0.05719286575913429,0.5034038424491882
INFO:tensorflow:Assets written to: models/TrainnedMNLSTM-L1-16-L2-16-L3-16-D1-16-SEQ-25-PRED-1-1589555744/assets


In [41]:
results

['TrainnedMNLSTM-L1-16-L2-16-L3-16-D1-16-SEQ-25-PRED-1-1589555744,16,16,16,16,0.9834188222885132,0.7571428418159485,0.05719286575913429,0.5034038424491882']

In [43]:
train_x.shape

(5850, 25, 1280)