In [None]:
import os.path
import re
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Inputs: the directory that is later globbed for .jpg images, and a text file
# holding one speed value per line.
# NOTE(review): image_dir is relative to the parent directory ("../data/...")
# while `file` is relative to the current one ("data/...") — confirm both
# resolve correctly from the notebook's working directory.
image_dir = Path("../data/customdata")

file = "data/train_flow.txt"

with open(file) as f:
    label_lines = f.readlines()

# Drop trailing blank lines only, so a stray newline at the end of the file
# does not crash float(). A blank line anywhere else still raises ValueError:
# silently skipping it would shift every later label by one position.
while label_lines and not label_lines[-1].strip():
    label_lines.pop()

# One float per remaining line; float() tolerates the trailing newline.
speeds_list = [float(line) for line in label_lines]

print(len(speeds_list))

In [None]:
def _natural_key(path):
    """Sort key that orders embedded digit runs numerically ("2.jpg" < "10.jpg").

    re.split with a capturing group always alternates text / digits / text ...,
    so keys of different files compare str-with-str and int-with-int.
    """
    return [int(token) if token.isdigit() else token
            for token in re.split(r'(\d+)', path.name)]


# Path.glob yields entries in filesystem order, which is not guaranteed to be
# stable or sorted. The speeds are attached to the images purely by position,
# so the paths must be put into a deterministic order first.
# NOTE(review): assumes the numeric order of the file names matches the line
# order of the speeds file — confirm against how the images were exported.
image_paths = sorted(image_dir.glob(r'*.jpg'), key=_natural_key)

# pd.concat(axis=1) would pad a length mismatch with NaN rows instead of
# failing, so check explicitly before pairing.
if len(image_paths) != len(speeds_list):
    raise ValueError(
        "Found {} images in {} but {} speed labels; they must match one-to-one.".format(
            len(image_paths), image_dir, len(speeds_list)
        )
    )

filepaths = pd.Series(image_paths, name='Filepath').astype(str)

speeds = pd.Series(speeds_list, name="Speed").astype(np.float32)

# Pair each image with its speed, then shuffle the rows reproducibly.
images = pd.concat([filepaths, speeds], axis=1).sample(frac=1.0, random_state=1).reset_index(drop=True)

print(images)

In [11]:
# To shorten training, a 5000-image subsample can be used instead:
# image_df = images.sample(5000, random_state=1).reset_index(drop=True)

# Currently the complete image table is used.
image_df = images

# 70% of the rows go to training, the remainder to the held-out test set.
split_options = dict(train_size=0.7, shuffle=True, random_state=1)
train_df, test_df = train_test_split(image_df, **split_options)

In [12]:
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Training generator: rescales pixel values by 1/255 and reserves 20% of its
# dataframe for the 'validation' subset.
train_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Test generator: same rescaling, no validation split.
test_generator = ImageDataGenerator(rescale=1./255)

In [None]:
# Display the training split (the notebook renders the cell's last expression).
train_df

In [None]:
# Settings shared by all three iterators. target_size is (height, width).
# class_mode='raw' passes the 'Speed' column through unchanged as the target.
shared_flow_args = dict(
    x_col='Filepath',
    y_col='Speed',
    target_size=(60, 80),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
)

# Training and validation both read train_df with the same shuffle seed and
# differ only in which subset of the generator's validation split they take.
train_split_args = dict(dataframe=train_df, shuffle=True, seed=42, **shared_flow_args)

train_images = train_generator.flow_from_dataframe(subset='training', **train_split_args)

val_images = train_generator.flow_from_dataframe(subset='validation', **train_split_args)

# The test iterator is not shuffled, so its batches follow test_df row order.
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **shared_flow_args
)

In [None]:
layers = tf.keras.layers

# Small CNN regressor: two conv/pool stages, global average pooling, two
# hidden dense layers and a single linear output unit.
inputs = tf.keras.Input(shape=(60, 80, 3))

x = inputs
for n_filters in (16, 32):
    x = layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu')(x)
    x = layers.MaxPool2D()(x)

x = layers.GlobalAveragePooling2D()(x)

for n_units in (64, 64):
    x = layers.Dense(n_units, activation='relu')(x)

outputs = layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='mse')

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen, so epochs=100 acts as an upper bound.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[early_stopping]
)

In [None]:
# Model predictions for the test iterator, squeezed to drop size-1 dimensions.
predicted_speeds = np.squeeze(model.predict(test_images))
# Labels held by the test iterator.
true_speeds = test_images.labels

# The model was compiled with loss='mse', so evaluate() returns the MSE and
# its square root is the RMSE.
test_mse = model.evaluate(test_images, verbose=0)
rmse = np.sqrt(test_mse)
print("     Test RMSE: {:.5f}".format(rmse))

r2 = r2_score(true_speeds, predicted_speeds)
print("Test R^2 Score: {:.5f}".format(r2))

In [None]:
# Baseline: RMSE of a model that always predicts the mean of the test labels.
baseline_residuals = true_speeds - np.mean(true_speeds)
baseline_sq_error_sum = np.sum(baseline_residuals**2)
null_rmse = np.sqrt(baseline_sq_error_sum / len(true_speeds))
print("Null/Baseline Model Test RMSE: {:.5f}".format(null_rmse))