In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import r2_score

2022-09-16 02:49:50.019989: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
image_dir = Path("../data/customdata")

file = "data/train_flow.txt"

speeds_list = []

with open(file) as f:
    for line in f: # read rest of lines
        speeds_list.append(float(line))

print(len(speeds_list))

20399


In [3]:
filepaths = pd.Series(list(image_dir.glob(r'*.jpg')), name='Filepath').astype(str)

speeds = pd.Series(speeds_list,name="Speed").astype(np.float32)

images = pd.concat([filepaths, speeds], axis=1).sample(frac=1.0, random_state=1).reset_index(drop=True)

print(images)

                           Filepath      Speed
0      ../data/customdata/12843.jpg   1.895863
1       ../data/customdata/4230.jpg   1.142319
2      ../data/customdata/13569.jpg  14.314065
3       ../data/customdata/6695.jpg   8.651502
4        ../data/customdata/556.jpg   1.904211
...                             ...        ...
20394  ../data/customdata/14064.jpg   7.032850
20395   ../data/customdata/4368.jpg   8.477479
20396  ../data/customdata/15206.jpg  22.861872
20397  ../data/customdata/15351.jpg   8.200358
20398  ../data/customdata/17670.jpg  22.412535

[20399 rows x 2 columns]


In [4]:
# Let's only use 5000 images to speed up training time
# image_df = images.sample(5000, random_state=1).reset_index(drop=True)

# Using all 20,399 images
image_df = images

train_df, test_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)

In [5]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

In [6]:
train_df

Unnamed: 0,Filepath,Speed
15178,../data/customdata/4522.jpg,2.648524
9203,../data/customdata/19060.jpg,22.169050
13695,../data/customdata/10655.jpg,22.558355
7124,../data/customdata/16825.jpg,4.168012
14721,../data/customdata/5309.jpg,24.156046
...,...,...
10955,../data/customdata/13037.jpg,7.237634
17289,../data/customdata/13352.jpg,1.583669
5192,../data/customdata/11021.jpg,1.515900
12172,../data/customdata/13325.jpg,7.559739


In [7]:
#     target_size=(height, width)

train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Speed',
    target_size=(60, 80),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Speed',
    target_size=(60, 80),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Speed',
    target_size=(60, 80),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=False
)

Found 11424 validated image filenames.
Found 2855 validated image filenames.
Found 6120 validated image filenames.


In [8]:
inputs = tf.keras.Input(shape=(60, 80, 3))
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='mse'
)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)

2022-09-16 02:50:10.929796: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


In [10]:
predicted_speeds = np.squeeze(model.predict(test_images))
true_speeds = test_images.labels

rmse = np.sqrt(model.evaluate(test_images, verbose=0))
print("     Test RMSE: {:.5f}".format(rmse))

r2 = r2_score(true_speeds, predicted_speeds)
print("Test R^2 Score: {:.5f}".format(r2))

     Test RMSE: 8.22049
Test R^2 Score: -0.00142


In [11]:
null_rmse = np.sqrt(np.sum((true_speeds - np.mean(true_speeds))**2) / len(true_speeds))
print("Null/Baseline Model Test RMSE: {:.5f}".format(null_rmse))

Null/Baseline Model Test RMSE: 8.21466
