### Asserting Python and TensorFlow versions due to exceptions thrown whilst loading the model onto the PiCar

In [None]:
# Running this cell switches the Kaggle kernel to Python 3.10: it builds a conda
# environment called py310 and re-points the interpreter entries under
# /opt/conda/bin at that environment's python3.
# (`python=3.10` is requested; the exact patch release is whatever mamba resolves.)

# Create the (initially empty) py310 environment.
!mamba create -n py310 -y

# Activate py310 for this one command line and install Python 3.10, jupyter and mamba into it.
!source /opt/conda/bin/activate py310 && mamba install python=3.10 jupyter mamba -y

# Replace /opt/conda/bin/python3 with a symlink to the py310 interpreter.
!sudo rm /opt/conda/bin/python3

!sudo ln -sf /opt/conda/envs/py310/bin/python3 /opt/conda/bin/python3

# Same for the python3.7 entry, so anything calling `python3.7` also gets the py310 interpreter.
!sudo rm /opt/conda/bin/python3.7

!sudo ln -sf /opt/conda/envs/py310/bin/python3 /opt/conda/bin/python3.7

# Same for the plain `python` entry.
!sudo rm /opt/conda/bin/python

!sudo ln -sf /opt/conda/envs/py310/bin/python3 /opt/conda/bin/python

### Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import keras
from keras.layers import *
from keras.models import * 
from keras.applications import InceptionResNetV2
from keras.applications import MobileNet
from keras.regularizers import l2
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.mobilenet import MobileNet
from keras.layers import Resizing

### Data import and preprocessing

In [None]:
def preprocessing(img_dir, csv_dir,
                  excluded = ('3141.png', '3999.png', '4895.png', '8285.png', '10171.png')):
    """Pair every PNG under ``img_dir`` with its label row from ``csv_dir``.

    Parameters
    ----------
    img_dir : str or Path
        Directory that is searched recursively for ``*.png`` files. Each file
        stem must be the integer image id (e.g. ``123.png``).
    csv_dir : str or Path
        CSV file with at least the columns ``image_id``, ``angle`` and ``speed``.
    excluded : iterable of str, optional
        File names directly inside ``img_dir`` that are skipped. The default is
        the list that used to be hard-coded here (presumably unusable images -
        TODO confirm why these five are dropped).

    Returns
    -------
    (DataFrame, ndarray)
        A frame with the columns ``Filepath``, ``image_name``, ``image_id``,
        ``angle`` and ``speed`` sorted by image number, and the ``image_id``
        column as a NumPy array.

    Raises
    ------
    ValueError
        If a PNG file stem is not an integer.
    """
    image_dir = Path(img_dir)
    excluded_paths = {image_dir / name for name in excluded}
    paths = [p for p in image_dir.glob('**/*.png') if p not in excluded_paths]

    # Take the id from the file stem only. The previous regex scanned the whole
    # path and depended on a digit run appearing in a parent directory name.
    filepaths = pd.DataFrame({
        'Filepath': [str(p) for p in paths],
        'image_name': np.array([int(p.stem) for p in paths], dtype = np.int64),
    }).sort_values(by = 'image_name').reset_index(drop = True)

    labels = pd.read_csv(csv_dir)[['image_id', 'angle', 'speed']]

    # Join on the id instead of concatenating by row position, so a CSV that
    # still lists excluded/missing images cannot shift labels onto wrong files.
    new_df = filepaths.merge(
        labels, how = 'inner', left_on = 'image_name', right_on = 'image_id'
    ).reset_index(drop = True)
    img_id = new_df['image_id'].to_numpy()
    return new_df, img_id

In [None]:
# Locations of the competition images and their label / submission CSV files.
train_dir = '/kaggle/input/machine-learning-in-science-ii-2023/training_data/training_data'
train_csv_dir = '/kaggle/input/machine-learning-in-science-ii-2023/training_norm.csv'
test_dir = '/kaggle/input/machine-learning-in-science-ii-2023/test_data'
test_csv_dir = '/kaggle/input/machine-learning-in-science-ii-2023/sampleSubmission.csv'

# Build one (frame, image-id array) pair per split.
train_df, train_img_id = preprocessing(train_dir, train_csv_dir)
test_df, test_img_id = preprocessing(test_dir, test_csv_dir)

### Our attempt at including the extra collected data in the training set; however, this threw exceptions

In [None]:
image_dir = Path('/kaggle/input/new-dataset')

# Each file name is parsed as "<image_id>_<speed>_<angle>.png".
# NOTE(review): the field order (speed before angle) is taken from the original
# parsing code - confirm it against how the files were actually named.
# NOTE(review): train_df is read from training_norm.csv; if those labels are
# normalised, the raw values parsed here presumably need the same scaling
# before the two frames are mixed - confirm.
records = []
for path in image_dir.glob(r'**/*.png'):
    # Split the file stem only; splitting the full path (as before) put the
    # directory prefix into the first field and broke the integer conversion.
    parts = path.stem.split('_')
    if len(parts) < 3:
        raise ValueError(f'Unexpected file name (want <id>_<speed>_<angle>.png): {path}')
    image_name, speed, angle = parts[:3]
    records.append({
        'Filepath': str(path),
        'image_name': int(image_name),
        'image_id': int(image_name),
        'speed': float(speed),
        'angle': float(angle),
    })

# Build the frame once from the collected records instead of assigning cell by
# cell through chained .iloc indexing.
new_dataset = pd.DataFrame(records, columns = ['Filepath', 'image_name', 'image_id', 'speed', 'angle'])

print(new_dataset.head())

final_df = pd.concat([train_df, new_dataset], ignore_index = True)
final_df.to_csv('final_df.csv')
print(final_df.head())
print(final_df.shape)

### Generators rescale the image data into range 0-1 whilst splitting train, validation and testing sets

In [None]:
# Generator for training data: pixel values are multiplied by 1/255 and a
# validation_split of 0.25 is reserved for the 'validation' subset.
train_generator = ImageDataGenerator(rescale = 1./255, validation_split = 0.25)

# Generator for test data: same rescaling, no split.
test_generator = ImageDataGenerator(rescale = 1./255)

In [None]:
# Options common to all three iterators: image path column, the two regression
# targets, 224x224 RGB images, raw numeric labels and batches of 160.
shared_flow_options = dict(
    x_col = 'Filepath',
    y_col = ['angle', 'speed'],
    target_size = (224,224),
    color_mode = 'rgb',
    class_mode = 'raw',
    batch_size = 160,
)

# Training and validation subsets both come from train_df via train_generator,
# shuffled with the same fixed seed.
train_images = train_generator.flow_from_dataframe(
    dataframe = train_df, shuffle = True, seed = 42, subset = 'training', **shared_flow_options
)
val_images = train_generator.flow_from_dataframe(
    dataframe = train_df, shuffle = True, seed = 42, subset = 'validation', **shared_flow_options
)

# Test images keep their frame order (no shuffling) so predictions line up with test_df.
test_images = test_generator.flow_from_dataframe(
    dataframe = test_df, shuffle = False, **shared_flow_options
)

### Custom model

In [None]:
# Plain CNN regressor: four convolutions with three max-pooling stages,
# followed by a dense head with dropout and a 2-unit linear output.
model = Sequential([
    # Convolutional feature extractor on 224x224 RGB input.
    Conv2D(32, kernel_size = (3,3), activation = 'relu', input_shape = (224,224,3)),
    Conv2D(64, (3,3), activation = 'relu'),
    MaxPooling2D(pool_size = (2,2)),

    Conv2D(128, (3,3), activation = 'relu'),
    MaxPooling2D(pool_size = (2,2)),

    Conv2D(256, (3,3), activation = 'relu'),
    MaxPooling2D(pool_size = (2,2)),

    # Dense head: 2048 -> 512 -> 64, each followed by 40% dropout.
    Flatten(),
    Dense(2048, activation = 'relu'),
    Dropout(0.4),
    Dense(512, activation = 'relu'),
    Dropout(0.4),
    Dense(64, activation = 'relu'),
    Dropout(0.4),

    # Two linear outputs, one per target column.
    Dense(2, activation = 'linear'),
])

model.compile(loss = 'mean_squared_error', optimizer = 'adam')
model.summary()

### InceptionResNetV2 (final Kaggle model)

In [None]:
pretrained_model = InceptionResNetV2(include_top = False, weights = 'imagenet', input_shape = (224, 224, 3))

# Freeze the backbone except for the layers from this index onwards.
# Played with different values and took the best architecture/layers combo based on evaluation.
first_trainable_index = 745
for index, layer in enumerate(pretrained_model.layers):
    layer.trainable = index >= first_trainable_index

# Backbone -> global average pooling -> two lightly L2-regularised dense
# layers with dropout -> 2-unit output.
model = Sequential(
    [
        pretrained_model,
        GlobalAveragePooling2D(),
        Dense(1024, activation = 'relu', kernel_regularizer = l2(1e-6), bias_regularizer = l2(1e-6)),
        Dropout(0.2),
        Dense(256, activation = 'relu', kernel_regularizer = l2(1e-7), bias_regularizer = l2(1e-7)),
        Dropout(0.2),
        Dense(2),
    ],
    name = 'incresnet-model',
)

model.summary()
model.compile(optimizer = 'adam',loss = 'mse')

### The Nvidia model is the lightest model we experimented with and produced fairly reasonable metrics 
### Unfortunately, this did not run on the Raspberry Pi 4 PiCar. We believe it would have reduced inference times drastically because it has only about 450K parameters

In [None]:
def nvidia_model(input_shape = (66,200,3)):
    """Build and compile the small Nvidia-style CNN regressor.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one input image as (height, width, channels). The default is
        the value that was previously hard-coded. Pass e.g. ``(224, 224, 3)``
        when feeding the 224x224 generators defined in this notebook, because
        the model rejects batches whose shape differs from ``input_shape``.

    Returns
    -------
    A compiled ``Sequential`` model with two outputs, trained with MSE loss
    and Adam at a learning rate of 1e-3.
    """
    model = Sequential(name = 'nvidia_model')

    # Three strided 5x5 convolutions followed by two 3x3 convolutions.
    model.add(Conv2D(24, (5,5), strides = (2,2), input_shape = input_shape, activation = 'elu'))
    model.add(Conv2D(36, (5,5), strides = (2,2), activation = 'elu'))
    model.add(Conv2D(48, (5,5), strides = (2,2), activation = 'elu'))
    model.add(Conv2D(64, (3,3), activation = 'elu'))
    model.add(Dropout(0.2)) #not in original model, added for robustness
    model.add(Conv2D(64, (3,3), activation = 'elu'))

    model.add(Flatten())
    model.add(Dropout(0.2)) #not in original model, added for robustness
    model.add(Dense(256, activation = 'elu'))
    model.add(Dense(100, activation = 'elu'))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dense(10, activation = 'relu'))

    # Output layer: two regression values. The generators in this notebook
    # supply the targets in the order ['angle', 'speed'].
    # NOTE(review): an earlier comment described raw angles of 45-135 with 90 =
    # straight; confirm which label scale is actually used for training.
    model.add(Dense(2))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = Adam(learning_rate = 1e-3)
    model.compile(loss = 'mean_squared_error', optimizer = optimizer)

    return model

model = nvidia_model()
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model.summary()

### MobileNet (final PiCar model)

In [None]:
model = Sequential(name = 'mobilenet_model')

# Resize incoming frames to 224x224 inside the model. The Resizing layer was
# previously constructed but never added, so it had no effect at all.
# The input is declared with free height/width so frames of any size are
# accepted; the 224x224 batches from the generators pass through unchanged.
# NOTE(review): this changes the saved model's input signature to a variable
# spatial size - confirm the PiCar loading code is happy with that.
model.add(keras.Input(shape = (None, None, 3)))
model.add(Resizing(224, 224, interpolation = 'bilinear', crop_to_aspect_ratio = False))

# ImageNet-pretrained MobileNet backbone without its classification head.
pretrained_model = MobileNet(include_top = False, weights = 'imagenet', input_shape = (224,224,3))

model.add(pretrained_model)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.2))
# Two regression outputs, one per target column.
model.add(Dense(2))
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

model.summary()

### Custom early stopping for InceptionResNetV2 especially used for Kaggle contest - Validation loss barely changes but the training loss significantly decreases with over-training

In [None]:
class CustomEarlyStopping(keras.callbacks.Callback):
    """Early stopping driven by the gap between validation and training loss.

    After every epoch:

    * if ``val_loss - loss`` exceeds ``max_gap``, training stops immediately and
      the best weights recorded so far (if any) are restored;
    * otherwise, if ``val_loss`` is no worse than at the best epoch and that
      epoch is at most ``patience`` epochs back, the current weights become
      the new best;
    * otherwise, once more than ``patience`` epochs have passed since the best
      epoch, training stops and the best weights are restored.

    Parameters
    ----------
    patience : int, optional
        Epochs to wait after the best epoch. ``None`` keeps the former
        hard-coded value of 30.
    max_gap : float, optional
        Largest tolerated ``val_loss - loss``; defaults to the former
        hard-coded 0.003.
    """
    # Immutable class-level defaults kept so existing attribute access still works.
    cont = 0              # epoch index of the best validation loss so far
    best_weights = None   # weights captured at that epoch

    def __init__(self, patience = None, max_gap = 0.003):
        super(CustomEarlyStopping, self).__init__()
        # Honour the argument; it used to be ignored in favour of a constant.
        self.patience = 30 if patience is None else patience
        self.max_gap = max_gap
        self._reset_state()

    def _reset_state(self):
        """Give this instance its own, empty bookkeeping."""
        # The loss dicts used to be class attributes and were therefore shared
        # by every instance of the callback.
        self.cont = 0
        self.val_loss = {}
        self.loss = {}
        self.best_weights = None

    def on_train_begin(self, logs = None):
        # Start every fit() from a clean slate when an instance is reused.
        self._reset_state()

    def _stop_and_restore(self):
        """Stop training and roll back to the best weights, if any were stored."""
        self.model.stop_training = True
        # Nothing has been stored yet if the very first epoch trips the gap rule.
        if self.best_weights is not None:
            self.model.set_weights(self.best_weights)

    def on_epoch_end(self, epoch, logs = None):
        logs = logs or {}
        # Both keys are required; a missing 'val_loss' means fit() was called
        # without validation data and raises KeyError here.
        self.loss[epoch] = logs['loss']
        self.val_loss[epoch] = logs['val_loss']

        # First recorded epoch may not be 0 (e.g. fit(initial_epoch=k)).
        if self.cont not in self.val_loss:
            self.cont = epoch

        if self.val_loss[epoch] - self.loss[epoch] <= self.max_gap:
            if self.val_loss[epoch] <= self.val_loss[self.cont] and epoch - self.cont <= self.patience:
                self.best_weights = self.model.get_weights()
                self.cont = epoch
            elif epoch - self.cont > self.patience:
                self._stop_and_restore()
        else:
            self._stop_and_restore()

# Train for up to 500 epochs; the custom callback decides when to stop early.
overfit_guard = CustomEarlyStopping(patience = 30)
hist = model.fit(train_images, validation_data = val_images, epochs = 500, callbacks = [overfit_guard])

### Regular early stopping monitoring validation loss

In [None]:
# Stop once val_loss has not improved for 30 epochs and roll back to the best weights.
callback = EarlyStopping(monitor = 'val_loss', patience = 30, restore_best_weights = True)

hist = model.fit(
    train_images,
    validation_data = val_images,
    epochs = 500,
    # fit() documents `callbacks` as a list of callback instances, so wrap the
    # single callback instead of passing it bare.
    callbacks = [callback]
)

In [None]:
# Save the current `model` under /kaggle/working.
# NOTE(review): the path has no file extension; with TF2 Keras this presumably
# writes a SavedModel directory straight into the working folder - confirm for
# the installed version.
model.save('/kaggle/working')

In [None]:
# Zip the whole /kaggle/working folder (recursively) into nvidia.zip.
# NOTE(review): the archive is always named "nvidia", whichever model cell
# last assigned `model` - rename it if another architecture was trained.
!zip -r nvidia.zip /kaggle/working

### Plotting the results

In [None]:
def plot_loss(result):
    """Plot training and validation loss per epoch.

    Parameters
    ----------
    result : object
        Anything with a ``history`` mapping that holds equally long ``'loss'``
        and ``'val_loss'`` sequences, such as the value returned by
        ``model.fit``.
    """
    # Read from the argument; the global `hist` used to be read instead, so the
    # parameter was silently ignored.
    loss = result.history['loss']
    val_loss = result.history['val_loss']
    epochs = range(len(loss))

    plt.figure(figsize = (25,8))
    # NOTE(review): subplot(122) draws in the right half of a 1x2 grid and the
    # left half stays empty - kept as in the original layout.
    plt.subplot(122)
    plt.plot(epochs, loss, label = 'Train_loss')
    # This curve is the validation loss (it was labelled as test loss before).
    plt.plot(epochs, val_loss, label = 'Validation_loss')
    plt.title('Loss over ' + str(len(loss)) + ' epochs', size = 15)
    plt.legend()
    plt.grid(True)
    plt.show()
    
# Plot the loss curves of the most recent model.fit run stored in `hist`.
plot_loss(hist)

### Generating a prediction on the test set and saving to .csv

In [None]:
def prediction(model, img_id, images = None, output_path = '/kaggle/working/inceptionresnet.csv'):
    """Predict angle/speed, round the speed and write a submission CSV.

    Parameters
    ----------
    model : object
        Anything with a ``predict(images)`` method that returns a writable
        2-D NumPy array whose columns are (angle, speed).
    img_id : array-like
        Image ids, one per prediction row; written as the ``image_id`` column.
    images : optional
        Input passed to ``model.predict``. ``None`` (default) uses the
        notebook-level ``test_images`` iterator, as before.
    output_path : str, optional
        Destination CSV. Defaults to the previously hard-coded path.

    Returns
    -------
    ndarray
        The prediction array with the speed column rounded to the nearest
        integer value (the array's float dtype is unchanged).
    """
    if images is None:
        images = test_images

    Y_pred = model.predict(images)

    # Round speed to the nearest integer. Writing the result back into the
    # float array keeps it float, so the old `.astype(int)` had no effect there.
    speed = np.rint(Y_pred[:,1])
    Y_pred[:,1] = speed

    df = pd.DataFrame({'angle': Y_pred[:,0], 'speed': speed})
    # Store speed as real integers in the CSV; skip the cast if the model
    # produced NaN/inf so the file can still be written and inspected.
    if np.isfinite(speed).all():
        df['speed'] = df['speed'].astype(int)
    df.insert(0, 'image_id', img_id)
    df.to_csv(output_path, index = False)
    return Y_pred

# Run the current `model` on the test images and write the submission CSV;
# Y_pred keeps the returned (angle, speed) array for the inspection cells below.
Y_pred = prediction(model, test_img_id)

In [None]:
# Display the prediction array returned by prediction() (columns: angle, speed).
Y_pred

In [None]:
# Distinct values found in the speed column (index 1) after the rounding done
# in prediction() - a quick check of which speed values the model emits.
Y_pred_set = set(list(Y_pred[:,1]))
Y_pred_set

In [None]:
# Range of the predicted angle column (index 0): maximum first, then minimum.
print(Y_pred[:,0].max(), Y_pred[:,0].min())