In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.utils import load_img, img_to_array
from sklearn.model_selection import train_test_split
# Building an AlexNet model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping




In [2]:
# Read the label table; the preview below shows its columns
# (image_name, last_action, action).
data = pd.read_csv('merged_less_up.csv')

# Show the distinct action labels exactly as loaded, missing values included.
print(data['action'].unique())

# Discard every row that has a missing value in any column.
# (Alternative left unused by the author: fill missing actions with 'no_action'.)
data = data.dropna()

# Class balance after cleaning, followed by a preview of the last rows.
print(data['action'].value_counts())
print(data.tail(10))

['up' 'right' nan 'left' 'down']
action
up       4851
right    4613
left     4041
down     1701
Name: count, dtype: int64
                          image_name last_action action
20349  11269_2024-06-01_17-34-01.png          up     up
20350  11274_2024-06-01_17-34-01.png          up     up
20351  11299_2024-06-01_17-34-01.png          up   left
20352  11300_2024-06-01_17-34-01.png        left   left
20353  11301_2024-06-01_17-34-01.png        left   left
20354  11302_2024-06-01_17-34-01.png        left   left
20355  11303_2024-06-01_17-34-01.png        left   left
20359  11313_2024-06-01_17-34-01.png        left   left
20360  11314_2024-06-01_17-34-01.png        left   left
20361  11315_2024-06-01_17-34-01.png        left   left


In [3]:
# Class index assigned to each action label. The indices are used as positions
# in the one-hot vector, so they must form the contiguous range
# 0 .. len(mapping) - 1.
mapping = {'up': 0, 'left': 1, 'right': 2, 'down': 3}


def encode_action(action):
    """Return the one-hot encoding of an action label.

    Args:
        action: A key of ``mapping`` ('up', 'left', 'right' or 'down').

    Returns:
        A 1-D float64 NumPy array of length ``len(mapping)`` that is 1.0 at
        the index ``mapping[action]`` and 0.0 everywhere else.

    Raises:
        KeyError: If ``action`` is not a key of ``mapping`` (for example a
            missing value that was not dropped before encoding).
    """
    if action not in mapping:
        raise KeyError(
            f"unknown action {action!r}; expected one of {list(mapping)}"
        )
    # Size the vector from the mapping itself so that adding a class does not
    # also require editing a hard-coded width here.
    one_hot = np.zeros(len(mapping))
    one_hot[mapping[action]] = 1
    return one_hot

# Add a column whose value in each row is the array returned by
# encode_action for that row's action label.
data["encoded_action"] = data["action"].apply(encode_action)


In [4]:
# Preview the last 10 rows, including the encoded_action column.
data.tail(10)

Unnamed: 0,image_name,last_action,action,encoded_action
20349,11269_2024-06-01_17-34-01.png,up,up,"[1.0, 0.0, 0.0, 0.0]"
20350,11274_2024-06-01_17-34-01.png,up,up,"[1.0, 0.0, 0.0, 0.0]"
20351,11299_2024-06-01_17-34-01.png,up,left,"[0.0, 1.0, 0.0, 0.0]"
20352,11300_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"
20353,11301_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"
20354,11302_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"
20355,11303_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"
20359,11313_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"
20360,11314_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"
20361,11315_2024-06-01_17-34-01.png,left,left,"[0.0, 1.0, 0.0, 0.0]"


In [5]:

# Load images and corresponding labels.
#
# Every frame is loaded at the (height, width) the network's input layer is
# declared with. load_img only resamples a file whose size differs from
# target_size, so frames that are already 256x144 are left untouched, while a
# frame of any other size no longer yields arrays that cannot be stacked or
# fed to the model.
IMAGE_SIZE = (144, 256)  # (height, width)

images = []
labels = []

# Iterate over the two needed columns directly instead of building a Series
# for every row with iterrows().
for image_name, encoded_action in zip(data['image_name'], data['encoded_action']):
    img_path = os.path.join('merged', image_name)
    img = load_img(img_path, target_size=IMAGE_SIZE)
    # Scale pixel values to [0, 1] (assumes 8-bit images — TODO confirm).
    images.append(img_to_array(img) / 255.0)
    labels.append(encoded_action)

# Convert lists to numpy arrays
X = np.array(images)
y = np.array(labels)

# Split the data into training and validation sets (80% / 20%, fixed seed).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the AlexNet model architecture

In [6]:



def alexNet(input_shape=(144, 256, 3), num_classes=4, dropout_rate=0.4):
    """Build an uncompiled AlexNet-style image classifier.

    The network stacks five convolutional layers (with max pooling after the
    1st, 2nd and 5th), three fully connected layers each followed by dropout,
    and a softmax output layer.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate applied after each fully connected layer.

    Returns:
        A ``tf.keras.models.Model`` mapping the input tensor to the softmax
        output. Calling ``alexNet()`` with no arguments builds the same
        144x256x3, 4-class model as before.
    """
    inp = tf.keras.layers.Input(shape=input_shape)

    # 1st Convolutional Layer. The input shape is defined by the Input layer
    # above, so it is not repeated on the layer itself.
    x = Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), padding='same',
               activation='relu')(inp)
    # Max Pooling
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(x)

    # 2nd Convolutional Layer
    # NOTE(review): the classic AlexNet uses a 5x5 kernel here; 11x11 is kept
    # so the architecture and parameter count stay unchanged.
    x = Conv2D(filters=256, kernel_size=(11, 11), strides=(1, 1), padding='same', activation='relu')(x)
    # Max Pooling
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(x)

    # 3rd Convolutional Layer
    x = Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(x)

    # 4th Convolutional Layer
    x = Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(x)

    # 5th Convolutional Layer
    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(x)
    # Max Pooling
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same')(x)

    # Passing it to a Fully Connected layer
    x = Flatten()(x)
    # 1st Fully Connected Layer
    x = Dense(units=4096, activation='relu')(x)
    # Add Dropout to prevent overfitting
    x = Dropout(dropout_rate)(x)

    # 2nd Fully Connected Layer
    x = Dense(units=4096, activation='relu')(x)
    # Add Dropout
    x = Dropout(dropout_rate)(x)

    # 3rd Fully Connected Layer
    x = Dense(units=1000, activation='relu')(x)
    # Add Dropout
    x = Dropout(dropout_rate)(x)

    # Output Layer: one softmax probability per class
    x = Dense(units=num_classes, activation='softmax')(x)

    model = tf.keras.models.Model(inputs=inp, outputs=x)
    return model




In [7]:

# Build the network with alexNet() and print its per-layer summary.
model = alexNet()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 144, 256, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 36, 64, 96)        34944     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 18, 32, 96)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 18, 32, 256)       2973952   
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 9, 16, 256)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 9, 16, 384)        885120

In [8]:
# Compile with Adam and categorical cross-entropy, tracking accuracy.
# The loss matches the one-hot targets and the softmax output layer.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Define early stopping callback.
# patience must be smaller than the number of training epochs or the callback
# can never stop the run. Training uses epochs=5, and the first epoch always
# counts as an improvement, so patience=5 could not trigger. With patience=2,
# training halts after two consecutive epochs without a better val_loss and
# the best weights seen so far are restored. Raise both values together if a
# longer training budget is wanted.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)



In [9]:
# Train on the training split, validating on the held-out split each epoch.
# The early_stopping callback watches the validation loss.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1dc67f86530>

In [13]:
# Evaluate the trained model on the validation split.
# The result is unpacked as (loss, accuracy), matching the single 'accuracy'
# metric passed to model.compile.
# NOTE(review): this cell's execution count (13) is out of sequence with the
# neighbouring cells (9 and 11); re-run the notebook top to bottom to confirm
# the figures shown below.
loss, acc = model.evaluate(X_val, y_val)
print(f"Validation accuracy: {acc}")
print(f"Validation loss: {loss}")


Validation accuracy: 0.7932281494140625
Validation loss: 0.5339846014976501


In [11]:

# Save the trained model to 'alexnet.h5' in the current working directory.
# NOTE(review): the .h5 suffix presumably makes Keras write its legacy HDF5
# format; consider the native .keras format if the installed Keras supports
# it, after confirming nothing else loads 'alexnet.h5'.
model.save('alexnet.h5')