# **Homework 2**

In [66]:
import gdown
import zipfile
import os

In [67]:
# Fetch the dataset archive from Google Drive unless the data folder is already present
if not os.path.exists('data/'):
    file_id = '1KDN-rFCq9IDJ7_kNW5y5Co100KNpklz-'
    output_path = 'data.zip'
    url = f'https://drive.google.com/uc?id={file_id}'

    # Download the archive (progress output enabled)
    gdown.download(url, output_path, quiet=False)

    # Unpack everything into the data folder
    with zipfile.ZipFile(output_path, 'r') as zip_ref:
        zip_ref.extractall('data')

    # The archive is not needed once its contents are extracted
    os.remove(output_path)

else:
    print('Files already downloaded.')


Files already downloaded.


In [68]:
# Unpack the train/test archives into the working directory, unless both folders already exist
if not (os.path.exists('test/') and os.path.exists('train')):
    print('Extracting the test.zip and train.zip files...')
    # test.zip is extracted first, then train.zip
    for archive_path in ('data/public/test.zip', 'data/public/train.zip'):
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall()

    print('Done!')
else:
    print('Files already extracted')

Files already extracted


## First Approach

For the first approach, we will train our model with a custom convolutional neural network (CNN) architecture designed from scratch.

### Step 1: Data Loading and Preprocessing

We will load and preprocess our dataset with Keras' `ImageDataGenerator`: pixel values are rescaled to [0, 1] and the training images are lightly augmented (zoom, shifts, and horizontal flips). We will also create data generators for the training and validation sets.

### Step 2: Model Architecture

In this approach, we will define a custom CNN model. The model will consist of multiple convolutional layers followed by fully connected layers. We will use ReLU activation functions and dropout regularization to prevent overfitting.

### Step 3: Training Loop

We will train the model with Keras' `model.fit`, which iterates over the training set, computes the loss, performs backpropagation, and updates the model's weights. Early stopping and learning-rate reduction on plateau are used as callbacks.

### Step 4: Model Evaluation

After training, we will evaluate the model on the validation set. We will calculate the accuracy of the model by comparing the predicted labels with the ground truth labels.

### Step 5: Save the Model

Finally, we will save the trained model to a file for future use.

## Conclusion

In this first approach, we used a custom CNN architecture to train our model. This approach gives us more control over the model's architecture and can potentially achieve better performance. However, it requires more manual design and experimentation than using a pre-trained model such as ResNet18.

It is important to note that the choice of architecture depends on the specific problem and dataset. It is recommended to experiment with different architectures and hyperparameters to find the best model for your task.


In [69]:
import tensorflow as tf

# Ask TensorFlow which GPU devices it can see (this only lists them; nothing is enabled here)
physical_devices = tf.config.list_physical_devices('GPU')

# Report whether at least one GPU was found
print('GPU is', 'available' if physical_devices else 'NOT AVAILABLE')

GPU is available


In [70]:
from keras_preprocessing.image import ImageDataGenerator

# Dataset folders: 'train/' is used for fitting, 'test/' is used as the validation set
trainingset = 'train/'
validationset = 'test/'

# Number of images per batch, shared by both generators
batch_size = 64

# Training images: rescale pixels by 1/255 and apply light augmentation
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
)

# Stream shuffled, augmented training batches from the class sub-folders
train_generator = train_datagen.flow_from_directory(
    directory=trainingset,
    target_size=(96, 96),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    # NOTE(review): train_datagen sets no validation_split, so this looks like a no-op — confirm
    subset='training'
)

# Validation images: rescaling only, no augmentation
validation_datagen = ImageDataGenerator(rescale=1. / 255)

# Not shuffled, so batch order lines up with validation_generator.classes
validation_generator = validation_datagen.flow_from_directory(
    directory=validationset,
    target_size=(96, 96),
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=False
)

# Dataset statistics read from the training generator
num_samples, num_classes = train_generator.n, train_generator.num_classes
input_shape = train_generator.image_shape

# Class names are the keys of the class-name -> index mapping
classnames = list(train_generator.class_indices)
img_h, img_w = input_shape[0], input_shape[1]

# Summary of what was loaded
print("Image height = %d, Image Width = %d" % (img_h, img_w))
print("Image input %s" % str(input_shape))
print("Classes: %r" % classnames)
print('Loaded %d training samples from  %d classes.' % (num_samples, num_classes))
print('Loaded %d test samples from %d classes.' % (validation_generator.n, validation_generator.num_classes))


Found 6369 images belonging to 5 classes.
Found 2749 images belonging to 5 classes.
Image height = 96, Image Width = 96
Image input (96, 96, 3)
Classes: ['0', '1', '2', '3', '4']
Loaded 6369 training samples from  5 classes.
Loaded 2749 test samples from 5 classes.


In [71]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten,\
                         Conv2D, MaxPooling2D
from keras import optimizers

def MyCNN(input_shape, num_classes, learning_rate=0.0001):
    """Build and compile the custom CNN used for the first approach.

    Architecture: three 4x4 convolutions (15, 20 and 30 filters) with ReLU,
    max-pooling after the second and third convolution, then two dense
    layers (128 and 96 units) each followed by 40% dropout, and a softmax
    output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate of the RMSprop optimizer.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    model = Sequential(name="MyOptimizedCNN")

    # C1 Convolutional Layer
    model.add(Conv2D(filters=15, input_shape=input_shape, kernel_size=(4,4)))
    model.add(Activation('relu'))

    # C2 Convolutional Layer
    model.add(Conv2D(filters=20, kernel_size=(4,4)))
    model.add(Activation('relu'))
    # Pooling
    model.add(MaxPooling2D(pool_size=(2,2)))

    # C3 Convolutional Layer
    model.add(Conv2D(filters=30, kernel_size=(4,4)))
    model.add(Activation('relu'))
    # Pooling
    model.add(MaxPooling2D(pool_size=(2,2)))

    # Flatten
    model.add(Flatten())

    # D1 Dense Layer
    model.add(Dense(128))
    model.add(Activation('relu'))
    # Dropout
    model.add(Dropout(0.4))
    # D2 Dense Layer
    model.add(Dense(96))
    model.add(Activation('relu'))
    # Dropout
    model.add(Dropout(0.4))

    # Output Layer
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Compile
    # `learning_rate` replaces the deprecated `lr` keyword, matching the Adam
    # optimizer configured elsewhere in this notebook.
    optimizer = optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

# Create the model and print its layer summary
# Input shape is (96, 96, 3): height, width, RGB channels
model = MyCNN(input_shape, num_classes)
model.summary()

Model: "MyOptimizedCNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_50 (Conv2D)          (None, 93, 93, 15)        735       
                                                                 
 activation_83 (Activation)  (None, 93, 93, 15)        0         
                                                                 
 conv2d_51 (Conv2D)          (None, 90, 90, 20)        4820      
                                                                 
 activation_84 (Activation)  (None, 90, 90, 20)        0         
                                                                 
 max_pooling2d_43 (MaxPoolin  (None, 45, 45, 20)       0         
 g2D)                                                            
                                                                 
 conv2d_52 (Conv2D)          (None, 42, 42, 30)        9630      
                                                    

In [72]:
from keras import callbacks

# Define callbacks:
# - stop when val_loss has not improved for 5 epochs and restore the best weights
# - multiply the learning rate by 0.2 when val_loss stalls for 3 epochs (floor 1e-6)
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

# Uses the `train_generator` and `validation_generator` defined in the data-loading cell

# Calculate steps per epoch and validation steps (one step per batch)
steps_per_epoch = len(train_generator)
val_steps = len(validation_generator)

# Defined up front so later cells can tell an interrupted run (None) from a completed one
history = None
try:
    # Train the model with better training parameters
    history = model.fit(
        train_generator,
        epochs=50,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=val_steps,
        callbacks=[early_stopping, reduce_lr]
    )

except KeyboardInterrupt:
    # Stopping by hand is allowed, but report it instead of passing silently
    print('Training interrupted by user before completion.')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 18: early stopping


In [73]:
# Save the trained model to 'model.h5' in the working directory so it can be reused later
model.save('model.h5')

In [53]:
# One step per batch. len() of the generator already rounds up for a final partial
# batch, unlike n // batch_size + 1, which adds a spurious extra step whenever n is
# an exact multiple of the batch size.
val_steps = len(validation_generator)

# Model.evaluate accepts generators directly; evaluate_generator is deprecated
loss, acc = model.evaluate(validation_generator, steps=val_steps)
print('Test loss: %f' %loss)
print('Test accuracy: %f' %acc)

  loss, acc = model.evaluate_generator(validation_generator,steps=val_steps)


Test loss: 1.120741
Test accuracy: 0.654784


In [43]:

from sklearn.metrics import classification_report
import numpy as np
 
# Class probabilities for every validation batch
preds = model.predict(validation_generator, steps=val_steps)

# Ground truth comes straight from the generator; this lines up with the
# predictions because validation_generator was created with shuffle=False
Ytest = validation_generator.classes
# Predicted class = index of the highest probability in each row
Ypred = np.argmax(preds, axis=1)

# Per-class precision / recall / F1 with three decimals
print(classification_report(y_true=Ytest, y_pred=Ypred, target_names=classnames, digits=3))

              precision    recall  f1-score   support

           0      0.500     0.075     0.131        53
           1      0.287     0.509     0.367       110
           2      0.417     0.512     0.460       162
           3      0.810     0.736     0.771       758
           4      0.000     0.000     0.000        15

    accuracy                          0.638      1098
   macro avg      0.403     0.367     0.346      1098
weighted avg      0.674     0.638     0.643      1098



In [44]:
from sklearn.metrics import confusion_matrix

# Class probabilities for every validation batch
preds = model.predict(validation_generator,verbose=1,steps=val_steps)

Ypred = np.argmax(preds, axis=1)
Ytest = validation_generator.classes  # shuffle=False in validation_generator

# Pin the label set so row/column k always corresponds to classnames[k],
# even if some class never appears in Ytest or Ypred
cm = confusion_matrix(Ytest, Ypred, labels=np.arange(len(classnames)))

# Keep only the misclassifications: zero the diagonal, then collect the
# (true, predicted) index pairs that have a non-zero count
off_diagonal = cm.copy()
np.fill_diagonal(off_diagonal, 0)
pairs = np.argwhere(off_diagonal > 0)
counts = off_diagonal[pairs[:, 0], pairs[:, 1]]

# conf rows are (i, j, cm[i][j]); the array keeps shape (0, 3) when there are no
# errors, so the sort below cannot fail on a perfect classifier
col=2
conf = np.column_stack([pairs, counts])
# decreasing order by 3-rd column (i.e., cm[i][j]); stable so ties keep row-major order
conf = conf[np.argsort(-conf[:,col], kind='stable')]

print('%-16s     %-16s  \t%s \t%s ' %('True','Predicted','errors','err %'))
print('------------------------------------------------------------------')
for k in conf:
    # error percentage is relative to the total number of validation samples
    print('%-16s ->  %-16s  \t%d \t%.2f %% ' %(classnames[k[0]],classnames[k[1]],k[2],k[2]*100.0/validation_generator.n))
  

True                 Predicted         	errors 	err % 
------------------------------------------------------------------
3                ->  1                 	107 	9.74 % 
3                ->  2                 	94 	8.56 % 
2                ->  3                 	58 	5.28 % 
1                ->  3                 	36 	3.28 % 
0                ->  3                 	27 	2.46 % 
0                ->  1                 	15 	1.37 % 
2                ->  1                 	15 	1.37 % 
1                ->  2                 	13 	1.18 % 
4                ->  3                 	9 	0.82 % 
0                ->  2                 	5 	0.46 % 
2                ->  4                 	3 	0.27 % 
4                ->  1                 	3 	0.27 % 
4                ->  2                 	2 	0.18 % 
3                ->  4                 	2 	0.18 % 
2                ->  0                 	1 	0.09 % 
1                ->  0                 	1 	0.09 % 
3                ->  0                 	1 	0.09 % 
0 

In [74]:
import sys
import numpy as np

try:
    import gymnasium as gym
except ModuleNotFoundError:
    print('gymnasium module not found. Try to install with')
    print('pip install gymnasium[box2d]')
    sys.exit(1)



def play(env, model, rescale=1.0 / 255):
    """Drive one episode of `env` with `model` choosing the actions.

    The environment is reset with a fixed seed, the first 50 frames are
    skipped by repeating action 0, and from then on each observation is
    passed to `model`; the index of the largest output is used as the action.
    The episode ends when the environment reports termination or truncation.

    Args:
        env: Environment exposing `reset(seed=...)`, `step(action)` and `close()`.
        model: Callable taking a batch of one preprocessed frame and returning
            one score per action.
        rescale: Factor applied to each observation before it is given to the
            model. The default matches the `rescale=1./255` used by the
            training generators in this notebook, so the model sees inputs in
            the same range it was trained on. Pass 1.0 to feed raw frames.
    """
    seed = 2000
    try:
        obs, _ = env.reset(seed=seed)

        # drop initial frames
        action0 = 0
        for _ in range(50):
            obs, _, _, _, _ = env.step(action0)

        done = False
        while not done:
            # Apply the same preprocessing as at training time
            frame = np.asarray(obs, dtype=np.float32) * rescale
            p = model(np.expand_dims(frame, axis=0))  # add a batch dimension of size 1
            action = int(np.argmax(p))  # adapt to your model
            obs, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
    finally:
        # Release the environment even if the model or a step raises
        env.close()


# Environment configuration: fixed domain, discrete action space, render mode 'human'
env_name = 'CarRacing-v2'
env_arguments = dict(
    domain_randomize=False,
    continuous=False,
    render_mode='human',
)
env = gym.make(env_name, **env_arguments)

# Show what the agent will interact with
print("Environment:", env_name)
print("Action space:", env.action_space)
print("Observation space:", env.observation_space)

# Let the trained model drive one episode
play(env, model)


Environment: CarRacing-v2
Action space: Discrete(5)
Observation space: Box(0, 255, (96, 96, 3), uint8)


## Second Try ##

In [87]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, BatchNormalization, Dropout, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler

def lr_scheduler(epoch, lr):
    """Return the learning rate to use for `epoch`.

    The rate is multiplied by 0.9 on every 10th epoch (10, 20, ...), never on
    epoch 0, and is returned unchanged otherwise.
    """
    is_decay_epoch = epoch != 0 and epoch % 10 == 0
    # Adjust the learning rate decay factor as needed
    return lr * 0.9 if is_decay_epoch else lr

def AdvancedCNN(num_classes, input_shape):
    """Build, compile and summarize the CNN used for the second try.

    Architecture: two 5x5 convolutions (15 and 20 filters) and one 3x3
    convolution (30 filters), each followed by ReLU, with max-pooling after
    the second and third convolution; then two dense layers (128 and 96
    units) each followed by 40% dropout, and a softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer.
        input_shape: Shape of one input image, passed to the first Conv2D.

    Returns:
        The compiled Sequential model (Adam, categorical cross-entropy,
        accuracy metric). The model summary is printed as a side effect.
    """
    model = Sequential()

    layer_stack = (
        # C1 convolution
        Conv2D(filters=15, input_shape=input_shape, kernel_size=(5,5)),
        Activation('relu'),
        # C2 convolution + pooling
        Conv2D(filters=20, kernel_size=(5,5)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2,2)),
        # C3 convolution + pooling
        Conv2D(filters=30, kernel_size=(3,3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2,2)),
        # Flatten the feature maps for the dense head
        Flatten(),
        # D1 dense + dropout
        Dense(128),
        Activation('relu'),
        Dropout(0.4),
        # D2 dense + dropout
        Dense(96),
        Activation('relu'),
        Dropout(0.4),
        # Output layer
        Dense(num_classes),
        Activation('softmax'),
    )
    for layer in layer_stack:
        model.add(layer)

    # Compile the model (adjust the learning rate as needed)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )

    # Print the model summary
    model.summary()

    return model

# Training images: rescale pixels by 1/255 and augment with rotation, shifts, shear, zoom and horizontal flips
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest'
)

# Training batches are read from the 'train' directory and shuffled
train_generator = train_datagen.flow_from_directory(
    directory='train',
    target_size=(96, 96),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=64,
    shuffle=True
)

# Validation batches are read from the 'test' directory: rescaling only, fixed order
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
    directory='test',
    target_size=(96, 96),
    class_mode='categorical',
    batch_size=64,
    shuffle=False
)

# Build the network, then fit it for 50 epochs with the learning-rate scheduler as a callback
model = AdvancedCNN(num_classes, input_shape)
lr_schedule = LearningRateScheduler(lr_scheduler)
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[lr_schedule],
)

Found 6369 images belonging to 5 classes.
Found 2749 images belonging to 5 classes.
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_71 (Conv2D)          (None, 92, 92, 15)        1140      
                                                                 
 activation_89 (Activation)  (None, 92, 92, 15)        0         
                                                                 
 conv2d_72 (Conv2D)          (None, 88, 88, 20)        7520      
                                                                 
 activation_90 (Activation)  (None, 88, 88, 20)        0         
                                                                 
 max_pooling2d_57 (MaxPoolin  (None, 44, 44, 20)       0         
 g2D)                                                            
                                                                 
 conv2d_73 (Conv2D)          (None, 

In [88]:
import sys
import numpy as np

try:
    import gymnasium as gym
except ModuleNotFoundError:
    print('gymnasium module not found. Try to install with')
    print('pip install gymnasium[box2d]')
    sys.exit(1)



def play(env, model, rescale=1.0 / 255):
    """Drive one episode of `env` with `model` choosing the actions.

    The environment is reset with a fixed seed, the first 50 frames are
    skipped by repeating action 0, and from then on each observation is
    passed to `model`; the index of the largest output is used as the action.
    The episode ends when the environment reports termination or truncation.

    Args:
        env: Environment exposing `reset(seed=...)`, `step(action)` and `close()`.
        model: Callable taking a batch of one preprocessed frame and returning
            one score per action.
        rescale: Factor applied to each observation before it is given to the
            model. The default matches the `rescale=1./255` used by the
            training generators in this notebook, so the model sees inputs in
            the same range it was trained on. Pass 1.0 to feed raw frames.
    """
    seed = 2000
    try:
        obs, _ = env.reset(seed=seed)

        # drop initial frames
        action0 = 0
        for _ in range(50):
            obs, _, _, _, _ = env.step(action0)

        done = False
        while not done:
            # Apply the same preprocessing as at training time
            frame = np.asarray(obs, dtype=np.float32) * rescale
            p = model(np.expand_dims(frame, axis=0))  # add a batch dimension of size 1
            action = int(np.argmax(p))  # adapt to your model
            obs, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
    finally:
        # Release the environment even if the model or a step raises
        env.close()


# Environment configuration: fixed domain, discrete action space, render mode 'human'
env_name = 'CarRacing-v2'
env_arguments = dict(
    domain_randomize=False,
    continuous=False,
    render_mode='human',
)
env = gym.make(env_name, **env_arguments)

# Show what the agent will interact with
print("Environment:", env_name)
print("Action space:", env.action_space)
print("Observation space:", env.observation_space)

# Let the trained model drive one episode
play(env, model)

Environment: CarRacing-v2
Action space: Discrete(5)
Observation space: Box(0, 255, (96, 96, 3), uint8)
