In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

from tensorflow.keras import datasets
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense # creates densely connected layer object
from tensorflow.keras.layers import Flatten # takes 2D input and turns into 1D array

from tensorflow import keras
from tensorflow.keras import datasets, layers, models

from tensorflow.keras.layers import Conv2D # convolution layer
from tensorflow.keras.layers import MaxPooling2D # max pooling layer

In [2]:
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dropout

# BatchNormalization: a layer that rescales/normalizes the activations of the preceding layer.
# It has a weak regularization effect and also tends to allow faster, more stable training.

from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Activation

from keras.preprocessing.image import ImageDataGenerator

In [3]:
import os

# Root folder that holds the image splits. It defaults to the original
# location, but can be redirected without editing the notebook by setting the
# URBAN_SOUNDS_DATA_ROOT environment variable before starting the kernel
# (useful when the project folder is moved or the notebook runs elsewhere).
DATA_ROOT = os.environ.get(
    'URBAN_SOUNDS_DATA_ROOT',
    '/Users/eitan/Documents/Flatiron/Eyeballing-Urban-Sounds',
)

# Directory passed to the training/validation generators.
train_dir = os.path.join(DATA_ROOT, 'train_splits_2')

# Directory passed to the test generator.
test_dir = os.path.join(DATA_ROOT, 'test_splits_2')

In [4]:
# Generator config for the training directory: pixel values are multiplied by
# 1/255, and 20% of the images are reserved for the 'validation' subset.
train_data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
)

In [5]:
# Stream the 'training' subset of train_dir as batches of 32 RGB images
# resized to 150x150, with one-hot ('categorical') labels.
train_generator = train_data_gen.flow_from_directory(
    directory=train_dir,
    subset='training',
    target_size=(150, 150),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=42,
)

Found 5240 images belonging to 10 classes.


In [6]:
# Rescale-only generator (pixel values multiplied by 1/255, no validation split).
# NOTE(review): in the cells visible here, validation_generator is built from
# train_data_gen (which carries validation_split), so this object appears to be
# unused - confirm before removing it.
val_data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
)

In [7]:
# Stream the held-out 'validation' subset of train_dir. It is created from
# train_data_gen because that generator carries the validation_split setting.
# Batches are 32 RGB images at 150x150 with one-hot ('categorical') labels.
validation_generator = train_data_gen.flow_from_directory(
    directory=train_dir,
    subset='validation',
    target_size=(150, 150),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=42,
)

Found 1309 images belonging to 10 classes.


In [8]:
# Generator config for the test directory: only multiplies pixel values by 1/255.
test_data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
)

In [9]:
# Stream test_dir as batches of 32 RGB images resized to 150x150 with one-hot
# ('categorical') labels. shuffle=False keeps the files in a fixed order.
test_generator = test_data_gen.flow_from_directory(
    directory=test_dir,
    target_size=(150, 150),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False,
    seed=42,
)

Found 2183 images belonging to 10 classes.


In [10]:
# CNN with four L2-regularized (5e-4) 3x3 conv layers, dropout, and a
# two-layer dense head with batch normalization in between.
model3 = Sequential([
    # Conv stage 1: 32 filters on the 150x150x3 input, then 2x2 max pooling.
    Conv2D(32, (3, 3),
           activation='relu',
           kernel_regularizer=l2(5e-4),
           input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),

    # Conv stage 2: 64 filters, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    MaxPooling2D((2, 2)),

    # Conv stage 3: 64 filters, dropout applied before the pooling step.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    Dropout(0.2),
    MaxPooling2D((2, 2)),

    # Conv stage 4: 64 filters followed by dropout (no pooling).
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    Dropout(0.25),

    # Dense head: flatten the feature maps, then 64 -> batch norm -> 128 units.
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.25),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Softmax over the 10 output classes.
    Dense(10, activation='softmax'),
])

model3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Train model3 for 20 epochs, evaluating on the validation generator after
# each epoch; the returned History object is kept in history_cnn_3.
history_cnn_3 = model3.fit(
    x=train_generator,
    epochs=20,
    validation_data=validation_generator,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
# Unregularized CNN: three 3x3 conv layers (32, 64, 64 filters) with
# 2x2 max pooling after the first two, then a 64-unit dense layer.
model = Sequential([
    # 32 filters on the 150x150x3 input, then 2x2 max pooling.
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),

    # 64 filters, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Final conv layer with 64 filters (no pooling afterwards).
    Conv2D(64, (3, 3), activation='relu'),

    # Dense head: flatten, 64 hidden units, softmax over the 10 classes.
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for `model`.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 74, 74, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 36, 36, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 64)        36928     
                                                                 
 flatten (Flatten)           (None, 73984)             0

In [14]:
# Train `model` for 20 epochs, evaluating on the validation generator after
# each epoch; the returned History object is kept in history_cnn.
history_cnn = model.fit(
    x=train_generator,
    epochs=20,
    validation_data=validation_generator,
)

Epoch 1/20
 10/164 [>.............................] - ETA: 31s - loss: 2.2826 - accuracy: 0.1474

KeyboardInterrupt: 

### Model #2 with L2 Regularization and Dropout

In [18]:
# CNN with L2 weight decay (5e-4) on every conv layer, plus dropout after the
# first pooling step, the last conv layer, and the dense layer.
model2 = Sequential([
    # 32 filters on the 150x150x3 input, 2x2 max pooling, then dropout.
    Conv2D(32, (3, 3),
           activation='relu',
           kernel_regularizer=l2(5e-4),
           input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),

    # 64 filters, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    MaxPooling2D((2, 2)),

    # Final conv layer with 64 filters, followed by dropout.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    Dropout(0.2),

    # Dense head: flatten, 128 hidden units with dropout, softmax over 10 classes.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(10, activation='softmax'),
])

model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [19]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for model2.
model2.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 dropout_5 (Dropout)         (None, 74, 74, 32)        0         
                                                                 
 conv2d_13 (Conv2D)          (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 36, 36, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_14 (Conv2D)          (None, 34, 34, 64)       

In [20]:
# Train model2 for 20 epochs, evaluating on the validation generator after
# each epoch. Note that this rebinds history_cnn to model2's History object.
history_cnn = model2.fit(
    x=train_generator,
    epochs=20,
    validation_data=validation_generator,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Stronger Dropout and Regularization

In [21]:
# CNN with L2 weight decay of 8e-4 on every conv layer and dropout of
# 0.25 / 0.25 / 0.5. This rebinds the name model3 to a new model.
model3 = Sequential([
    # 32 filters on the 150x150x3 input, 2x2 max pooling, then dropout.
    Conv2D(32, (3, 3),
           activation='relu',
           kernel_regularizer=l2(8e-4),
           input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # 64 filters, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(8e-4)),
    MaxPooling2D((2, 2)),

    # Final conv layer with 64 filters, followed by dropout.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(8e-4)),
    Dropout(0.25),

    # Dense head: flatten, 64 hidden units with dropout, softmax over 10 classes.
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [22]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for model3.
model3.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_15 (Conv2D)          (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_9 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 dropout_8 (Dropout)         (None, 74, 74, 32)        0         
                                                                 
 conv2d_16 (Conv2D)          (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_10 (MaxPooli  (None, 36, 36, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_17 (Conv2D)          (None, 34, 34, 64)       

In [23]:
# Train model3 for 20 epochs, evaluating on the validation generator after
# each epoch. Note that this rebinds history_cnn to model3's History object.
history_cnn = model3.fit(
    x=train_generator,
    epochs=20,
    validation_data=validation_generator,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20

2023-07-24 16:23:30.921757: W tensorflow/core/framework/op_kernel.cc:1816] UNKNOWN: FileNotFoundError: [Errno 2] No such file or directory: '/Users/eitan/Documents/Flatiron/Capstone/train_splits_2/2/192269-2-0-1.png'
Traceback (most recent call last):

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/tensorflow/python/ops/script_ops.py", line 268, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/keras/src/en



UnknownError: Graph execution error:

FileNotFoundError: [Errno 2] No such file or directory: '/Users/eitan/Documents/Flatiron/Capstone/train_splits_2/2/192269-2-0-1.png'
Traceback (most recent call last):

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/tensorflow/python/ops/script_ops.py", line 268, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/keras/src/engine/data_adapter.py", line 917, in wrapped_generator
    for data in generator_fn():

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/keras/src/engine/data_adapter.py", line 1064, in generator_fn
    yield x[i]
          ~^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/keras/src/preprocessing/image.py", line 116, in __getitem__
    return self._get_batches_of_transformed_samples(index_array)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/keras/src/preprocessing/image.py", line 370, in _get_batches_of_transformed_samples
    img = image_utils.load_img(
          ^^^^^^^^^^^^^^^^^^^^^

  File "/Users/eitan/anaconda3/envs/Number41/lib/python3.11/site-packages/keras/src/utils/image_utils.py", line 422, in load_img
    with open(path, "rb") as f:
         ^^^^^^^^^^^^^^^^

FileNotFoundError: [Errno 2] No such file or directory: '/Users/eitan/Documents/Flatiron/Capstone/train_splits_2/2/192269-2-0-1.png'


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_44208]

### Stronger Dropout

In [15]:
# CNN with L2 weight decay of 8e-4 on every conv layer and a heavy 0.7
# dropout after the 128-unit dense layer.
model4 = Sequential([
    # 32 filters on the 150x150x3 input, 2x2 max pooling, then dropout.
    Conv2D(32, (3, 3),
           activation='relu',
           kernel_regularizer=l2(8e-4),
           input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # 64 filters, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(8e-4)),
    MaxPooling2D((2, 2)),

    # Final conv layer with 64 filters, followed by dropout.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(8e-4)),
    Dropout(0.25),

    # Dense head: flatten, 128 hidden units with 0.7 dropout, softmax over 10 classes.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.7),
    Dense(10, activation='softmax'),
])

model4.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for model4.
model4.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 dropout (Dropout)           (None, 74, 74, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 36, 36, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 34, 34, 64)       

In [17]:
# Train model4 for 25 epochs, evaluating on the validation generator after
# each epoch. Note that this rebinds history_cnn to model4's History object.
history_cnn = model4.fit(
    x=train_generator,
    epochs=25,
    validation_data=validation_generator,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
 17/164 [==>...........................] - ETA: 35s - loss: 1.0161 - accuracy: 0.6379

KeyboardInterrupt: 

### Less L2 and Dropout

In [19]:
# CNN with L2 weight decay of 5e-4 on every conv layer and dropout of
# 0.2 / 0.25 / 0.6.
model5 = Sequential([
    # 32 filters on the 150x150x3 input, 2x2 max pooling, then dropout.
    Conv2D(32, (3, 3),
           activation='relu',
           kernel_regularizer=l2(5e-4),
           input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),

    # 64 filters, then 2x2 max pooling.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    MaxPooling2D((2, 2)),

    # Final conv layer with 64 filters, followed by dropout.
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(5e-4)),
    Dropout(0.25),

    # Dense head: flatten, 128 hidden units with 0.6 dropout, softmax over 10 classes.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.6),
    Dense(10, activation='softmax'),
])

model5.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for model5.
model5.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 dropout_6 (Dropout)         (None, 74, 74, 32)        0         
                                                                 
 conv2d_10 (Conv2D)          (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 36, 36, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 34, 34, 64)       

In [21]:
# Train model5 for 25 epochs, evaluating on the validation generator after
# each epoch. Note that this rebinds history_cnn to model5's History object.
history_cnn = model5.fit(
    x=train_generator,
    epochs=25,
    validation_data=validation_generator,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
