In [None]:
import numpy as np # library to use array
import pandas as pd # used for datasets 
import matplotlib.pyplot as plt  # plotting library for creating visualizations in Python.
import cv2  # used for computer vision and image processing tasks.
import tensorflow as tf  # open-source machine learning framework developed by Google.
from PIL import Image  # provides support for opening, manipulating, and saving many different image file formats.
import os  # provides a way of using operating system-dependent functionality.
from sklearn.model_selection import train_test_split  # Used for splitting datasets into training and testing sets.
from keras.utils import to_categorical  # A high-level neural networks API. to_categorical is used for one-hot encoding categorical variables.
from keras.models import Sequential, load_model  # A linear stack of layers. load_model is used to load pre-trained Keras models.
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout  # Layers used to build convolutional neural networks (CNNs) in Keras.

In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from tensorflow.keras.utils import to_categorical


%matplotlib inline

In [None]:
# Dataset locations, relative to the notebook's working directory.
data_dir = '../input/gtsrb-german-traffic-sign'
train_path = '../input/gtsrb-german-traffic-sign/Train'
test_path = '../input/gtsrb-german-traffic-sign/'  # trailing slash: later joined with 'Test.csv'

# Image geometry used when loading pictures and when declaring the model input.
IMG_HEIGHT, IMG_WIDTH = 30, 30
channels = 3

# Number of entries directly under the training folder
# (presumably one sub-directory per class -- verify against the dataset layout).
NUM_CATEGORIES = len(os.listdir(train_path))
NUM_CATEGORIES

In [None]:
def load_data(data_dir):
    """Load every image stored under the per-class sub-directories of ``data_dir``.

    Sub-directories are addressed by class index (``"0"`` .. ``str(NUM_CATEGORIES - 1)``).
    Each file is opened with ``load_img`` at ``(IMG_HEIGHT, IMG_WIDTH)`` and
    converted with ``img_to_array``.

    Args:
        data_dir: Path of the directory that contains the class sub-directories.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the image arrays
        and the integer class index each image was found under.
    """
    images = []
    labels = []
    for category in range(NUM_CATEGORIES):
        category_dir = os.path.join(data_dir, str(category))
        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore any seeded split) reproducible between runs.
        for file_name in sorted(os.listdir(category_dir)):
            picture = load_img(
                os.path.join(category_dir, file_name),
                # Use the notebook-wide size constants instead of a second hard-coded (30, 30).
                target_size=(IMG_HEIGHT, IMG_WIDTH),
            )
            images.append(img_to_array(picture))
            labels.append(category)

    return images, labels

In [None]:
# Load the training images and scale the pixel values by 1/255.
images, labels = load_data(train_path)
images = np.array(images)
images = images / 255
classNo = np.array(labels)

# `data` is kept as a plain copy of the image array. The previous
# reshape(-1, 32, 32, 3) did not match the 30x30 images produced by load_data
# and its result was never used.
data = np.array(images)

X = images.astype(np.float32)
# `labels` is a Python list and has no .astype(); convert its array form instead.
y = classNo.astype(np.float32)

# 80/20 train/test split, then 80/20 train/validation split of the remainder.
# A fixed random_state keeps the partitions identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
Y_tests = y_test
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

print("Data Shapes")
print("Train", end=""); print(X_train.shape, y_train.shape)
print("Validation", end=""); print(X_validation.shape, y_validation.shape)
print("Test", end=""); print(X_test.shape, y_test.shape)

In [None]:
# Report the shapes of the training and validation partitions.
# (The lowercase x_train / x_test names are never defined in this notebook;
# the splits are called X_train / X_validation.)
print("X_train.shape", X_train.shape)
print("X_valid.shape", X_validation.shape)
print("y_train.shape", y_train.shape)
print("y_valid.shape", y_validation.shape)

# Output pasted from an earlier run, kept for reference only. It was left in the
# cell as code, which is a syntax error, and it appears to come from a different
# split (one-hot labels), so it will not match the values printed above.
# X_train.shape (31367, 30, 30, 3)
# X_valid.shape (7842, 30, 30, 3)
# y_train.shape (31367, 43)
# y_valid.shape (7842, 43)

In [None]:
def grayscale(img):
    """Convert a colour image to a single-channel grayscale image with OpenCV."""
    # NOTE(review): the conversion code assumes BGR channel order; images loaded
    # through Keras/PIL are RGB -- confirm which loader feeds this function.
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


def equalize(img):
    """Return ``img`` after OpenCV histogram equalization."""
    return cv2.equalizeHist(img)


def preprocessing(img):
    """Grayscale, histogram-equalize and rescale one image.

    Args:
        img: Colour image array. uint8 input is used as is; any other dtype is
            first converted to uint8 (see below).

    Returns:
        The equalized grayscale image divided by 255.
    """
    img = np.asarray(img)
    if img.dtype != np.uint8:
        # cv2.equalizeHist only accepts 8-bit single-channel input, but earlier
        # cells hand over float32 images already divided by 255. Values whose
        # maximum is <= 1 are treated as [0, 1]-scaled and stretched back to
        # 0..255; other float data is assumed to be on the 0..255 scale already.
        scale = 255.0 if img.size and img.max() <= 1.0 else 1.0
        img = np.clip(np.rint(img * scale), 0, 255).astype(np.uint8)
    img = grayscale(img)
    img = equalize(img)
    img = img / 255  # image normalization
    return img


# Run every image of each split through preprocessing().
X_train = np.array([preprocessing(sample) for sample in X_train])
X_validation = np.array([preprocessing(sample) for sample in X_validation])
X_test = np.array([preprocessing(sample) for sample in X_test])

# Append an explicit single-channel axis: (N, H, W) -> (N, H, W, 1).
# The previous reshape(-1, 32, 32, 1) hard-coded 32x32, which does not match the
# 30x30 images loaded above; adding an axis works for whatever size was loaded.
X_train = X_train[..., np.newaxis]
X_validation = X_validation[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# Augmentation generator: small rotations, shears, zooms and shifts.
dataGen = ImageDataGenerator(
    rotation_range=10,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
dataGen.fit(X_train)

# Pull one augmented batch of 20 samples for inspection.
batches = dataGen.flow(X_train, y_train, batch_size=20)
X_batch, y_batch = next(batches)

In [None]:
# One-hot encode the labels of every split.
# `noOfClasses` was never defined in this notebook; the class count lives in
# NUM_CATEGORIES (set in the configuration cell).
y_train = to_categorical(y_train, NUM_CATEGORIES)
y_validation = to_categorical(y_validation, NUM_CATEGORIES)
y_test = to_categorical(y_test, NUM_CATEGORIES)

In [None]:
# CNN: two convolution stages (each followed by pooling and dropout),
# then a dense head ending in a 43-way softmax.
model = Sequential([
    Conv2D(filters=16, kernel_size=(3, 3), activation="relu",
           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    Conv2D(filters=32, kernel_size=(3, 3), activation="relu"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
    Conv2D(filters=128, kernel_size=(3, 3), activation="relu"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(rate=0.5),
    Dense(43, activation="softmax"),
])

In [None]:
# Load the training images with PIL, one sub-folder per class id.
imgs_path = "../input/gtsrb-german-traffic-sign/Train"
data_list = []
labels_list = []
classes_list = 43
for i in range(classes_list):
    i_path = os.path.join(imgs_path, str(i))  # class folders are named 0-42
    # Sorted listing keeps the sample order reproducible between runs.
    for img in sorted(os.listdir(i_path)):
        # The context manager closes the file handle as soon as the pixels are copied.
        with Image.open(os.path.join(i_path, img)) as source:
            # Force three channels so every array has the same shape and
            # np.array() below can stack them into one 4-D array.
            resized = source.convert("RGB").resize((30, 30))
            data_list.append(np.array(resized))
        labels_list.append(i)
data = np.array(data_list)
labels = np.array(labels_list)
# Show the array shapes instead of dumping every pixel value into the output.
print(data.shape, labels.shape)

In [None]:
def prep_dataset(X, y):
    """Cast the images to float32 and one-hot encode the labels.

    Returns:
        Tuple ``(X_prep, y_prep)``.
    """
    return X.astype('float32'), to_categorical(np.array(y))


X, y = prep_dataset(data, labels)

In [None]:
# 80% train, then the held-out 20% is halved into validation and test.
# random_state pins both shuffles so the partitions (and every metric computed
# on them) are reproducible after a kernel restart.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, y, test_size=0.2, shuffle=True, stratify=y, random_state=42
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_val, Y_val, test_size=0.5, shuffle=True, random_state=42
)

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# On-the-fly augmentation; flips are disabled.
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    vertical_flip=False,
    fill_mode="nearest")

EPOCHS = 30
# Train on the augmented training split and validate on the validation split.
# The lowercase x_train / x_test names used before are never defined in this
# notebook; the split cell produces X_train / Y_train and X_val / Y_val.
history = model.fit(
    aug.flow(X_train, Y_train, batch_size=32),
    validation_data=(X_val, Y_val),
    epochs=EPOCHS,
)

In [None]:
# Accuracy (top) and loss (bottom) per epoch for the training and validation data.
fig, ax = plt.subplots(2, 1, figsize=(12, 10))
fig.suptitle('Train evaluation')

# One x position per recorded epoch, shared by all four curves.
epoch_axis = np.arange(len(history.history['accuracy']))

# Passing label= binds each legend entry to its own line instead of relying on
# the order in which artists were added to the axes.
sns.lineplot(ax=ax[0], x=epoch_axis, y=history.history['accuracy'], label='Train')
sns.lineplot(ax=ax[0], x=epoch_axis, y=history.history['val_accuracy'], label='Validation')
ax[0].set_title('Accuracy')
ax[0].set_xlabel('epoch')
ax[0].set_ylabel('accuracy')

sns.lineplot(ax=ax[1], x=epoch_axis, y=history.history['loss'], label='Train')
sns.lineplot(ax=ax[1], x=epoch_axis, y=history.history['val_loss'], label='Validation')
ax[1].set_title('Loss')
ax[1].set_xlabel('epoch')
ax[1].set_ylabel('loss')

# The print(fig) / print(ax) calls only emitted object reprs and were removed.
plt.show()

In [None]:
# Evaluate on the held-out test split produced by the train/val/test split cell.
# (`x_test` is never defined in this notebook, and `y_test` belongs to a
# different pipeline; the matching pair is X_test / Y_test.)
loss, accuracy = model.evaluate(X_test, Y_test)

print('test set accuracy: ', accuracy * 100)

In [None]:
# Side-by-side accuracy and loss curves for the training and validation data.
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x axis from the recorded history rather than from EPOCHS, so the
# plot still works when training stopped early or EPOCHS was changed afterwards.
epochs_range = range(len(accuracy))

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, accuracy, label='Training Accuracy')
plt.plot(epochs_range, val_accuracy, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# Score the model on the official test set described by Test.csv.
# The CSV goes into its own name (`test_df`) so the one-hot `Y_test` array from
# the split cell is not silently replaced by a DataFrame.
test_df = pd.read_csv(test_path + 'Test.csv')
test_labels = test_df["ClassId"].values
test_images = test_df["Path"].values

# Load every listed image at the size the model was built for.
output = [
    np.array(load_img(os.path.join(test_path, img), target_size=(IMG_HEIGHT, IMG_WIDTH)))
    for img in test_images
]

X_test = np.array(output)
pred = model.predict(X_test)
pred = np.argmax(pred, axis=1)  # class index with the highest predicted probability

#Accuracy with the test data
print('Test Data accuracy: ', accuracy_score(test_labels, pred)*100)

In [None]:
# 5x5 grid of test images; the caption is green when the prediction matches
# the true label and red otherwise.
plt.figure(figsize=(16, 16))

start_index = 0
for i in range(25):
    sample_idx = start_index + i
    prediction, actual = pred[sample_idx], test_labels[sample_idx]
    col = 'r' if prediction != actual else 'g'

    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel('Actual={} || Pred={}'.format(actual, prediction), color=col)
    plt.imshow(X_test[sample_idx])
plt.show()

In [None]:
# Confusion matrix of the official test set, drawn as a heatmap.
# `classes` is not defined at this point of the notebook, so the axis labels are
# derived from the class ids that actually occur; passing them as `labels=`
# guarantees the matrix rows and columns line up with the DataFrame index.
class_ids = np.unique(np.concatenate([test_labels, pred]))
cf = confusion_matrix(test_labels, pred, labels=class_ids)
df_cm = pd.DataFrame(cf, index=class_ids, columns=class_ids)
plt.figure(figsize=(20, 20))
# fmt='d' prints the integer counts verbatim instead of the default '.2g'
# format, which turns 100 into 1e+02.
sns.heatmap(df_cm, annot=True, fmt='d')
plt.show()

In [None]:
# Classification report for the validation dataset.
# (`x_val` / `y_val` are never defined in this notebook; the split cell creates
# X_val / Y_val.)
val_prob = model.predict(X_val)
# Convert the one-hot validation labels back to single class ids.
y_val_arg = np.argmax(Y_val, axis=1)
# The class with the highest predicted probability is the predicted class.
val_predicted_labels = np.argmax(val_prob, axis=1)
val_report = classification_report(y_val_arg, val_predicted_labels)
print('---')
print('Classification report for Validation Dataset:')
print(val_report)

In [None]:
# loading dataset
data = []  # List to store the images.
labels = []  # List to store the corresponding labels.
classes = 43  # Total number of classes/categories in the dataset.
cur_path = os.getcwd()  # Get the current working directory.

# Walk the class folders found in the working directory. Folder names are the
# class ids, so anything that is not a numerically named directory (notebook
# files, checkpoints, the folder of saved models, ...) is skipped instead of
# crashing os.listdir or ending up as a bogus label.
for i in sorted(os.listdir(cur_path)):
    class_dir = os.path.join(cur_path, i)
    if not (os.path.isdir(class_dir) and i.isdigit()):
        continue

    # Loop through each file (image) in the current class directory.
    for j in sorted(os.listdir(class_dir)):
        img_path = os.path.join(class_dir, j)

        # IMREAD_COLOR always yields a 3-channel image, so every sample has the
        # same shape and the list can be stacked into a single array below.
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them.
            continue
        img = cv2.resize(img, (30, 30), interpolation=cv2.INTER_NEAREST)
        data.append(img)
        labels.append(int(i))  # Store the label as an integer class id, not a string.

# Convert data and labels lists to NumPy arrays for further processing.
data = np.array(data)
labels = np.array(labels)

# Print the shape of the data (images) and labels arrays.
print(data.shape, labels.shape)

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    random_state=42,
    test_size=0.2,
)

# Show the four resulting shapes on one line to verify the split.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
# One-hot encode the training and testing labels over 43 classes.
y_train, y_test = (to_categorical(split_labels, 43) for split_labels in (y_train, y_test))

In [None]:
# Display the shapes of the training and testing label arrays as the cell output.
(y_train.shape, y_test.shape)

In [None]:
# Build the CNN as a single layer list.
model = Sequential([
    # Convolution stage 1: two 5x5 convolutions, pooling, dropout.
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=X_train.shape[1:]),
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # Convolution stage 2: two 3x3 convolutions, pooling, dropout.
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # Classifier head: flatten, one hidden dense layer, 43-way softmax.
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(rate=0.5),
    Dense(43, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model built in the previous cell.
model.summary()

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Number of training epochs (passed to model.fit in the training cell).
epochs = 20

In [None]:
# Fit on (X_train, y_train) in batches of 64 for `epochs` epochs.
# NOTE(review): validation uses the test split (X_test, y_test), which is also
# used for the final scores below -- confirm that this is intended.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=64,
)

In [None]:
# Save the trained model under ./modeli in both the HDF5 (.h5) and the native
# Keras (.keras) format.
# The target directory is created first: model.save() does not create missing
# parent directories and fails on a fresh working directory otherwise.
os.makedirs('./modeli', exist_ok=True)
model.save('./modeli/model_1.h5')
model.save('./modeli/model_1.keras')

In [None]:
# Accuracy per epoch: one curve for the training data, one for the validation data.
for history_key, curve_label in (('accuracy', 'training accuracy'),
                                 ('val_accuracy', 'val accuracy')):
    plt.plot(history.history[history_key], label=curve_label)

# Title, axis labels and legend, then render the figure.
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Loss per epoch: one curve for the training data, one for the validation data.
for history_key, curve_label in (('loss', 'training loss'),
                                 ('val_loss', 'val loss')):
    plt.plot(history.history[history_key], label=curve_label)

# Title, axis labels and legend, then render the figure.
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Evaluate on the testing data; `score` holds the loss first and the accuracy second.
score = model.evaluate(X_test, y_test, verbose=0)

# Print each value next to its caption.
for caption, value in zip(('Test Loss', 'Test accuracy'), score):
    print(caption, value)

In [None]:
# Class probabilities predicted for the testing data.
y_pred = model.predict(X_test)

# Collapse the one-hot ground truth and the predicted probabilities to class
# indices by taking the position of the largest entry in each row.
y_test_class = y_test.argmax(axis=1)
y_pred_class = y_pred.argmax(axis=1)

In [None]:
# scikit-learn helpers for the per-class report and the confusion matrix.
from sklearn.metrics import classification_report, confusion_matrix

# Print the classification report first, then the confusion matrix, both
# computed from the true and predicted class indices.
for metric_fn in (classification_report, confusion_matrix):
    print(metric_fn(y_test_class, y_pred_class))

In [None]:
# Making the Confusion Matrix
# Compute the confusion matrix from the true class labels (y_test_class) and the
# predicted class labels (y_pred_class).
cm = confusion_matrix(y_test_class, y_pred_class)

# One cell per class pair needs room: the default figure size makes the
# annotations overlap.
plt.figure(figsize=(20, 20))

# Annotated heatmap; fmt='d' prints the integer counts verbatim instead of the
# default '.2g' format, which renders 100 as 1e+02.
sns.heatmap(cm, annot=True, fmt='d')

# Save the heatmap as 'h1.png' in the Kaggle working directory.
plt.savefig('/kaggle/working/h1.png')

In [None]:
# Calculate the Accuracy
# accuracy_score expects (y_true, y_pred). The value is the same in either
# order, but passing the arguments in the documented order avoids copying the
# swapped pattern to metrics that are not symmetric.
score = accuracy_score(y_test_class, y_pred_class)

# Display the computed accuracy score.
score