In [None]:
import os

import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from tensorflow.keras import layers, models
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

import keras_tuner as kt
from keras import regularizers

import gc

# Importing and transforming dataset

In [None]:
def label_counter(inpt, data_name):
    """Print how many entries of ``inpt`` equal 1 and how many equal 0.

    Args:
        inpt: Iterable of labels. Entries that equal neither 1 nor 0 are
            not counted.
        data_name: Text printed in front of the two counts.
    """
    # Tally both classes in a single pass; 1 is tested before 0.
    tally = {1: 0, 0: 0}
    for label in inpt:
        for key in (1, 0):
            if label == key:
                tally[key] += 1
                break
    print(f'{data_name} --> Traffic related: {tally[1]}, Traffic unrelated: {tally[0]}')

def import_dataset(split_seed,
                   data_dir='/kaggle/input/traffic-related-vs-unrelated-datasets',
                   image_shape=(222, 296, 1),
                   test_size=0.1,
                   val_size=0.15):
    """Load both CSV datasets and split them into train/test/validation arrays.

    The two CSV files are stacked vertically. The first column is used as the
    label vector; every remaining column is divided by 255.0 and reshaped into
    images of ``image_shape``.

    Args:
        split_seed: ``random_state`` passed to both ``train_test_split`` calls.
        data_dir: Directory containing ``model1_traffic_related_dataset.csv``
            and ``model1_traffic_unrelated_dataset.csv``.
        image_shape: Per-image shape used for the reshape. With the default,
            each row becomes a 222 x 296 array with one channel.
        test_size: Fraction of all rows held out as the test set.
        val_size: Fraction of the remaining (non-test) rows held out as the
            validation set.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, X_val, y_val)``.
    """
    related_csv = os.path.join(data_dir, 'model1_traffic_related_dataset.csv')
    unrelated_csv = os.path.join(data_dir, 'model1_traffic_unrelated_dataset.csv')

    # Combine both datasets row-wise
    df = pl.concat([pl.read_csv(related_csv), pl.read_csv(unrelated_csv)],
                   how="vertical")

    # First column holds the labels
    y = df[:, 0].to_numpy()

    # Scale the pixel columns by 1/255 (assumes 8-bit 0-255 values; TODO confirm)
    # and reshape each row into one image.
    X = (df[:, 1:] / 255.0).to_numpy().reshape(-1, *image_shape)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=split_seed)

    # Split train data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=val_size, random_state=split_seed)

    label_counter(y_train, 'y_train')
    label_counter(y_test, 'y_test')
    label_counter(y_val, 'y_val')

    return X_train, y_train, X_test, y_test, X_val, y_val

# Build the train/test/validation arrays; 13 is the seed handed to both splits.
X_train, y_train, X_test, y_test, X_val, y_val = import_dataset(13)

# Exploring dataset

In [None]:
import numpy as np
import plotly.graph_objects as go

# Display name for each binary label value
class_labels = {0: 'Traffic unrelated images', 1: 'Traffic related images'}

# Translate every numeric label of each split into its display name
train_classes, test_classes, val_classes = (
    np.array([class_labels[label] for label in split_labels])
    for split_labels in (y_train, y_test, y_val)
)

# (unique names, occurrences) pair for each split
train_class_counts, test_class_counts, val_class_counts = (
    np.unique(names, return_counts=True)
    for names in (train_classes, test_classes, val_classes)
)

# One bar trace per split
data = [
    go.Bar(x=names, y=counts, name=split_name)
    for split_name, (names, counts) in (
        ('Train', train_class_counts),
        ('Test', test_class_counts),
        ('Validation', val_class_counts),
    )
]

# Grouped bars so the three splits sit side by side for each class
layout = go.Layout(
    title='Occurrences of Binary Classes',
    xaxis={'title': 'Class'},
    yaxis={'title': 'Count'},
    barmode='group',
)

# Build and render the figure
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
import numpy as np
import plotly.graph_objects as go

# Display name for each binary label value
class_labels = dict(zip((0, 1), ('Traffic Unrelated', 'Traffic Related')))

# Occurrences of each class summed over the train, test and validation labels
class_counts = {
    name: np.sum([np.sum(split == value) for split in (y_train, y_test, y_val)])
    for value, name in class_labels.items()
}

# Single bar trace: one bar per class
data = [go.Bar(x=list(class_counts), y=list(class_counts.values()))]

layout = go.Layout(
    title='Total Occurrences of Binary Classes',
    xaxis={'title': 'Class'},
    yaxis={'title': 'Count'},
)

# Build and render the figure
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
def image_visualizer(X, y, row_number):
    """Display one image from ``X`` in grayscale, titled according to its label.

    Args:
        X: Indexable collection of images.
        y: Indexable collection of labels aligned with ``X``.
        row_number: Index of the sample to show.

    The title is 'Traffic unrelated' for label 0, 'Traffic related' for
    label 1, and ``None`` for any other label value.
    """
    image = X[row_number]
    label = y[row_number]

    plt.imshow(image, cmap='gray')
    caption = (
        'Traffic unrelated' if label == 0
        else 'Traffic related' if label == 1
        else None
    )
    plt.title(caption)
    plt.show()

# Preview a handful of training samples together with their labels
for _row in (5, 225, 512, 995):
    image_visualizer(X_train, y_train, _row)

# Further data transformations

In [None]:
import numpy as np

def transformer_func(X, probability_threshold=0.5):
    """Randomly augment a subset of the images in ``X``.

    For each image a number is drawn uniformly from [0, 1). If it is greater
    than ``probability_threshold`` the image is replaced by a randomly
    transformed version produced by a Keras ``ImageDataGenerator``; otherwise
    the image is kept unchanged.

    Args:
        X: Indexable collection of images (each ``X[i]`` must expose
            ``.shape``).
        probability_threshold: Draws above this value trigger augmentation.
            The default of 0.5 transforms roughly half of the images.

    Returns:
        Tuple ``(augmented, transformed_count)`` where ``augmented`` is a
        NumPy array holding one entry per input image, in the original
        order, and ``transformed_count`` is the number of images that were
        transformed.
    """
    datagen = keras.preprocessing.image.ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest'
    )

    X_augmented = []
    transformed_image_counter = 0

    # Decide image by image whether to augment
    for i in range(len(X)):
        random_probability = np.random.uniform(0, 1)

        if random_probability > probability_threshold:
            augmented_image = datagen.apply_transform(
                X[i], datagen.get_random_transform(X[i].shape))
            X_augmented.append(augmented_image)
            transformed_image_counter += 1
        else:
            # Keep the untouched image from the *argument*, not from the
            # notebook-level X_train, so the function works for any input.
            X_augmented.append(X[i])

    return np.array(X_augmented), transformed_image_counter

# NOTE: X_train is rebound to its augmented version here, so re-running this
# cell applies augmentation on top of already-augmented images.
X_train, transformed_image_counter = transformer_func(X_train)

print(f'{transformed_image_counter} images transformed.')

In [None]:
# Shape of the training images after the augmentation step
X_train.shape

In [None]:
from keras.utils import to_categorical

# One-hot encode the 0/1 labels. num_classes is pinned to 2 so every split
# yields two columns even if one class happened to be absent from it
# (to_categorical otherwise infers the width from the largest label present).
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)
y_val = to_categorical(y_val, num_classes=2)

In [None]:
# image_visualizer(X_train, y_train, 5)
# image_visualizer(X_train, y_train, 3005)
# image_visualizer(X_train, y_train, 662)
# image_visualizer(X_train, y_train, 2235)

# Model hyperparameter tuning

In [None]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.callbacks import ReduceLROnPlateau

# cnn = Sequential()

# # Block 1
# cnn.add(Conv2D(115, (3, 3), activation='relu', padding='same', input_shape=(222, 296, 1)))
# cnn.add(Conv2D(115, (3, 3), activation='relu', padding='same'))
# cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
# cnn.add(Dropout(0.1))

# # Block 2
# cnn.add(Conv2D(40, (3, 3), activation='relu', padding='same'))
# cnn.add(Conv2D(40, (3, 3), activation='relu'))
# cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
# cnn.add(Dropout(0.1))

# # Block 3
# cnn.add(Conv2D(25, (3, 3), activation='relu', padding='same'))
# cnn.add(Conv2D(25, (3, 3), activation='relu'))
# cnn.add(Conv2D(25, (2, 2), activation='relu'))
# cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
# cnn.add(Dropout(0.1))

# # Block 4
# cnn.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
# cnn.add(Conv2D(64, (2, 2), activation='relu', padding='same'))
# cnn.add(MaxPooling2D((2, 2), strides=(1, 1)))
# cnn.add(Conv2D(64, (2, 2), activation='relu', padding='same'))
# cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
# cnn.add(Dropout(0.1))

# # Block 5
# cnn.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
# cnn.add(Conv2D(128, (2, 2), activation='relu', padding='same'))
# cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
# cnn.add(Conv2D(128, (2, 2), activation='relu', padding='same'))
# cnn.add(MaxPooling2D((1, 1), strides=(2, 2)))
# cnn.add(Dropout(0.1))

# # Classification block
# cnn.add(Flatten())
# cnn.add(Dense(64, activation='relu'))
# cnn.add(BatchNormalization())
# cnn.add(Dropout(0.2))
# cnn.add(Dense(128, activation='relu'))
# cnn.add(BatchNormalization())
# cnn.add(Dropout(0.4))
# cnn.add(Dense(2, activation='sigmoid'))

# optimizer = Adam(learning_rate=0.001)
# # compile the model
# cnn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
# cnn.summary()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

cnn = Sequential()

# Block 1: two strided convolutions followed by pooling
cnn.add(Conv2D(115, (4, 4), strides=(2, 2), activation='relu', padding='same', input_shape=(222, 296, 1)))
cnn.add(Conv2D(115, (3, 3), strides=(2, 2), activation='relu', padding='same'))
cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
cnn.add(Dropout(0.1))

# Block 2
cnn.add(Conv2D(84, (3, 3), activation='relu', padding='same'))
cnn.add(Conv2D(64, (3, 3), activation='relu'))
cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
cnn.add(Dropout(0.1))

# Block 3
cnn.add(Conv2D(126, (3, 3), activation='relu', padding='same'))
cnn.add(Conv2D(126, (3, 3), activation='relu'))
cnn.add(MaxPooling2D((2, 2), strides=(2, 2)))
cnn.add(Dropout(0.1))

# Classification block
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.3))
cnn.add(Dense(256, activation='relu'))
cnn.add(Dropout(0.4))
# The labels are one-hot encoded into two mutually exclusive classes, so the
# head is a 2-way softmax trained with categorical cross-entropy (rather than
# two independent sigmoid outputs with binary cross-entropy).
cnn.add(Dense(2, activation='softmax'))

optimizer = Adam(learning_rate=0.001)

# compile the model
cnn.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
cnn.summary()

In [None]:
# del history
# gc.collect()

In [None]:
# Learning-rate schedule: multiply the rate by 0.1 after 4 epochs without
# improvement, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(factor=0.1, patience=4, min_lr=1e-5)

# Train on the training split, validating on the validation split each epoch
history = cnn.fit(
    x=X_train,
    y=y_train,
    batch_size=80,
    epochs=45,
    validation_data=(X_val, y_val),
    callbacks=[reduce_lr],
)

In [None]:
# # Train the model
# history = cnn.fit(X_train, y_train,
#                     validation_data=(X_val, y_val),
#                     epochs=45,
#                     batch_size=80)

In [None]:
# Persist the trained model under the Kaggle working directory.
# NOTE(review): the path has no file extension; depending on the installed
# Keras version a '.keras' or '.h5' suffix may be required — TODO confirm.
cnn.save('/kaggle/working/model')

# Score the trained model on the held-out test split
loss, accuracy = cnn.evaluate(X_test, y_test)
print('Loss:', loss)
print('Accuracy:', accuracy)

In [None]:
# val_acc_per_epoch = history.history['val_accuracy']
# best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
# print('Best epoch: %d' % (best_epoch,))