In [1]:
#pip install Augmentor 

In [3]:
#pip install tensorflow

In [5]:
import os
import shutil
import random

In [7]:
# Paths
raw_data_dir = 'dataset'
split_data_dir = 'split_dataset'

# Split ratios
train_ratio = 0.70
val_ratio = 0.15
test_ratio = 0.15

In [9]:
# Class names
class_names = ['cardboard', 'glass', 'ied', 'metal', 'paper', 'plastic', 'trash']

In [11]:
# Create directories for split dataset
def create_dirs(base_path):
    os.makedirs(base_path, exist_ok=True)
    os.makedirs(os.path.join(base_path, 'train'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'val'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'test'), exist_ok=True)
    
    for class_name in class_names:
        os.makedirs(os.path.join(base_path, 'train', class_name), exist_ok=True)
        os.makedirs(os.path.join(base_path, 'val', class_name), exist_ok=True)
        os.makedirs(os.path.join(base_path, 'test', class_name), exist_ok=True)

In [13]:
# Split images into train, val, test sets
def split_images():
    for class_name in class_names:
        class_dir = os.path.join(raw_data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        
        images = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
        random.shuffle(images)
        
        total_images = len(images)
        train_count = int(total_images * train_ratio)
        val_count = int(total_images * val_ratio)
        
        train_images = images[:train_count]
        val_images = images[train_count:train_count + val_count]
        test_images = images[train_count + val_count:]
        
        # Move images to respective directories
        for img in train_images:
            shutil.move(os.path.join(class_dir, img), os.path.join(split_data_dir, 'train', class_name, img))
        for img in val_images:
            shutil.move(os.path.join(class_dir, img), os.path.join(split_data_dir, 'val', class_name, img))
        for img in test_images:
            shutil.move(os.path.join(class_dir, img), os.path.join(split_data_dir, 'test', class_name, img))

# Run the process
create_dirs(split_data_dir)
split_images()

print("Dataset has been split and moved to 'split_dataset'.")

Dataset has been split and moved to 'split_dataset'.


In [15]:
import time # used to measure model training time
exp_start_time = time.time()
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
import os
from skimage import io
from PIL import Image
from pathlib import Path
import Augmentor

from sklearn.metrics import confusion_matrix, classification_report, \
        ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from tensorflow.keras import applications
from keras import optimizers, layers, models
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, \
    Flatten, Dense, Dropout, Input, Lambda, LeakyReLU, PReLU, Activation

In [17]:
# Define base directory and class names
base_dir = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset'
class_names = ['cardboard', 'glass', 'ied', 'metal', 'paper', 'plastic', 'trash']

# Function to generate local paths
def generate_paths(base_path, classes):
    paths = {}
    
    for split in ['train', 'val', 'test']:
        for class_name in classes:
            path = os.path.join(base_path, split, class_name)
            paths[path] = os.path.exists(path)  # Check if path exists
    
    return paths

# Generate and print paths
paths = generate_paths(base_dir, class_names)
paths

{'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\cardboard': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\glass': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\ied': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\metal': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\paper': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\plastic': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\train\\trash': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\val\\cardboard': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\val\\glass': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\val\\ied': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\val\\metal': True,
 'C:\\Users\\Ashish\\Assignment1\\I2Net-main\\split_dataset\\val\\paper': True,
 'C:\\Users\\Ashish\

In [19]:
# Read datasets from local directories as train/val/test sets
train_dir = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\ied'
val_dir = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\ied'
test_dir = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\ied'

train_cardboard = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\cardboard'
val_cardboard = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\cardboard'
test_cardboard = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\cardboard'

train_glass = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\glass'
val_glass = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\glass'
test_glass = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\glass'

train_metal = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\metal'
val_metal = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\metal'
test_metal = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\metal'

train_paper = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\paper'
val_paper = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\paper'
test_paper = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\paper'

train_plastic = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\plastic'
val_plastic = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\plastic'
test_plastic = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\plastic'

train_trash = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\trash'
val_trash = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\trash'
test_trash = r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\trash'

In [21]:
# Training directories
train_dirs = [
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\cardboard',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\glass',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\metal',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\paper',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\plastic',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train\trash'
]

# Validation directories
val_dirs = [
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\cardboard',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\glass',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\metal',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\paper',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\plastic',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val\trash'
]

# Test directories
test_dirs = [
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\cardboard',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\glass',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\metal',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\paper',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\plastic',
    r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test\trash'
]

In [23]:
def count_images_in_dirs(dirs):
    total_images = 0
    for dir_path in dirs:
        total_images += len(os.listdir(dir_path))
    return total_images

## Set general model hyperparameters

In [25]:
# General model hyperparameters
img_height = 224
img_width = 224
num_channels = 3  # 3 for RGB images, 1 for grayscale
batchSize = 128    # Batch size
epochs = 5         # Number of epochs
N = 0              # Number of neurons in the first layer of the top layer
dropout_rate = 0   # Dropout rate (0 means no dropout)
dropout_seed = 200 # Optional seed for dropout

# Convolutional parameters
pool_size = (2, 2)
pool_strides = (2, 2)
kernel_size = (3, 3)
conv2D_strides = (1, 1)

# Target class names
target_names = ['cardboard', 'glass', 'ied', 'metal', 'paper', 'plastic', 'trash']

## Define the optimizer for the training

In [27]:
# Stochastic Gradient Descent (SGD) optimizer
optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.05,
    momentum=0.9,
    clipvalue=1.0,
    nesterov=True
)

## Activation function

In [29]:
activation_function = Activation('relu')

## Define functions for data standardization

In [31]:
# Create the image data generator to standardize the images
datagen = ImageDataGenerator(
    rescale=1./255,                  # Scale pixel values to [0, 1]
    featurewise_center=True,         # Center images by mean pixel value
    featurewise_std_normalization=True, # Standardize images by removing mean and scaling to unit variance
)

# Function to load and preprocess images
def load_and_preprocess_images(dataset_path, img_height, img_width, img_ext='jpg'):
    from pathlib import Path
    from PIL import Image
    import numpy as np

    def read_pil_image(img_path, img_height, img_width):
        with open(img_path, 'rb') as f:
            return np.array(Image.open(f).convert('RGB').resize((img_width, img_height)))

    return np.array([read_pil_image(str(p), img_height, img_width) for p in Path(dataset_path).rglob("*."+img_ext)])

### Perform standardization and normalization of non-augmented dataset

In [33]:
# Load images for training and validation
train_images = load_and_preprocess_images(train_dirs[0], img_height, img_width)
val_images = load_and_preprocess_images(val_dirs[0], img_height, img_width)

# Fit the data generator to the training dataset
datagen.fit(train_images)

# Example of how to use the data generator
train_generator = datagen.flow_from_directory(
    directory=r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\train',
    target_size=(img_height, img_width),
    batch_size=batchSize,
    class_mode='categorical'
)

val_generator = datagen.flow_from_directory(
    directory=r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\val',
    target_size=(img_height, img_width),
    batch_size=batchSize,
    class_mode='categorical'
)

test_generator = datagen.flow_from_directory(
    directory=r'C:\Users\Ashish\Assignment1\I2Net-main\split_dataset\test',
    target_size=(img_height, img_width),
    batch_size=batchSize,
    class_mode='categorical'
)

Found 1882 images belonging to 7 classes.
Found 401 images belonging to 7 classes.
Found 410 images belonging to 7 classes.


## Function to compute the classification performance

In [37]:
def classification_performance(y_test, y_pred):
    #tp, fp, fn, tn = confusion_matrix(y_test, y_pred).ravel()
    accuracy = round(accuracy_score(y_true = y_test, y_pred = y_pred, normalize=True, sample_weight=None),2)
    precision = round(precision_score(y_true = y_test, y_pred = y_pred, average='weighted'),2)
    recall = round(recall_score(y_true = y_test, y_pred = y_pred, average='weighted'),2)
    f1_score = round(2*precision*recall/(precision + recall),2)    
    auc_roc = round(roc_auc_score(y_score = y_pred, y_true = y_test),2)

    result = pd.DataFrame({'Accuracy' : [accuracy],
                         'Precision (or PPV)' : [precision],
                         'Recall (senitivity or TPR)' : [recall],
                         'f1 score' : [f1_score],
                         'AUC_ROC' : [auc_roc],
                         })
    return result

## Define a function for the top layer in transfer learning

In [39]:
def new_top_layers(input_model):
    output_model = Sequential()
    output_model.add(input_model)
    output_model.add(Flatten())
    output_model.add(Dense(N,  # number of neurons in first layer of top layer
        use_bias=True,
        kernel_initializer=kInitializer,
        bias_initializer=bInitializer,
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None))
    output_model.add(activation_function)
    output_model.add(layers.Dropout(rate=dropout_rate, seed=dropout_seed)) # add dropout to mimize overfitting
    
    output_model.add(Dense(7, # layer has only 7 neurons for seven classes
        use_bias=True,
        kernel_initializer=kInitializer,
        bias_initializer=bInitializer,
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None, 
        activation='softmax')) 
    return output_model

In [78]:
def new_top_layers(input_model):
    output_model = Sequential()
    output_model.add(input_model)
    output_model.add(Flatten())
    output_model.add(Dense(N,  # number of neurons in first layer of top layer
        use_bias=True,
        kernel_initializer=kInitializer,
        bias_initializer=bInitializer,
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None))
    output_model.add(activation_function)
    output_model.add(layers.Dropout(rate=dropout_rate, seed=dropout_seed)) # add dropout to mimize overfitting
    
    output_model.add(Dense(7, # layer has only 7 neurons for seven classes
        use_bias=True,
        kernel_initializer=kInitializer,
        bias_initializer=bInitializer,
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None, 
        activation='softmax')) 
    return output_model

## TL 1: Transfer Learning with MobileNetV2

In [72]:
# Create the pretrained model as the base model
from keras.applications.mobilenet_v2 import MobileNetV2 # import the pre-trained model

base_model_MblNetV2 = MobileNetV2(input_shape=(img_height, img_width, 3), 
                include_top=False, # ideal for feature extraction
                weights='imagenet' )

base_model_MblNetV2.trainable = False # Prevent the weights of the base model from being updated before compiling & training the model

In [74]:
# Find number of model parameters
base_model_MblNetV2.summary()

In [62]:

num_classes = 10
# Define the Sequential model
model = Sequential([
    # Add convolutional layer
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    
    # Add more convolutional layers if needed
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    # Flatten the output of the convolutional layers
    Flatten(),
    
    # Add dense layers
    Dense(128, activation='relu'),
    Dropout(0.5),  # Add dropout for regularization
    
    # Output layer
    Dense(num_classes, activation='softmax')
])


In [64]:
model.summary()

In [68]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [70]:
#Fit the model to the dataset
MNv2_st = time.time()
history = MblNetV2_model.fit(
    train_generator,
    steps_per_epoch = steps_per_epoch,
    epochs = epochs,
    #workers = 4,
    validation_data = validation_generator,
    validation_steps = validation_steps)
MNv2_et = time.time()
MNv2_training_time = MNv2_et - MNv2_st
print('Model training time in seconds: ', MNv2_training_time)

NameError: name 'steps_per_epoch' is not defined