In [18]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn import preprocessing
import wandb
# Root folder of the dataset; the cells below read its train/, valid/ and
# test/ sub-folders. Set the BIRDS_DATASET_DIR environment variable to point
# the notebook at another location; the fallback is the original local path.
dataset = os.environ.get(
    "BIRDS_DATASET_DIR",
    "C:\\Users\\divij\\Desktop\\ml\\project\\dataset",
)

In [19]:
import tensorflow as tf
# Ask TensorFlow which GPU devices it can see and report how many there are.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

Num GPUs Available:  0


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Conv2D,Add,MaxPooling2D, Dense, BatchNormalization,Input,Flatten, Dropout,GlobalMaxPooling2D,Lambda
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator 

This project is divided into 3 sections; the first covers data cleaning and pre-processing.

Section 1: Data Pre-Processing

Check if GPU is Available

In [None]:
# PyTorch is not imported anywhere else in the visible notebook, so this cell
# used to fail with NameError on a fresh kernel. Import it here so the device
# check is self-contained.
import torch

# Use CUDA when PyTorch can see a GPU, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Selected device:", device)

if device.type == "cuda":
    gpu_index = 0  # index of the GPU to report and make current
    print("Selected GPU:", torch.cuda.get_device_name(gpu_index))
    torch.cuda.set_device(gpu_index)

Section 1.1: Setting Up DataFrames

Train Data

In [None]:
# Build a (path, label) table for the training split: each sub-folder of
# <dataset>/train is taken as one class and each entry inside it as one image.
train_root = os.path.join(dataset, "train")

paths = []
labels = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and anything indexed by position later) reproducible between runs.
for bird_type in sorted(os.listdir(train_root)):
    cur_path = os.path.join(train_root, bird_type)
    if not os.path.isdir(cur_path):
        # A stray file next to the class folders would make the inner
        # os.listdir call raise NotADirectoryError.
        continue
    path_data = sorted(os.listdir(cur_path))
    paths.extend(os.path.join(cur_path, img) for img in path_data)
    labels.extend([bird_type] * len(path_data))

df_train = pd.DataFrame({"path": paths, "label": labels})

print("Shape of df_train is: ", df_train.shape)
df_train.head(10)

Validation DataFrame

In [None]:
# Build a (path, label) table for the validation split: each sub-folder of
# <dataset>/valid is taken as one class and each entry inside it as one image.
valid_root = os.path.join(dataset, "valid")

paths = []
labels = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# reproducible between runs.
for bird_type in sorted(os.listdir(valid_root)):
    cur_path = os.path.join(valid_root, bird_type)
    if not os.path.isdir(cur_path):
        # A stray file next to the class folders would make the inner
        # os.listdir call raise NotADirectoryError.
        continue
    path_data = sorted(os.listdir(cur_path))
    paths.extend(os.path.join(cur_path, img) for img in path_data)
    labels.extend([bird_type] * len(path_data))

df_valid = pd.DataFrame({"path": paths, "label": labels})

print("Shape of df_valid is: ", df_valid.shape)
df_valid.head(10)

In [None]:
# Build a (path, label) table for the test split: each sub-folder of
# <dataset>/test is taken as one class and each entry inside it as one image.
test_root = os.path.join(dataset, "test")

paths = []
labels = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and the positional lookup of a sample image later on) reproducible.
for bird_type in sorted(os.listdir(test_root)):
    cur_path = os.path.join(test_root, bird_type)
    if not os.path.isdir(cur_path):
        # A stray file next to the class folders would make the inner
        # os.listdir call raise NotADirectoryError.
        continue
    path_data = sorted(os.listdir(cur_path))
    paths.extend(os.path.join(cur_path, img) for img in path_data)
    labels.extend([bird_type] * len(path_data))

df_test = pd.DataFrame({"path": paths, "label": labels})

print("Shape of df_test is: ", df_test.shape)
df_test.head(10)

Number of Bird Species in the Dataset

In [None]:
# Count the distinct labels in the training table.
n_species = df_train['label'].nunique()
print("How many species of birds are there in dataset: ", n_species)

Number of Samples per Label

In [None]:
# Number of training rows for each label.
df_train["label"].value_counts()

Data Visualization: Label Distribution and Sample Images

In [None]:
# Bar chart of how many rows each class has in the training table.
labels = df_train['label']
label_counts = labels.value_counts()

fig, ax = plt.subplots(figsize=(18, 6))
sns.barplot(x=label_counts.index, y=label_counts.values, ax=ax)
ax.set_title('Class Distribution of trainset')
ax.set_xlabel('Class Labels')
ax.set_ylabel('Number of Samples')
plt.show()

In [None]:
# Bar chart of how many rows each class has in the validation table.
labels = df_valid['label']
label_counts = labels.value_counts()

fig, ax = plt.subplots(figsize=(18, 6))
sns.barplot(x=label_counts.index, y=label_counts.values, ax=ax)
ax.set_title('Class Distribution of validset')
ax.set_xlabel('Class Labels')
ax.set_ylabel('Number of Samples')
plt.show()

In [None]:
# Bar chart of how many rows each class has in the test table.
labels = df_test['label']
label_counts = labels.value_counts()

fig, ax = plt.subplots(figsize=(18, 6))
sns.barplot(x=label_counts.index, y=label_counts.values, ax=ax)
ax.set_title('Class Distribution of testset')
ax.set_xlabel('Class Labels')
ax.set_ylabel('Number of Samples')
plt.show()

In [None]:
# mapping data
# Encoder instance; it is not used in this cell.
# NOTE(review): presumably consumed by a later cell — confirm or remove.
le = preprocessing.LabelEncoder()

# Collapse the double space in the 'PARAKETT  AUKLET' label. The original cell
# only patched the train and test tables; applying the same replacement to the
# validation table keeps the three splits consistent and is a no-op wherever
# the label is already spelled with a single space.
for split_df in (df_train, df_valid, df_test):
    split_df['label'] = split_df['label'].replace('PARAKETT  AUKLET', 'PARAKETT AUKLET')

print("Shape of df_train is: ", df_train.shape)
print("Shape of df_valid is: ", df_valid.shape)
print("Shape of df_test is: ", df_test.shape)

df_test.head(7)

View Images

In [None]:
def view_random_image(target_dir, target_class):
    """Show a single image with its class as the title and return its data.

    Despite the name, nothing is chosen at random here: ``target_dir`` is
    passed straight to ``mpimg.imread`` as the file to read.

    Args:
        target_dir: Path of the image file to display.
        target_class: Text used as the plot title.

    Returns:
        Whatever ``mpimg.imread`` returned for the file.
    """
    image_path = target_dir

    # Load the file and draw it without axis ticks.
    pixels = mpimg.imread(image_path)
    plt.imshow(pixels)
    plt.title(target_class)
    plt.axis("off")

    print(f"Image shape: {pixels.shape}")
    print(f"Path : {image_path}")

    return pixels

# Display the test-table entry stored at position 99 together with its label.
sample_position = 99
img = view_random_image(
    str(df_test['path'].values[sample_position]),
    str(df_test['label'].values[sample_position]),
)

Classification Model: 

In [None]:
# Plain CNN: three Conv2D + MaxPooling2D stages followed by a dense head that
# ends in a 525-way softmax. Inputs are 224x224 images with 3 channels.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(525, activation='softmax'),
])

model.summary()

In [None]:
# One generator per split; each one only rescales pixel values by 1/255.
train_datagen, test_datagen, valid_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Split folders under the dataset root.
train_dir = dataset+"\\train"
test_dir = dataset+"\\test"
val_dir = dataset+"\\valid"

# Settings shared by all three directory iterators.
_flow_options = dict(
    batch_size=32,
    target_size=(224, 224),
    class_mode="categorical",
)

# data transfer from directories to batches
train_data = train_datagen.flow_from_directory(directory=train_dir, **_flow_options)
test_data = test_datagen.flow_from_directory(directory=test_dir, **_flow_options)
val_data = valid_datagen.flow_from_directory(directory=val_dir, **_flow_options)

In [None]:
# One epoch covers every training batch; validation uses a quarter of the
# validation batches, rounded down.
steps_per_epoch = len(train_data)
validation_steps = len(val_data) // 4
                       

# Training

In [None]:
# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="Comparing-Object-Recog-Models",

    # track hyperparameters and run metadata
    # The values below mirror what the training cell actually does. The run
    # previously logged learning_rate=0.02 and epochs=10, which did not match
    # the optimizer='adam' defaults and epochs=20 used by model.fit.
    config={
    "learning_rate": 0.001,  # Keras default for optimizer='adam'
    "architecture": "CNN",
    "dataset": "Birds-525",
    "epochs": 20,
    "batch_size": 32,  # batch size of the flow_from_directory iterators
    }
)

In [None]:
# Multi-class setup: categorical cross-entropy on one-hot labels, Adam with
# its default settings, accuracy as the reported metric.
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

# Train the model
# batch_size is deliberately not passed: train_data is a directory iterator
# that already yields batches of 32, and Keras documents that batch_size must
# not be specified for generator / Sequence inputs. The former batch_size=16
# also contradicted the iterator's real batch size.
model.fit(train_data,
          steps_per_epoch=len(train_data), epochs=20,
          validation_data=val_data,
          # only a quarter of the validation batches are scored each epoch
          validation_steps=int(0.25*len(val_data)))



In [None]:
# Score the trained model on the test iterator; with the single 'accuracy'
# metric the result unpacks into (loss, accuracy).
evaluation = model.evaluate(test_data)
test_loss, test_acc = evaluation
print(f'Test Accuracy: {test_acc*100:.2f}%')