In [42]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for folder, _subdirs, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [5]:
from tensorflow.keras.layers import Input,Dense,Flatten
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob

In [6]:
# Root folder of the dataset; the split sub-folders are addressed relative to it.
data_dir = '../input/csc4851-homework4/birds_400'
print(f'Directories: {os.listdir(data_dir)}')
# os.listdir returns entries in arbitrary order. The list is sorted so that a
# name's position here agrees with the class index Keras' flow_from_directory
# assigns (alphanumeric order of the sub-folder names); the evaluation loop
# later relies on classes.index(...) lining up with the model's output units.
classes = sorted(os.listdir(data_dir + "/train"))
print(f'Number of classes: {len(classes)}')

In [None]:
import torchvision.transforms as tt
# (mean, std) tuples, unpacked into tt.Normalize below.
# NOTE(review): these values look like the commonly quoted CIFAR-10 channel
# statistics - confirm they are appropriate for this dataset.
stats = (
    (0.4914, 0.4822, 0.4465),
    (0.2023, 0.1994, 0.2010),
)

# Training pipeline: padded random crop and random horizontal flip as
# augmentation, then tensor conversion and in-place normalisation.
_train_steps = [
    tt.RandomCrop(224, padding=15, padding_mode='reflect'),
    tt.RandomHorizontalFlip(),
    tt.ToTensor(),
    tt.Normalize(*stats, inplace=True),
]
train_tfms = tt.Compose(_train_steps)

# Validation/test pipeline: no augmentation, only tensor conversion and normalisation.
valid_tfms = tt.Compose([tt.ToTensor(), tt.Normalize(*stats)])

In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
# One ImageFolder per split. Only the training split gets the augmenting
# transform; validation and test share the plain one.
train_ds = ImageFolder(f'{data_dir}/train', transform=train_tfms)
valid_ds = ImageFolder(f'{data_dir}/valid', transform=valid_tfms)
test_ds = ImageFolder(f'{data_dir}/test', transform=valid_tfms)

In [7]:
def dataset_info(dataset):
    """Print the size, first-sample shape/label and class count of a dataset.

    `dataset` must support len(), integer indexing that yields an
    (image, label) pair whose image exposes `.shape`, and a `.classes`
    attribute.
    """
    print('Size of dataset:', len(dataset))
    first_image, first_label = dataset[0]
    print('Sample-01 Image size: {}, Label: {}'.format(first_image.shape, first_label))
    # end='\n\n\n' reproduces the two blank lines that follow the class count.
    print('Number of classes:', len(dataset.classes), end='\n\n\n')

# Same report for each split, in train / validation / test order.
for split_title, split_ds in (
    ('Train Dataset', train_ds),
    ('Validation Dataset', valid_ds),
    ('Test Dataset', test_ds),
):
    print(split_title)
    dataset_info(split_ds)

In [8]:
from tqdm import tqdm
from cv2 import imread
import os
def get_counts(dataset_path,dataset_type):
    """Collect per-species image counts and per-image dimensions for one split.

    Parameters
    ----------
    dataset_path : str
        Directory whose immediate children are one folder per species.
        A trailing slash is accepted.
    dataset_type : str
        Label used only in the printed summary (e.g. "train").

    Returns
    -------
    tuple of four lists
        (species names, number of entries in each species folder,
        heights of all readable images, widths of all readable images).
        Names and counts follow the order returned by os.listdir.
    """
    # List the split once so the reported count and the loop see the same snapshot.
    all_species_names = list(os.listdir(dataset_path))
    all_species_count = len(all_species_names)
    species_image_count = []
    all_heights = []
    all_widths = []
    unreadable = []

    for species in tqdm(all_species_names):
        species_folder_path = os.path.join(dataset_path, species)
        image_files = os.listdir(species_folder_path)
        species_image_count.append(len(image_files))
        for image_file in image_files:
            filename = os.path.join(species_folder_path, image_file)
            image = imread(filename)
            # cv2.imread reports a failed read by returning None instead of
            # raising, so guard before touching .shape.
            if image is None:
                unreadable.append(filename)
                continue
            all_heights.append(image.shape[0])
            all_widths.append(image.shape[1])
    print()
    print(f"Total no. of species in {dataset_type}= {all_species_count}")
    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")
    return all_species_names,species_image_count,all_heights,all_widths

In [9]:
# Scan the training split: species names, per-species file counts and image dimensions.
(train_species_names,
 train_species_image_count,
 train_images_heights,
 train_images_widths) = get_counts(data_dir+"/train/", "train")

In [10]:
# Scan the validation split: species names, per-species file counts and image dimensions.
(val_species_names,
 val_species_image_count,
 val_images_heights,
 val_images_widths) = get_counts(data_dir+"/valid", "validation")

In [11]:
# Scan the test split: species names, per-species file counts and image dimensions.
(test_species_names,
 test_species_image_count,
 test_images_heights,
 test_images_widths) = get_counts(data_dir+"/test/", "test")

In [12]:
# Compare the three splits on their alphabetically ordered species lists.
train_sorted = sorted(train_species_names)
val_sorted = sorted(val_species_names)
test_sorted = sorted(test_species_names)
check = train_sorted == val_sorted and val_sorted == test_sorted
print("Are all species names same in train, validation & test datasets? -->",check)

In [13]:
# Work on sets: membership tests are constant-time and order is irrelevant here.
train_name_set = set(train_species_names)
val_name_set = set(val_species_names)
test_name_set = set(test_species_names)

# Training species that are missing from the validation or the test split.
for species_name in train_species_names:
    if species_name not in val_name_set or species_name not in test_name_set:
        print(species_name)

# Each difference is printed explicitly: a notebook cell only echoes its LAST
# bare expression, so un-printed intermediate results would be dropped.
print("Performing set difference by subtracting the validation species names from train:-")
print(train_name_set - val_name_set)

print("Performing set difference by subtracting the train species names from validation:-")
print(val_name_set - train_name_set)

print("Performing set difference by subtracting the train species names from test:-")
print(test_name_set - train_name_set)

In [14]:
# The train folder for this species is spelled with a double space; rename it
# to the single-space spelling.
# The rename is guarded so the cell can be re-run (it is a no-op once done) and
# so that a non-writable dataset mount - the Kaggle notes at the top of this
# notebook describe "../input/" as read-only - produces a message instead of
# aborting "Run All".
_misnamed_dir = data_dir+"/train/BLACK & YELLOW  BROADBILL"
_fixed_dir = data_dir+"/train/BLACK & YELLOW BROADBILL"
if os.path.isdir(_misnamed_dir) and not os.path.exists(_fixed_dir):
    try:
        os.rename(src=_misnamed_dir, dst=_fixed_dir)
    except OSError as rename_error:
        print(f"Could not rename {_misnamed_dir!r}: {rename_error}")

In [17]:
import matplotlib.pyplot as plt
import seaborn as sns
# One panel per dimension. Passing a flat list to sns.lineplot puts the
# position in the list on x and the value on y, so the dimension belongs on
# the y-axis label (the x-axis is just the image's position in the scan).
fig, (height_ax, width_ax) = plt.subplots(1, 2, figsize=(12, 4))

sns.lineplot(data=train_images_heights, ax=height_ax)
height_ax.set(title="all image heights", xlabel="Image index", ylabel="Height")

sns.lineplot(data=train_images_widths, ax=width_ax)
width_ax.set(title="all image widths", xlabel="Image index", ylabel="Width")

fig.tight_layout();

In [18]:
# Spatial size of the network input; a channel count is appended to this list
# where the Xception base is instantiated.
IMAGE_SIZE = [224, 224]

In [19]:
# Directories of the three splits, all under the same birds_400 root.
_birds_root = '../input/csc4851-homework4/birds_400'
train_path, valid_path, test_path = (
    f'{_birds_root}/{split}' for split in ('train', 'valid', 'test')
)

In [20]:
# Xception base initialised with ImageNet weights and built without its
# top classification layers (include_top=False); a new head is attached later.
xcept = Xception(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SIZE + [3],
)

In [23]:
# Freeze every layer of the pretrained base so only the new head is trained.
for base_layer in xcept.layers:
    base_layer.trainable = False

# One output unit per entry found directly under the training directory.
folders = glob('../input/csc4851-homework4/birds_400/train/*')

# Classification head: flatten -> Dense(256, relu) -> BatchNorm -> Dropout(0.3) -> softmax.
head = layers.Flatten()(xcept.output)
head = layers.Dense(256, activation='relu', kernel_initializer='he_normal')(head)
head = layers.BatchNormalization()(head)
head = layers.Dropout(0.3)(head)
prediction = layers.Dense(len(folders), activation='softmax')(head)

model = Model(inputs=xcept.input, outputs=prediction)
model.summary()

In [24]:
# The network ends in a softmax over mutually exclusive classes and the data
# iterators use class_mode='categorical' (one-hot labels), so the matching loss
# is categorical cross-entropy. With 'binary_crossentropy' every output unit is
# scored as an independent yes/no problem and Keras resolves the 'accuracy'
# metric to binary accuracy, which looks near-perfect for a many-class one-hot
# target even when the predicted class is wrong.
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [25]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training images get light augmentation (shear, zoom, horizontal flip);
# every split has its pixel values multiplied by 1/255.
# NOTE(review): the Xception base uses ImageNet weights, and the
# `preprocess_input` imported at the top of the notebook is never applied -
# confirm that plain 1/255 rescaling is the intended preprocessing.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the three directory iterators.
_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

training_set = train_datagen.flow_from_directory(
    '../input/csc4851-homework4/birds_400/train', **_flow_options)
valid_set = valid_datagen.flow_from_directory(
    '../input/csc4851-homework4/birds_400/valid', **_flow_options)
test_set = test_datagen.flow_from_directory(
    '../input/csc4851-homework4/birds_400/test', **_flow_options)

In [None]:
# Train for 10 epochs; each epoch consumes every batch of the training
# iterator and validates on every batch of the validation iterator.
r = model.fit(
    training_set,
    epochs=10,
    steps_per_epoch=len(training_set),
    validation_data=valid_set,
    validation_steps=len(valid_set),
)

In [None]:
# Evaluate the trained model on the test iterator; the returned values
# (loss followed by the compiled metrics) are shown as the cell output.
model.evaluate(test_set)

In [None]:
def plot_accuracies(history):
    """Plot validation accuracy against epoch number.

    `history` may be any of:
      * an object with a `.history` mapping (e.g. what `model.fit` returns),
      * a mapping that holds a 'val_accuracy' sequence,
      * a sequence of per-epoch dicts, each with a 'val_accuracy' entry
        (the form this function originally accepted).
    """
    # Unwrap a fit-result object; lists and dicts have no `.history`
    # attribute and pass through unchanged.
    records = getattr(history, 'history', history)
    if isinstance(records, dict):
        accuracies = list(records['val_accuracy'])
    else:
        accuracies = [epoch['val_accuracy'] for epoch in records]
    plt.plot(accuracies, '-x')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs')

In [None]:
import tensorflow as tf
from keras.models import load_model
# Persist the trained model to 'xception.h5' in the current working directory.
model.save('xception.h5')

In [None]:
# Training accuracy recorded for each epoch of the fit above.
accuracies = r.history['accuracy']
accuracy_ax = plt.gca()
accuracy_ax.plot(accuracies, '-x')
accuracy_ax.set_xlabel('epoch')
accuracy_ax.set_ylabel('accuracy')
accuracy_ax.set_title('Accuracy vs. No. of epochs');

In [None]:
# Training (blue) and validation (red) loss per epoch on the same axes.
train_losses = r.history['loss']
val_losses = r.history['val_loss']
loss_ax = plt.gca()
for curve, line_format in ((train_losses, '-bx'), (val_losses, '-rx')):
    loss_ax.plot(curve, line_format)
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(['Training', 'Validation'])
loss_ax.set_title('Loss vs. No. of epochs')

In [None]:
from sklearn.metrics import log_loss
from keras import losses
from glob import glob
from keras.preprocessing import image
def prepare(img_path):
    """Load one image file as a single-item batch ready for `model.predict`.

    The image is loaded at 224x224, converted to an array, scaled by 1/255
    and given a leading batch axis.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        The scaled image array with an extra axis 0 of length 1.
    """
    img = image.load_img(img_path, target_size=(224,224))
    x = image.img_to_array(img)
    # Divide by 255 (not by the image side length 224) so inference inputs get
    # the same scaling as the rescale=1./255 used by the data generators.
    x = x/255
    return np.expand_dims(x, axis=0)

# Categorical cross-entropy loss object, created once for reuse.
cce = losses.CategoricalCrossentropy()

In [None]:
# Per-class accumulation of the categorical cross-entropy over the test images.
# The one-hot length is taken from `classes` instead of a hard-coded 400, and
# the name -> index lookup is built once rather than scanning the list per image.
# NOTE(review): the position of a name in `classes` must agree with the model's
# output order (the generator's class indices) for these targets to be
# meaningful - confirm.
num_classes = len(classes)
class_to_index = {name: position for position, name in enumerate(classes)}
template = [0]*num_classes   # all-zero target; copied per image, never mutated

# The name `log_loss` is kept because later cells read it (it shadows the
# sklearn function of the same name imported earlier in the notebook).
log_loss = {}
# glob order is arbitrary; sorting makes the run (and the row order written
# from this dict later) reproducible.
test_images = sorted(glob("/kaggle/input/csc4851-homework4/birds_400/test/" + "*/*.jpg"))

for path in test_images:
    # verbose=0: one predict call per image would otherwise print a progress bar each time.
    result = model.predict(prepare(path), verbose=0)
    actual_class = path.split('/')[-2]
    # `classes` holds the double-space spelling of this species; map the
    # single-space folder name onto it.
    actual_class = "BLACK & YELLOW  BROADBILL" if actual_class == "BLACK & YELLOW BROADBILL" else actual_class
    actual_index = class_to_index[actual_class]
    # Fresh one-hot per image: an interrupted iteration cannot leave a stale 1
    # behind in shared state.
    target = list(template)
    target[actual_index] = 1
    log_loss_current = cce(target, result[0]).numpy()
    # NOTE(review): the fixed divisor 100 is kept as-is; confirm it is the
    # intended scaling (it is not derived from the number of images per class).
    log_loss[actual_index] = log_loss.get(actual_index, 0) + (log_loss_current)/100

In [None]:
# Display the accumulated dict (class index -> summed, scaled loss).
log_loss

In [None]:
import csv
# Write one row per class index with its accumulated value.
ids = list(log_loss.keys())
values = list(log_loss.values())
# `with` guarantees the file is flushed and closed even if a write fails;
# newline='' is what the csv module requires of files it writes to, so that
# no extra blank lines appear on platforms that translate line endings.
with open('submission.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id','birds'])
    writer.writerows(zip(ids, values))

In [None]:
# List the working directory (e.g. to confirm the saved model and submission
# file exist). Pure Python instead of a bare `ls`, which only works through
# IPython's automagic and breaks if a variable named `ls` is ever defined.
sorted(os.listdir('.'))