# Imports and Setup

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

#Seed for making reproducible experiments
seed = 61299
import random
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Apply the seed: defining it alone does not make anything reproducible.
# pandas' .sample() falls back to NumPy's global generator when no
# random_state is passed, so seeding NumPy also covers those calls.
random.seed(seed)
np.random.seed(seed)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#import tensorflow and print the version
import tensorflow as tf
print(tf.__version__)

# make TensorFlow's random ops (weight initialisation, dataset shuffling)
# repeatable by applying the notebook-wide seed defined in the first cell
tf.random.set_seed(seed)

# Preprocessing the Data

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Target image size in pixels; used as the default output width and height
# of get_resized().
INPUT_WIDTH = 256
INPUT_HEIGHT = 256

In [None]:
def get_resized(im, w=INPUT_WIDTH, h=INPUT_HEIGHT):
    """
        Fit an image to the designated input dimensions while keeping
            its aspect ratio, padding the edges with black pixels.
        Given: 
            im - PIL Image to resize
            w  - width of the returned image in pixels
            h  - height of the returned image in pixels
        Return:
            a new RGB PIL Image of size (w, h) with the scaled
            image pasted in its center
    """
    # create empty background and add to background
    #   (instead of padding); a new RGB image is black by default
    background = Image.new("RGB", (w, h))
    
    # get ratios to background dims
    w_r = im.width / w
    h_r = im.height / h
    aspect = im.width / im.height
    
    # use largest ratio as the longest edge of background;
    # the other edge is clamped to at least 1 px because int() truncates
    # it to 0 for very elongated images, which is not a valid resize size
    if w_r > h_r:
        width = w
        height = max(1, int(w / aspect))
    else:
        width = max(1, int(h * aspect))
        height = h
    
    resized = im.resize((width, height))
    
    # add resized image to background, centered
    background.paste(
        resized,
        ((w - width) // 2, 
         (h - height) // 2))
    return background

In [None]:
## get_resized() usage:

# load a local image
path = '../input/demo-data/eiffel.jpg'

# resize inside a context manager so the source file is closed as soon as
# the padded copy exists (get_resized returns a new, independent image)
with Image.open(path) as img:
    resized = get_resized(img)

# show the result and report its array shape
img_a = np.asarray(resized)
plt.imshow(img_a)
print(img_a.shape)

In [None]:
#Added by Chirag
#the entire bulk of the data has been imported as you can see in the data section

# General packages
import pandas as pd
import numpy as np

# IPython's Image class is imported under an alias so that it does not
# replace PIL's Image, which get_resized() needs when its cells are re-run
from IPython.display import Image as IPyImage, display
import warnings
warnings.filterwarnings("ignore")

BASE_PATH = '../input/landmark-recognition-2020'

TRAIN_DIR = f'{BASE_PATH}/train'
TEST_DIR = f'{BASE_PATH}/test'

print('Reading data...')
train = pd.read_csv(f'{BASE_PATH}/train.csv')
submission = pd.read_csv(f'{BASE_PATH}/sample_submission.csv')
print('Reading data completed')

#In the next three cells, three landmarks with roughly 1100 images each are chosen to build our base model

In [None]:
# landmark 1: keep only the rows of `train` whose landmark_id is 113209
# and preview the first few of them
landmark1 = train[train.landmark_id == 113209]
landmark1.head()

In [None]:
# landmark 2: keep only the rows of `train` whose landmark_id is 177870
# and preview the first few of them
landmark2 = train[train.landmark_id == 177870]
landmark2.head()

In [None]:
# landmark 3: keep only the rows of `train` whose landmark_id is 194914
# and preview the first few of them
landmark3 = train[train.landmark_id == 194914]
landmark3.head()

In [None]:
#First merge the above three dataframes
#The rows are then shuffled with the notebook seed (so the order is the same
#on every run), the original index is dropped and only the required columns are kept
train = (
    pd.concat([landmark1, landmark2, landmark3])
    .sample(frac=1, random_state=seed)
    .reset_index(drop=True)[["id", "landmark_id"]]
)
train.head()

In [None]:
#function taken from https://www.kaggle.com/rohitsingh9990/glr-eda-all-you-need-to-know
#this is just for visualization
import PIL
from PIL import Image, ImageDraw


def display_images(images, title=None): 
    """
        Plot training images in a grid with 5 columns, each titled with
            its image id and landmark id.
        Given:
            images - sequence of image ids (file names without extension)
                     that are present in the `train` dataframe
            title  - optional title shown above the whole figure
        Return:
            nothing; the figure is shown with plt.show()
    """
    n_cols = 5
    # ceil division: as many rows as the images need (at least one), so more
    # than 25 images no longer index past the grid
    n_rows = max(1, -(-len(images) // n_cols))

    # 25 images give the same 5x5, 18x22 inch figure as before
    f, ax = plt.subplots(n_rows, n_cols, figsize=(18, 22 * n_rows / 5), squeeze=False)
    if title:
        f.suptitle(title, fontsize = 30)

    # switch every axis off up front so unused grid slots stay blank
    for axis in ax.ravel():
        axis.axis('off')

    for i, image_id in enumerate(images):
        axis = ax[i // n_cols, i % n_cols]
        image_path = os.path.join(TRAIN_DIR, f'{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg')

        # the file is closed again as soon as it has been drawn
        with Image.open(image_path) as image:
            axis.imshow(image)

        image_key = image_id.split('.')[0]
        landmark_id = train[train.id==image_key].landmark_id.values[0]
        axis.set_title(f"ID: {image_key}\nLandmark_id: {landmark_id}", fontsize="12")

    plt.show() 

In [None]:
# pick 25 random rows from `train`, take their image ids and plot them
# with display_images()
samples = train.sample(25).id.values
display_images(samples)

In [None]:
# added by Cihan
# yet to be tested

def collect_sample(w=INPUT_WIDTH, h=INPUT_HEIGHT, landmarks=None):
    """
        Load every training image of the given landmarks, fitted to (w, h).
        Given:
            w - width of each returned image in pixels
            h - height of each returned image in pixels
            landmarks - list of landmark ids to collect (required; it only
                        has a default because it follows defaulted parameters)
        Return:
            numpy array of shape (n_images, h, w, 3) holding the resized
            images; n_images is 0 when no row matches the landmarks
        Raises:
            ValueError if landmarks is missing or empty
    """
    if landmarks is None or len(landmarks) == 0:
        raise ValueError("landmarks must be a non-empty list of landmark ids")

    # read the full csv: the global `train` may already be reduced to a subset
    train_df = pd.read_csv(f'{BASE_PATH}/train.csv')
    landmarks_df = train_df[train_df.landmark_id.isin(landmarks)]

    samples = []
    for image_id in landmarks_df.id:
        image_path = os.path.join(TRAIN_DIR, f'{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg')
        with Image.open(image_path) as im:
            samples.append(np.asarray(get_resized(im, w, h)))

    # np.stack cannot handle an empty list, so build the empty result directly
    if not samples:
        return np.empty((0, h, w, 3), dtype=np.uint8)
    return np.stack(samples)
    
    

In [None]:
# Converting the data into a Tensorflow Dataset
# Added by Saber

# NOTE(review): BASE_PATH, TRAIN_DIR and TEST_DIR are assigned these same
# values in an earlier cell; they are repeated here.
BASE_PATH = '../input/landmark-recognition-2020'

TRAIN_DIR = f'{BASE_PATH}/train'
TEST_DIR = f'{BASE_PATH}/test'

# Load train.csv under its own name, train_csv.
# NOTE(review): presumably done because `train` was reassigned to the
# three-landmark subset earlier in the notebook - confirm.
train_csv = pd.read_csv(f'{BASE_PATH}/train.csv')


In [None]:
# Adapted from https://cs230.stanford.edu/blog/datapipeline/
import os
import tensorflow as tf

def parse_function(filename, label, 
                   img_dim=256):
    """
        Read a JPEG file and return it as a square float image with its label.
        Given:
            filename - path of the JPEG file to read
            label    - label belonging to the image; passed through unchanged
            img_dim  - edge length in pixels of the (square) output image
        Return:
            (image, label), where image is the decoded 3-channel picture
            converted to float32 and resized to img_dim x img_dim
    """
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    # convert_image_dtype rescales the integer pixels to floats in [0, 1]
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)

    target_size = [img_dim, img_dim]
    return tf.image.resize(as_float, target_size), label
    

In [None]:
# Prepare the filenames and labels
# Root directory of the training images.
path='/kaggle/input/landmark-recognition-2020/train'

# Only the first n_samples rows of train_csv are used.
n_samples = 256
image_ids = list(train_csv['id'][:n_samples])
# Images are stored three directory levels deep, one level per leading
# character of the id: <path>/<c0>/<c1>/<c2>/<id>.jpg
filenames = [os.path.join(path, f'{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg') for image_id in image_ids]
# Labels are taken from the same rows, so they line up with filenames.
labels = list(train_csv['landmark_id'][:n_samples])
# The ids are only needed to build the paths.
del image_ids
# Batch size for the dataset; equal to n_samples, so all samples fit in one batch.
batch_size = 256

In [None]:
# Create the Dataset object
with tf.device('/cpu:0'):
    # (filename, label) pairs -> shuffle over the whole list -> decode and
    # resize with parse_function -> batch -> keep one batch prefetched
    dataset = (
        tf.data.Dataset.from_tensor_slices((filenames, labels))
        .shuffle(len(filenames))
        .map(parse_function, num_parallel_calls=4)
        .batch(batch_size)
        .prefetch(1)
    )

In [None]:
# for one_element in dataset:
#     print(one_element)

# Neural Networks Architectures

In [None]:
from sklearn.preprocessing import LabelBinarizer

#to train a simple example of a CNN importing MNIST dataset from keras
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
label_binarizer = LabelBinarizer()

#encode the labels to one hot vector
# the binarizer is fitted on the training labels only; the test labels are
# transformed with that same fitted encoder
train_labels = label_binarizer.fit_transform(train_labels)
test_labels = label_binarizer.transform(test_labels)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Softmax, Conv2D, MaxPooling2D, AveragePooling2D

#define the sequential neural network
# a small CNN for 28x28 single-channel images with 10 output classes
model = Sequential([
    Conv2D(16,(3,3),padding = 'SAME', activation = 'relu', input_shape = (28,28,1),data_format = 'channels_last'),
    MaxPooling2D((3,3)),
    # the input shape is declared once, on the first layer; Flatten works on
    # whatever the pooling layer outputs, so it takes no input_shape
    Flatten(),
    Dense(16,activation = 'relu'),
    Dense(10,activation = 'softmax')
])

#print the summary of the network
model.summary()

In [None]:
#set up compiling options
opt = tf.keras.optimizers.Adam()
mae = tf.keras.metrics.MeanAbsoluteError()
# accuracy is the informative metric for one-hot classification; the mean
# absolute error is kept so existing history entries stay available
model.compile(optimizer = opt,
             loss = 'categorical_crossentropy',
             metrics = [mae, 'accuracy']
             )

## LeNet-5

In [None]:
# LeNet-5 style network for 28x28 single-channel images: the 'SAME' padding
# on the first convolution keeps the 28x28 size, and after two pooling steps
# the last 5x5 convolution reduces the feature map to 1x1x120
leNet5 = Sequential([
    Conv2D(6,(5,5), activation = 'tanh',padding = 'SAME', input_shape = (28,28,1),data_format = 'channels_last'),
    AveragePooling2D((2,2)),
    Conv2D(16,(5,5), activation = 'tanh'),
    AveragePooling2D((2,2)),
    Conv2D(120,(5,5), activation = 'tanh'),
    Flatten(),
    Dense(84,activation = 'tanh'),
    Dense(10,activation = 'softmax')
])
leNet5.summary()

opt = tf.keras.optimizers.Adam()
mae = tf.keras.metrics.MeanAbsoluteError()
# accuracy is the informative metric for one-hot classification; the mean
# absolute error is kept so existing history entries stay available
leNet5.compile(optimizer = opt,
             loss = 'categorical_crossentropy',
             metrics = [mae, 'accuracy']
             )

# the images get a trailing channel axis to match the (28, 28, 1) input
history = leNet5.fit(train_images[...,np.newaxis],train_labels,epochs = 1, batch_size = 256)

## AlexNet

In [None]:
# AlexNet-style network for 227x227 RGB input.
# NOTE(review): the layers after the first pooling step are a simplified
# draft and do not reproduce the layer sizes of the original AlexNet.
alexNet = Sequential([
    # Keras spells the argument `strides`; `stride` is rejected as an unknown keyword
    Conv2D(96,(11,11), activation = 'relu',padding = 'VALID', strides = 4, input_shape = (227,227,3), data_format = 'channels_last'),
    MaxPooling2D((3,3), strides = 2),
    Conv2D(256,(5,5), padding = 'SAME', activation = 'relu'),
    MaxPooling2D((3,3)),
    Conv2D(120,(5,5), activation = 'tanh'),
    Conv2D(256,(5,5), padding = 'SAME', activation = 'relu'),
    Conv2D(256,(5,5), padding = 'SAME', activation = 'relu'),
    Conv2D(256,(5,5), padding = 'SAME', activation = 'relu'),
    Flatten(),
    Dense(84,activation = 'tanh'),
    Dense(10,activation = 'softmax')
])
alexNet.summary()

opt = tf.keras.optimizers.Adam()
mae = tf.keras.metrics.MeanAbsoluteError()
# accuracy is the informative metric for one-hot classification; the mean
# absolute error is kept so existing history entries stay available
alexNet.compile(optimizer = opt,
             loss = 'categorical_crossentropy',
             metrics = [mae, 'accuracy']
             )

# MNIST images are 28x28 grayscale while the network expects 227x227 RGB.
# They are upscaled and expanded to three channels one batch at a time,
# because the resized training set would not fit in memory all at once.
alexnet_train = (
    tf.data.Dataset.from_tensor_slices((train_images[..., np.newaxis], train_labels))
    .batch(256)
    .map(lambda images, targets: (
        tf.image.grayscale_to_rgb(tf.image.resize(images, (227, 227))),
        targets))
)

# the dataset is already batched, so no batch_size is passed to fit()
history = alexNet.fit(alexnet_train, epochs = 1)

## GoogLeNet

## VGG-16

## ResNet

## Xception

## SENet

## Spatial Pyramid Pooling

# Training, Diagnosing and Evaluating the Neural Networks

In [None]:
#train the model
# one epoch on the MNIST training images, which get a trailing channel axis
# to match the (28, 28, 1) input shape; note that `history` is also assigned
# by the LeNet-5 and AlexNet cells, so this overwrites their result
history = model.fit(train_images[...,np.newaxis],train_labels,epochs = 1, batch_size = 256)