In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The CSV, downloaded images and saved models used below all live under
# /content/drive/MyDrive/Landmark_Recognition_Project.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Modules which used in the project
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from keras.applications.resnet50 import ResNet50
from tensorflow.keras import layers
from urllib.request import urlopen
import matplotlib.pyplot as plt
import tensorflow as tf
from io import BytesIO
from PIL import Image
import seaborn as sns
import pandas as pd
import numpy as np
import traceback
import pathlib
import os

In [None]:
# Load train.csv from the mounted Drive folder into a DataFrame and preview
# its first rows (columns used later: id, url, landmark_id).
train_df = pd.read_csv('/content/drive/MyDrive/Landmark_Recognition_Project/train.csv')
train_df.head()

In [None]:
# (rows, columns) of the raw dataframe.
train_df.shape

In [None]:
# Data type pandas inferred for each column.
train_df.dtypes

In [None]:
# Count of null values per column.
train_df.isna().sum()

In [None]:
# Single boolean: True if any cell anywhere in the dataframe is null.
train_df.isnull().values.any()

In [None]:
# Print the id, url and landmark id stored at index label `temp`
# (nothing is fetched from the URL here; the values are only displayed).
temp = 4444
for caption, column in (('id', 'id'), ('url:', 'url'), ('landmark id:', 'landmark_id')):
    print(caption, train_df[column][temp])

In [None]:
# Some rows carry the literal string 'None' instead of a URL.
# Collect the index labels of those rows so they can be removed afterwards.
dropped_rows = train_df.index[train_df['url'].eq('None')]
dropped_rows

In [None]:
# Remove the rows whose url is the literal string 'None'.
# The frame is re-bound instead of mutated with inplace=True, and
# errors='ignore' is passed, so re-running this cell is a no-op rather than a
# KeyError once the labels in dropped_rows have already been removed.
train_df = train_df.drop(index=dropped_rows, errors='ignore')


In [None]:
# Number of rows per landmark_id, showing the first 50 entries of
# value_counts() (i.e. the most frequent landmark ids).
train_df['landmark_id'].value_counts().head(50)

In [None]:
# Occurrence count of the 13 most frequent landmark ids, as a two-column
# table (landmark_id, count) ready for plotting.
temp = (
    train_df['landmark_id']
    .value_counts()
    .head(13)
    .reset_index()
)
temp.columns = ['landmark_id', 'count']
temp

In [None]:
# Bar chart of the most frequent landmark ids held in `temp`.
fig, ax = plt.subplots(figsize=(9, 8))
ax.set_title('Most frequent landmarks')
sns.set_color_codes("pastel")
sns.barplot(data=temp, x="landmark_id", y="count", label="Count", ax=ax)
plt.show()

In [None]:
# Occurrence count of the 13 least frequent landmark ids (the tail of
# value_counts()), as a two-column table (landmark_id, count).
temp = (
    train_df['landmark_id']
    .value_counts()
    .tail(13)
    .reset_index()
)
temp.columns = ['landmark_id', 'count']
temp

In [None]:
# Bar chart of the least frequent landmark ids held in `temp`.
fig, ax = plt.subplots(figsize=(9, 8))
ax.set_title('Least frequent landmarks')
sns.set_color_codes("pastel")
sns.barplot(data=temp, x="landmark_id", y="count", label="Count", ax=ax)
plt.show()

In [None]:
# Human-readable name for each landmark id used in this project.
# Ids are kept as strings; insertion order is preserved in `keys`.
key_map = dict([
    ("9633", "san_pietro_vatican_city"),
    ("6051", "colosseum_rome"),
    ("9779", "el_partal_spain"),
    ("2061", "powder_tower_prague_czech"),
    ("5554", "petronas_towers_malaysia"),
    ("5376", "rialto_bridge_venice"),
    ("6696", "national_museum_of_catalunya"),
    ("2743", "pantheon_rome"),
    ("4352", "alcatraz_california"),
    ("13526", "hofburg_vienna_austria"),
    ("1553", "berlin_cathedral_germany"),
    ("10900", "commerzbank_tower_frankfurt"),
    ("8063", "hagia_sophia_istanbul_turkey"),
])

# The landmark ids alone, in the same order as key_map.
keys = [*key_map]


In [None]:
# Collect, for every landmark id in `keys`, the rows of train_df that belong
# to it, and print how many rows each one has.
frames = {}

# `keys` holds the ids as strings. Comparing them against an integer column
# would match nothing, so the comparison is done on the string form of
# landmark_id; this works whether pandas parsed the column as numbers or text.
landmark_id_text = train_df["landmark_id"].astype(str)

for elem in keys:
    frame = train_df.loc[landmark_id_text == elem]
    print(elem + " -> " + str(frame.shape))
    frames[elem] = frame

len(frames)
    

In [None]:
# Download the images of every selected landmark and store them as
# <base_directory>/<train|test>/<landmark_id>/image_<n>.jpg.
base_directory = "/content/drive/MyDrive/Landmark_Recognition_Project/downloads"

train_directory = '{}/train'.format(base_directory)
test_directory = '{}/test'.format(base_directory)


# Per landmark: the first `train_count` urls go to train, the next
# `test_count` urls go to test.
train_count = 1600
test_count = 400

# Seconds a single HTTP request may block before it is abandoned, so one
# unresponsive host cannot stall the whole bulk download.
download_timeout = 30


def _download_image(url, file_path, timeout=download_timeout):
    """Fetch one image and store it as an RGB JPEG.

    Every failure is reported (traceback plus a warning line) and swallowed,
    so that one bad URL never aborts the surrounding bulk download.

    Args:
        url: Address of the image to fetch.
        file_path: Destination file; its parent folder is created if needed
            and the download is skipped when the file already exists.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        True if a new file was written, False if the image was skipped or
        any step failed.
    """
    folder_path, filename = os.path.split(file_path)
    os.makedirs(folder_path, exist_ok=True)

    if os.path.exists(file_path):
        print('Image %s already exists. Skipping download.' % file_path)
        return False

    try:
        # The context manager closes the connection even if read() fails.
        with urlopen(url, timeout=timeout) as response:
            image_data = response.read()
    except Exception:
        traceback.print_exc()
        print('Warning: Could not download image %s from %s' % (filename, url))
        return False

    try:
        pil_image = Image.open(BytesIO(image_data))
    except Exception:
        traceback.print_exc()
        print('Warning: Failed to parse image %s' % filename)
        return False

    try:
        pil_image_rgb = pil_image.convert('RGB')
    except Exception:
        traceback.print_exc()
        print('Warning: Failed to convert image %s to RGB' % filename)
        return False

    try:
        pil_image_rgb.save(file_path, format='JPEG', quality=90)
    except Exception:
        traceback.print_exc()
        print('Warning: Failed to save image %s' % filename)
        return False

    print('Success: Saved image %s' % file_path)
    return True


for key in keys:
    landmark_urls = frames[key]["url"]
    splits = (
        # [0:1600] urls of this landmark
        (train_directory, landmark_urls[0:train_count].values),
        # [1600:2000] urls of this landmark
        (test_directory, landmark_urls[train_count:train_count + test_count].values),
    )

    for split_directory, split_urls in splits:
        # Files are numbered from 1 within each split/landmark folder.
        for index, url in enumerate(split_urls, start=1):
            _download_image(
                url,
                '{}/{}/image_{}.jpg'.format(split_directory, key, index),
            )
    


In [None]:
# Building the input pipeline
base_directory = "/content/drive/MyDrive/Landmark_Recognition_Project/downloads/"
train_directory = '{}train/'.format(base_directory)
test_directory = '{}test/'.format(base_directory)

batch_size = 32
img_height = 224
img_width = 224

# Settings shared by the training and validation datasets. Both are read from
# the same train directory with the same seed and a 0.2 validation fraction,
# i.e. 80% of the images for training and 20% for validation.
split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    labels="inferred",
    label_mode="int",
    batch_size=batch_size,
)

train_generator = tf.keras.preprocessing.image_dataset_from_directory(
    train_directory, subset="training", **split_options)

validation_generator = tf.keras.preprocessing.image_dataset_from_directory(
    train_directory, subset="validation", **split_options)



In [None]:
# Class names inferred from the training directory (labels="inferred").
# The class folders were created with the landmark ids as their names, so
# these are the landmark id strings used as keys of key_map.
class_names = train_generator.class_names
print(class_names)

In [None]:
# Show the first 13 images of one training batch on a 4x4 grid, each titled
# with the readable landmark name of its label.
plt.figure(figsize=(20, 20))
for images, labels in train_generator.take(1):
    for slot in range(1, 14):
        sample = slot - 1
        plt.subplot(4, 4, slot)
        pixels = images[sample].numpy().astype("uint8")
        landmark_name = key_map[class_names[labels[sample]]]
        plt.imshow(pixels)
        plt.title(landmark_name)
        plt.axis("off")

In [None]:
# Print the tensor shapes of a single batch (images, then labels).
for image_batch, labels_batch in train_generator.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)


In [None]:
# Layer that multiplies its input by 1/255.
# NOTE(review): this variable is not referenced by any other cell visible in
# this notebook — confirm whether it is still needed.
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

In [None]:
# Cached + prefetching views of the training and validation datasets.
# NOTE(review): the fit() calls visible in this notebook use train_generator /
# validation_generator directly, not these — confirm which is intended.
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds, val_ds = (
    dataset.cache().prefetch(buffer_size=AUTOTUNE)
    for dataset in (train_generator, validation_generator)
)


In [None]:
# Number of output classes for both models; equals the number of landmark ids
# listed in key_map (13).
num_classes = 13

In [None]:
# Plain CNN built from scratch (no pre-trained base).
model = tf.keras.Sequential()

# Scale pixel values from the 0-255 range down to 0-1.
model.add(layers.experimental.preprocessing.Rescaling(1./255))

# Three identical convolution + max-pooling stages.
for _stage in range(3):
    model.add(layers.Conv2D(32, 3, activation='relu'))
    model.add(layers.MaxPooling2D())

# Classifier head: two hidden dense layers, then a softmax over the classes.
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes, activation = 'softmax'))

In [None]:
# Compile the from-scratch CNN with the Adam optimizer and sparse categorical
# cross-entropy (labels are integer class indices).
# The model's last layer already applies softmax, so it outputs probabilities,
# not logits; from_logits must therefore be False or the loss would apply a
# second softmax on top of the first.
model.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=False),
  metrics=['accuracy'])

In [None]:
# Transfer-learning model: a frozen ImageNet ResNet50 (global-average-pooled,
# no classification top) followed by a small dense classifier head.
#
# Everything is built from the public tf.keras namespace, like the
# from-scratch model in this notebook. Mixing the private
# tensorflow.python.keras Sequential/Dense with the standalone keras
# package's ResNet50 combines two different Keras implementations, which
# Sequential.add() can reject.
#
# NOTE(review): the ImageNet ResNet50 weights are normally paired with
# keras.applications.resnet50.preprocess_input; this notebook feeds raw
# 0-255 RGB pixels both in training and prediction — confirm this is intended.
my_new_model = tf.keras.Sequential()
my_new_model.add(tf.keras.applications.ResNet50(include_top = False, weights = 'imagenet', pooling = 'avg'))
my_new_model.add(layers.Dense(128, activation = 'relu'))
my_new_model.add(layers.Dense(128, activation = 'relu'))
my_new_model.add(layers.Dense(num_classes, activation = 'softmax'))

# Freeze the ResNet50 base so only the dense head is trained.
my_new_model.layers[0].trainable = False

In [None]:
# Compile the ResNet50-based model: Adam optimizer, sparse categorical
# cross-entropy on integer labels, accuracy as the reported metric.
my_new_model.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [None]:
# Train the ResNet50-based model for 10 epochs, running one full pass over
# the training and validation datasets per epoch. The returned History object
# is kept for plotting.
with_premodel = my_new_model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
)

In [None]:
# Training vs. validation loss per epoch for the 10-epoch run.
import matplotlib.pyplot as plt
plt.plot(with_premodel.history['loss'], label='train loss')
plt.plot(with_premodel.history['val_loss'], label='val loss')
plt.legend()
# Save before show(): once the figure has been shown it is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig('LossVal_loss')
plt.show()


In [None]:
# Training vs. validation accuracy per epoch for the 10-epoch run.
plt.plot(with_premodel.history['accuracy'], label='train acc')
plt.plot(with_premodel.history['val_accuracy'], label='val acc')
plt.legend()
# Save before show(): once the figure has been shown it is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig('AccVal_acc')
plt.show()

In [None]:
# Persist the model trained for 10 epochs to the project folder on Drive.
my_new_model.save(
    filepath='/content/drive/MyDrive/Landmark_Recognition_Project/landmark-recognition-resnet-10epoch-new.model'
)

In [None]:
# Reload the saved 10-epoch model from Drive; predictions below use this copy.
trained_model = tf.keras.models.load_model(
    filepath='/content/drive/MyDrive/Landmark_Recognition_Project/landmark-recognition-resnet-10epoch-new.model'
)

In [None]:
# Download one sample image by URL and classify it with the trained model.
from tensorflow import keras

url = "https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg"
path = tf.keras.utils.get_file('Red_sunflower', origin=url)

# Load at the resolution the model was trained on.
img = keras.preprocessing.image.load_img(
    path, target_size=(img_height, img_width)
)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0) # Create a batch

predictions = trained_model.predict(img_array)
# The saved model ends in a softmax layer, so its output already is a
# probability vector. Applying softmax again would flatten the distribution
# and report a misleadingly low confidence.
score = predictions[0]
predicted_key = class_names[np.argmax(score)]
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(key_map[predicted_key], 100 * np.max(score))
)


In [None]:
# Train for 20 more epochs to try to improve the model.
# fit() is called on the same my_new_model object as the earlier 10-epoch
# run, so when the cells are executed top to bottom this continues from the
# already-trained weights rather than starting over.
with_premodel_20 = my_new_model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=20,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
)

In [None]:
# Training vs. validation loss per epoch for the 20-epoch run.
# NOTE: the file name is the same one the 10-epoch loss plot is saved under.
import matplotlib.pyplot as plt
plt.plot(with_premodel_20.history['loss'], label='train loss')
plt.plot(with_premodel_20.history['val_loss'], label='val loss')
plt.legend()
# Save before show(): once the figure has been shown it is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig('LossVal_loss')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for the 20-epoch run.
# NOTE: the file name is the same one the 10-epoch accuracy plot is saved under.
plt.plot(with_premodel_20.history['accuracy'], label='train acc')
plt.plot(with_premodel_20.history['val_accuracy'], label='val acc')
plt.legend()
# Save before show(): once the figure has been shown it is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig('AccVal_acc')
plt.show()

In [None]:
# Persist the further-trained model to the project folder on Drive.
my_new_model.save(
    filepath='/content/drive/MyDrive/Landmark_Recognition_Project/landmark-recognition-resnet-20epoch.model'
)

In [None]:
# Reload the further-trained model from Drive; the demo cells below use it.
trained_model = tf.keras.models.load_model(
    filepath='/content/drive/MyDrive/Landmark_Recognition_Project/landmark-recognition-resnet-20epoch.model'
)

In [None]:
# Personal vacation pictures used as a small hand-picked test set:
# list every entry of the demo folder and show how many there are.
demo_data_dir = pathlib.Path("/content/drive/MyDrive/Landmark_Recognition_Project/demo_data/")
demo_list = [entry for entry in demo_data_dir.glob('*')]
image_count = len(demo_list)
image_count

In [None]:
# Classify every demo picture and collect (image, landmark name, confidence %)
# tuples in `results` for the visualisation that follows.
import PIL
from tensorflow import keras
import numpy as  np
from keras.applications.resnet50 import preprocess_input, decode_predictions

results = []

for path in demo_list:

    # Load at the resolution the model was trained on.
    img = keras.preprocessing.image.load_img(
        path, target_size=(img_height, img_width)
    )
    img_array = keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0) # Create a batch

    predictions = trained_model.predict(img_array)
    # The saved model ends in a softmax layer, so its output already is a
    # probability vector. Applying softmax again would flatten the
    # distribution and report a misleadingly low confidence.
    score = predictions[0]
    predicted_key = class_names[np.argmax(score)]
    confidence =  100 * np.max(score)
    results.append((img, key_map[predicted_key], confidence))
    print(
        "This image most likely is from {} with a {:.2f} percent confidence."
        .format(key_map[predicted_key],confidence)
    )




In [None]:
# Show every demo picture with its predicted landmark and confidence.

# Three pictures per row. The grid has at least three rows (the layout used
# for the original nine demo pictures) and grows when there are more results,
# instead of failing on the tenth subplot of a fixed 3x3 grid.
grid_cols = 3
grid_rows = max(3, -(-len(results) // grid_cols))  # ceiling division

# Keep 16x16 for three rows and extend the height proportionally beyond that.
plt.figure(figsize=(16, 16 * grid_rows / 3))
for i, (image, label, confidence) in enumerate(results):
    ax = plt.subplot(grid_rows, grid_cols, i + 1)
    plt.imshow(image)
    title = "{} ({:.2f}%)".format(label, confidence)
    plt.title(title)
    plt.axis("off")


In [None]:
# Final demonstration: take a live image URL (from a Bing search) and predict
# which landmark it shows.
import PIL
from tensorflow import keras
import numpy as  np
from keras.applications.resnet50 import preprocess_input, decode_predictions


url = "https://th.bing.com/th/id/OIP.2zrAqdG7Kf6mQ62NxtGpqAHaE9?w=252&h=180&c=7&o=5&dpr=2&pid=1.7"
# NOTE(review): get_file stores the download under the fixed cache name
# 'demo'; when trying a different url, confirm that a previously cached
# 'demo' file is not being reused.
path = tf.keras.utils.get_file('demo', origin=url)

# Load at the resolution the model was trained on.
img = keras.preprocessing.image.load_img(
      path, target_size=(img_height, img_width)
      )
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0) # Create a batch


predictions = trained_model.predict(img_array)
# The saved model ends in a softmax layer, so its output already is a
# probability vector. Applying softmax again would flatten the distribution
# and report a misleadingly low confidence.
score = predictions[0]
predicted_key = class_names[np.argmax(score)]
confidence =  100 * np.max(score)
print(
      "This image most likely is from {} with a {:.2f} percent confidence."
      .format(key_map[predicted_key],confidence)
    )


