<a href="https://colab.research.google.com/github/bkcaleb/Personal_Projects/blob/main/Localization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
import re
from PIL import Image
import shutil
import random
import matplotlib.pyplot as plt
import xml.etree.ElementTree as et
from matplotlib.patches import Rectangle

In [2]:
import gdown
url = 'https://drive.google.com/uc?id=1dIR9ANjUsV9dWa0pS9J0c2KUGMfpIRG0'
fname = 'oxford_pet.zip'
gdown.download(url,fname,quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1dIR9ANjUsV9dWa0pS9J0c2KUGMfpIRG0
To: /content/oxford_pet.zip
100%|██████████| 811M/811M [00:13<00:00, 61.2MB/s]


'oxford_pet.zip'

In [3]:
!ls -l


total 791576
-rw-r--r-- 1 root root 810565619 Jan  2 11:06 oxford_pet.zip
drwxr-xr-x 1 root root      4096 Dec  3 14:33 sample_data


In [4]:
!unzip -q oxford_pet.zip -d oxford_pet

In [5]:
!ls oxford_pet


annotations  images


In [6]:
cur_dir = os.getcwd()
data_dir = os.path.join(cur_dir,'oxford_pet')
image_dir = os.path.join(data_dir,'images')
bbox_dir = os.path.join(data_dir, 'annotations', 'xmls')
seg_dir = os.path.join(data_dir, 'annotations', 'trimaps')

In [7]:
image_files = [fname for fname in os.listdir(image_dir) if os.path.splitext(fname)[-1] == '.jpg']
print(len(image_files))

7390


In [8]:
bbox_files = [fname for fname in os.listdir(bbox_dir) if os.path.splitext(fname)[-1] == '.xml']
print(len(bbox_files))

3686


In [9]:
for image_file in image_files:
  image_path = os.path.join(image_dir,image_file)
  bbox_file = os.path.splitext(image_file)[0]+'.xml'
  bbox_path = os.path.join(bbox_dir, bbox_file)
  image = Image.open(image_path)
  image_mode = image.mode
  if image_mode != 'RGB':
    print(image_file,image_mode)
    image = np.asarray(image)
    print(image.shape)
    os.remove(image_path)
    try:
      os.remove(bbox_path)
    except FileNotFoundError:
      pass

Egyptian_Mau_191.jpg P
(214, 300)
Abyssinian_34.jpg P
(202, 250)
Egyptian_Mau_14.jpg RGBA
(800, 582, 4)
staffordshire_bull_terrier_2.jpg L
(282, 500)
Egyptian_Mau_145.jpg P
(188, 216)
Egyptian_Mau_129.jpg L
(325, 299)
Egyptian_Mau_186.jpg RGBA
(275, 183, 4)
Egyptian_Mau_167.jpg P
(275, 183)
staffordshire_bull_terrier_22.jpg L
(500, 364)
Abyssinian_5.jpg RGBA
(150, 200, 4)
Egyptian_Mau_139.jpg P
(250, 350)
Egyptian_Mau_177.jpg P
(175, 246)


In [10]:
image_files = [fname for fname in os.listdir(image_dir) if os.path.splitext(fname)[-1] == '.jpg']
print(len(image_files))
bbox_files = [fname for fname in os.listdir(bbox_dir) if os.path.splitext(fname)[-1] == '.xml']
print(len(bbox_files))

7378
3685


In [11]:
class_list = set()
for image_file in image_files:
  file_name = os.path.splitext(image_file)[0]
  class_name = re.sub('_\d+','',file_name)
  class_list.add(class_name)
class_list = list(class_list)
print(len(class_list))

37


In [12]:
class_list.sort()
class_list

['Abyssinian',
 'Bengal',
 'Birman',
 'Bombay',
 'British_Shorthair',
 'Egyptian_Mau',
 'Maine_Coon',
 'Persian',
 'Ragdoll',
 'Russian_Blue',
 'Siamese',
 'Sphynx',
 'american_bulldog',
 'american_pit_bull_terrier',
 'basset_hound',
 'beagle',
 'boxer',
 'chihuahua',
 'english_cocker_spaniel',
 'english_setter',
 'german_shorthaired',
 'great_pyrenees',
 'havanese',
 'japanese_chin',
 'keeshond',
 'leonberger',
 'miniature_pinscher',
 'newfoundland',
 'pomeranian',
 'pug',
 'saint_bernard',
 'samoyed',
 'scottish_terrier',
 'shiba_inu',
 'staffordshire_bull_terrier',
 'wheaten_terrier',
 'yorkshire_terrier']

In [13]:
class2idx = {cls:idx for idx,cls in enumerate(class_list)}
class2idx

{'Abyssinian': 0,
 'Bengal': 1,
 'Birman': 2,
 'Bombay': 3,
 'British_Shorthair': 4,
 'Egyptian_Mau': 5,
 'Maine_Coon': 6,
 'Persian': 7,
 'Ragdoll': 8,
 'Russian_Blue': 9,
 'Siamese': 10,
 'Sphynx': 11,
 'american_bulldog': 12,
 'american_pit_bull_terrier': 13,
 'basset_hound': 14,
 'beagle': 15,
 'boxer': 16,
 'chihuahua': 17,
 'english_cocker_spaniel': 18,
 'english_setter': 19,
 'german_shorthaired': 20,
 'great_pyrenees': 21,
 'havanese': 22,
 'japanese_chin': 23,
 'keeshond': 24,
 'leonberger': 25,
 'miniature_pinscher': 26,
 'newfoundland': 27,
 'pomeranian': 28,
 'pug': 29,
 'saint_bernard': 30,
 'samoyed': 31,
 'scottish_terrier': 32,
 'shiba_inu': 33,
 'staffordshire_bull_terrier': 34,
 'wheaten_terrier': 35,
 'yorkshire_terrier': 36}

In [14]:
bbox_files[:20]

['Sphynx_143.xml',
 'yorkshire_terrier_106.xml',
 'pug_114.xml',
 'miniature_pinscher_187.xml',
 'american_bulldog_184.xml',
 'german_shorthaired_161.xml',
 'havanese_145.xml',
 'havanese_136.xml',
 'Siamese_183.xml',
 'beagle_144.xml',
 'Bombay_140.xml',
 'pomeranian_152.xml',
 'miniature_pinscher_12.xml',
 'german_shorthaired_144.xml',
 'newfoundland_189.xml',
 'boxer_119.xml',
 'chihuahua_185.xml',
 'shiba_inu_145.xml',
 'British_Shorthair_135.xml',
 'Persian_133.xml']

In [15]:
IMG_SIZE = 224
N_BBOX = len(bbox_files)
N_TRAIN = 3000
N_VAL = N_BBOX - N_TRAIN

In [16]:
tfr_dir = os.path.join(data_dir, "tfrecord")
os.makedirs(tfr_dir, exist_ok=True)

tfr_train_dir = os.path.join(tfr_dir, 'loc_train.tfr')
tfr_val_dir = os.path.join(tfr_dir, 'loc_val.tfr')

In [17]:
writer_train = tf.io.TFRecordWriter(tfr_train_dir)
writer_val = tf.io.TFRecordWriter(tfr_val_dir)

In [18]:
def _bytes_feature(value):
  """"Returns a bytes_list from a string / byte."""
  if isinstance(value,type(tf.constant(0))):
    value = value.numpy()
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  """Returns a float_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [19]:
shuffle_list = list(range(N_BBOX))
random.shuffle(shuffle_list)

train_idx_list = shuffle_list[:N_TRAIN]
val_idx_list = shuffle_list[N_TRAIN:]

In [20]:
shuffle_list

[3449,
 2596,
 2376,
 1797,
 658,
 2845,
 822,
 3445,
 801,
 2119,
 3679,
 1136,
 865,
 1310,
 1392,
 980,
 1257,
 2983,
 1906,
 1544,
 2735,
 815,
 139,
 347,
 1066,
 761,
 2934,
 1344,
 2680,
 1046,
 1318,
 1439,
 2186,
 2377,
 3253,
 231,
 2177,
 3624,
 127,
 2948,
 51,
 1902,
 2473,
 3211,
 605,
 2598,
 882,
 516,
 2701,
 877,
 3404,
 3584,
 2689,
 977,
 2088,
 1328,
 3113,
 3674,
 2764,
 1341,
 3541,
 2863,
 1043,
 49,
 1843,
 1150,
 1269,
 1221,
 832,
 2528,
 88,
 3598,
 1095,
 392,
 1342,
 2760,
 3509,
 2369,
 1877,
 2830,
 196,
 1560,
 2246,
 2980,
 155,
 3355,
 112,
 154,
 593,
 468,
 1590,
 3327,
 3329,
 1411,
 755,
 3311,
 710,
 3317,
 2013,
 1112,
 3487,
 256,
 2046,
 2501,
 900,
 2822,
 1401,
 2978,
 2843,
 787,
 2029,
 1192,
 3130,
 2508,
 1614,
 1747,
 757,
 219,
 1086,
 1771,
 2621,
 3551,
 1082,
 2765,
 3682,
 3662,
 2865,
 1851,
 1421,
 1745,
 1013,
 2859,
 39,
 206,
 2116,
 3347,
 2159,
 3532,
 1559,
 3431,
 1952,
 1276,
 2986,
 1259,
 50,
 3629,
 2606,
 2699,
 3144,

In [21]:
for idx in train_idx_list:
  bbox_file = bbox_files[idx]
  bbox_path = os.path.join(bbox_dir, bbox_file)

  tree = et.parse(bbox_path)
  width = float(tree.find('./size/width').text)
  height = float(tree.find('./size/height').text)
  xmin = float(tree.find('./object/bndbox/xmin').text)
  ymin = float(tree.find('./object/bndbox/ymin').text)
  xmax = float(tree.find('./object/bndbox/xmax').text)
  ymax = float(tree.find('./object/bndbox/ymax').text)
  xc = (xmin+xmax)/2.
  yc = (ymin+ymax)/2.
  x = xc/width
  y = yc / height
  w = (xmax-xmin) / width
  h = (ymax-ymin) / height

  file_name = os.path.splitext(bbox_file)[0]
  image_file = file_name + '.jpg'
  image_path = os.path.join(image_dir,image_file)
  image = Image.open(image_path)
  image = image.resize((IMG_SIZE,IMG_SIZE))
  bimage = image.tobytes()

  class_name = re.sub('_\d+', '', file_name)
  class_num = class2idx[class_name]

  if file_name[0].islower():
    bi_cls_num=0
  else:
    bi_cls_num = 1

  example = tf.train.Example(features=tf.train.Features(feature={
      'image': _bytes_feature(bimage),
      'cls_num': _int64_feature(class_num),
      'bi_cls_num': _int64_feature(bi_cls_num),
      'x': _float_feature(x),
      'y': _float_feature(y),
      'w': _float_feature(w),
      'h': _float_feature(h)
  }))
  writer_train.write(example.SerializeToString())

writer_train.close()

In [22]:
N_ClASS = len(class_list)
N_EPOCHS = 40
N_BATCH = 40
IMG_SIZE = 224
learning_rate = 0.0001
steps_per_epoch = N_TRAIN / N_BATCH
validation_steps = int(np.ceil(N_VAL / N_BATCH))

In [23]:
def _parse_function(tfrecord_serialized):
  features={'image': tf.io.FixedLenFeature([],tf.string),
            'cls_num': tf.io.FixedLenFeature([],tf.int64),
            'bi_cls_num': tf.io.FixedLenFeature([],tf.int64),
            'x': tf.io.FixedLenFeature([], tf.float32),
            'y': tf.io.FixedLenFeature([], tf.float32),
            'w': tf.io.FixedLenFeature([], tf.float32),
            'h': tf.io.FixedLenFeature([], tf.float32)
            }
  parsed_features = tf.io.parse_single_example(tfrecord_serialized,features)

  image = tf.io.decode_raw(parsed_features['image'], tf.uint8)
  image = tf.reshape(image, [IMG_SIZE,IMG_SIZE,3])
  image = tf.cast(image, tf.float32)/255.

  cls_label = tf.cast(parsed_features['cls_num'], tf.int64)
  bi_cls_label = tf.cast(parsed_features['bi_cls_num'], tf.int64)

  x = tf.cast(parsed_features['x'], tf.float32)
  y = tf.cast(parsed_features['y'], tf.float32)
  w = tf.cast(parsed_features['w'], tf.float32)
  h = tf.cast(parsed_features['h'], tf.float32)
  gt = tf.stack([x,y,w,h],-1)
  return image,gt

In [24]:
train_dataset = tf.data.TFRecordDataset(tfr_train_dir)
train_dataset = train_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.shuffle(buffer_size=N_TRAIN).prefetch(
    tf.data.experimental.AUTOTUNE).batch(N_BATCH).repeat()


In [25]:
val_dataset = tf.data.TFRecordDataset(tfr_val_dir)
val_dataset = val_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.batch(N_BATCH).repeat()

In [26]:
for image, gt in val_dataset.take(3):
  x = gt[:,0]
  y = gt[:,1]
  w = gt[:,2]
  h = gt[:,3]
  xmin = x[0].numpy() - w[0].numpy()/2.
  ymin = y[0].numpy() - h[0].numpy()/2.
  rect_x = int(xmin*IMG_SIZE)
  rect_y = int(ymin*IMG_SIZE)
  rect_w = int(w[0].numpy()*IMG_SIZE)
  rect_h = int(h[0].numpy()*IMG_SIZE)

  #actual drawing part
  rect = Rectangle((rect_x,rect_y), rect_w, rect_h, fill=False, color='red')
  plt.axes().add_patch(rect)
  plt.imshow(image[0])
  plt.show()

In [27]:
def create_model():
  model = keras.Sequential()
  model.add(keras.layers.Conv2D(32,3,activation='relu',padding='SAME',
                                input_shape=(IMG_SIZE,IMG_SIZE,3)))
  model.add(keras.layers.MaxPool2D(padding='SAME'))
  model.add(keras.layers.Conv2D(64,3,activation='relu',padding='SAME'))
  model.add(keras.layers.MaxPool2D(padding='SAME'))
  model.add(keras.layers.Conv2D(128,3,activation='relu',padding='SAME'))
  model.add(keras.layers.MaxPool2D(padding='SAME'))
  model.add(keras.layers.Conv2D(256,3,activation='relu',padding='SAME'))
  model.add(keras.layers.MaxPool2D(padding='SAME'))
  model.add(keras.layers.Conv2D(256,3,activation='relu',padding='SAME'))
  model.add(keras.layers.MaxPool2D(padding='SAME'))
  model.add(keras.layers.Flatten())
  model.add(keras.layers.Dense(1024, activation='relu'))
  model.add(keras.layers.Dropout(0.4))
  model.add(keras.layers.Dense(4, activation='softmax'))
  return model

In [28]:
def loss_fn(y_true, y_pred):
  return keras.losses.MeanSquaredError()(y_true,y_pred)

In [29]:
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 224, 224, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 56, 56, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 56, 56, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 28, 28, 128)      0

In [30]:
lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=learning_rate,
                                                          decay_steps=steps_per_epoch*10,
                                                          decay_rate=0.5,
                                                          staircase=True)
model.compile(keras.optimizers.RMSprop(lr_schedule), loss=loss_fn)

In [31]:
model.fit(train_dataset,steps_per_epoch=steps_per_epoch,
          epochs=N_EPOCHS,
          validation_data=val_dataset,
          validation_steps=validation_steps)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7efc7d629090>

In [41]:
idx = 0
num_imgs = validation_steps
for val_data, val_gt in  val_dataset.take(num_imgs):
  x=val_gt[:,0]
  y=val_gt[:,1]
  w=val_gt[:,2]
  h=val_gt[:,3]
  xmin = x[idx].numpy() - w[idx].numpy()/2.
  ymin = y[idx].numpy() - h[idx].numpy()/2.
  rect_x = int(xmin*IMG_SIZE)
  rect_y = int(ymin*IMG_SIZE)
  rect_w = int(w[idx].numpy() * IMG_SIZE)
  rect_h = int(h[idx].numpy() * IMG_SIZE)

  rect = Rectangle((rect_x,rect_y), rect_w,rect_h,fill=False,color='red')
  plt.axes().add_patch(rect)

  prediction = model.predict(val_data)
  pred_x=prediction[:,0]
  pred_y=prediction[:,1]
  pred_w=prediction[:,2]
  pred_h=prediction[:,3]
  pred_xmin = pred_x[idx].numpy() - pred_w[idx].numpy()/2.
  pred_ymin = pred_y[idx].numpy() - pred_h[idx].numpy()/2.
  pred_rect_x = int(pred_xmin*IMG_SIZE)
  pred_rect_y = int(pred_ymin*IMG_SIZE)
  pred_rect_w = int(pred_w[idx].numpy() * IMG_SIZE)
  pred_rect_h = int(pred_h[idx].numpy() * IMG_SIZE)

  pred_rect = Rectangle((pred_rect_x,pred_rect_y), pred_rect_w,pred_rect_h,fill=False,color='red')
  plt.axes().add_patch(pred_rect)

  plt.imshow(val_data[idx])
  plt.show()