# EEE 443 - Final Project - Image Captioning:

## Group 10:

Ayhan Okuyan, Baris Akcin, Emre Donmez, Hasan Emre Erdemoglu, Ruzgar Eserol, Suleyman Taylan Topaloglu

### Image Downloader & Pickler Notebook: (Part 1 of 2)

1. Build the necessary directories to download the images.
2. Open the given dataset file to extract URL downloads.
3. Download the images and save them to the directory.
4. Import the transfer learning models and encode the images with them (the intricacies will be explained in the report).
5. Export necessary output to be used in the continuing notebooks.

**Note:** Before running this notebook, put the notebook and the dataset file in the same directory; do not place them in separate directories.

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [1]:
import random
import shutil
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 

### Directory Making Section:

1. Build necessary directories, some files needed are already put within root folder.
2. Unpack the given dataset and download the images.

In [2]:
import os
# Locate the project root (the notebook's working directory) and make sure the
# image and export folders exist beneath it.
root_dir = os.getcwd()

# os.path.join keeps these paths valid on every OS; on Windows it produces the
# same '<root>\images' and '<root>\exports' strings as the old concatenation.
imgs_dir = os.path.join(root_dir, 'images')
exports_dir = os.path.join(root_dir, 'exports')
print(root_dir)

# exist_ok=True makes re-running this cell harmless without a separate
# "does it exist?" check.
os.makedirs(imgs_dir, exist_ok=True)
os.makedirs(exports_dir, exist_ok=True)

C:\Users\ayhok\Desktop\EEE443 Project


In [5]:
import h5py
from tqdm import tqdm
import requests
from PIL import Image
def eee443_dataset_read(path):
    """Open the project's training HDF5 file and return handles to its contents.

    Args:
        path: Directory that contains 'eee443_project_dataset_train.h5'.

    Returns:
        A tuple ``(train_imid, train_cap, train_url, word_code)`` holding the
        h5py objects stored under those keys, in that order.

    Note:
        The file is deliberately left open: the returned objects are handles
        into the open file, and closing it here would invalidate them before
        the caller can read anything.
    """
    # os.path.join instead of a hard-coded '\\' so this also works off Windows.
    f = h5py.File(os.path.join(path, 'eee443_project_dataset_train.h5'), 'r')
    train_cap = f['train_cap']
    train_imid = f['train_imid']
    train_url = f['train_url']
    word_code = f['word_code']
    print('Size of URL list: ', train_url.shape[0])
    return train_imid, train_cap, train_url, word_code

def download_images(img_dir, train_url, timeout=30):
    """Download every URL in ``train_url`` into ``img_dir``, one file per index.

    The i-th URL (1-based) is stored as ``img_dir/<i>`` with no extension.
    Indices whose file is already on disk are skipped without touching the
    network, so an interrupted run can simply be restarted. Responses with a
    status other than HTTP 200 are skipped silently. Downloads that fail with
    a network error, or that do not verify as an image, are listed in
    'removedIms.txt', which is written to the parent directory of ``img_dir``.

    Side effects: changes the working directory to ``img_dir`` while running
    and to its parent afterwards (also when the loop is interrupted).

    Args:
        img_dir: Existing directory the images are written to.
        train_url: Indexable sequence of UTF-8 encoded byte-string URLs that
            supports ``len()``.
        timeout: Seconds passed to ``requests.get`` so that a stalled server
            cannot hang the whole run.

    Returns:
        None.
    """
    os.chdir(img_dir)
    header = {'User-Agent': 'Mozilla/5.0'}  # fool the website to download easily
    print('Current working directory set as: ', os.getcwd())

    corrupt_idx = []
    try:
        for item in tqdm(range(1, len(train_url) + 1)):
            target = img_dir + '/' + str(item)

            # Check the disk BEFORE issuing the request; otherwise resuming a
            # run re-contacts the server for every image already downloaded.
            if os.path.exists(target):
                continue

            url = train_url[item - 1].decode('utf-8')
            try:
                # The context manager releases the streamed connection.
                with requests.get(url, headers=header, allow_redirects=True,
                                  stream=True, timeout=timeout) as r:
                    if r.status_code != 200:
                        continue
                    with open(target, 'wb') as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
            except requests.RequestException as e:
                # One dead host must not abort the remaining downloads.
                print(e)
                # Drop any partial file so that a later run retries this index
                # instead of treating it as already downloaded.
                if os.path.exists(target):
                    os.remove(target)
                corrupt_idx.append(target + '  --- download failed')
                continue

            # Validate that what was saved is in fact an image.
            try:
                with Image.open(target) as img:
                    img.verify()
            except (IOError, SyntaxError) as e:
                print(e)  # Print error encountered
                corrupt_idx.append(target + '  --- non image')
                # NOTE(review): the bad file is left on disk, as before;
                # decide whether it should be deleted here.
    finally:
        # Restore the directory even when the loop is interrupted, and still
        # report whatever problems were collected up to that point.
        os.chdir('..')
        print('Current working directory set as: ', os.getcwd())
        with open('removedIms.txt', 'w') as file_handle:
            file_handle.writelines('%s\n' % entry for entry in corrupt_idx)

In [7]:
# Read the dataset handles and fetch the images into imgs_dir.
# download_images skips every index whose file is already on disk, so
# re-running this cell continues a previous download rather than redoing it.
# NOTE(review): this guard only checks that the image folder exists (it is
# created in the directory-making cell), not that the download has finished.
if os.path.exists(imgs_dir):
    imid, cap, url, words = eee443_dataset_read(root_dir)
    download_images(imgs_dir,url)

  0%|                                                                                        | 0/82783 [00:00<?, ?it/s]

Size of URL list:  82783
Current working directory set as:  C:\Users\ayhok\Desktop\EEE443 Project\images


  1%|▍                                                                           | 507/82783 [03:29<9:16:42,  2.46it/s]

KeyboardInterrupt: 

### Transfer Learning Section:

Try different encoding schemes for the CNN-encoder part. Paired with a given RNN decoder, these networks may give different performance.
If time permits, there will be 4 different CNNs to be used in transfer learning. For RNN implementations, refer to that notebook.

1. Inception v3 model definition
2. VGG16 model definition (to be implemented)
3. Inception ResNet v2 (to be implemented)

In [12]:
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input # Our Transfer Learning Model 
from tensorflow.keras.preprocessing import image 
from tensorflow.keras import layers

from pickle import dump
# Export guards, one per encoder. A flag is False until that encoder's
# encodings have been exported; FL_INCEP is flipped to True by the Inception
# pickling cell so the dump is not repeated.
# RUN THIS ONLY ONCE PER INITIALIZATION! (re-running resets every flag)
FL_INCEP = FL_VGG16 = FL_RESNT = FL_YOLO = False

In [34]:
def inception_load_image(path):
    """Load one image file as a preprocessed, single-item InceptionV3 batch.

    Args:
        path: Location of the image file on disk.

    Returns:
        The image resized to 299x299, converted to an array, given a leading
        batch axis, and passed through ``preprocess_input``.
    """
    pil_img = image.load_img(path, target_size=(299, 299))
    # Indexing with np.newaxis prepends the batch dimension.
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    return preprocess_input(batch)

def transfer_inception_model():
    """Build a frozen InceptionV3 feature extractor that ends at 'avg_pool'.

    Clears the current Keras session, loads InceptionV3 with ImageNet weights,
    marks it non-trainable, and wraps it in a model whose output is the
    'avg_pool' layer. The input image size and the size of the transfer
    values are printed.

    Returns:
        A Keras model mapping the InceptionV3 input to the 'avg_pool' output.
    """
    # Re-running this cell would otherwise stack up models in the session.
    tf.keras.backend.clear_session()

    inception_model = InceptionV3(include_top=True, weights='imagenet',
                                  input_shape=(299, 299, 3))
    inception_model.trainable = False

    # Check layers via inception_model.summary()
    # (mixed10 is the final layer with the no-top layout.)
    inception_tx_layer = inception_model.get_layer('avg_pool')

    # `Model` is never imported in this file, so the bare name raised a
    # NameError; reach the class through the already-imported `tf` instead.
    inception_tx_model = tf.keras.Model(inputs=inception_model.input,
                                        outputs=inception_tx_layer.output)

    # Read the shapes from the model's public attributes rather than through
    # the private tensorflow.python.keras backend.
    inception_img_size = inception_tx_model.input_shape[1:3]
    print('Image size: ', inception_img_size)

    inception_tx_values_size = inception_tx_model.output_shape
    print('Vector size of transfer values: ', inception_tx_values_size)
    return inception_tx_model

def inception_encode_image(image_dir, img_id, model):
    """Encode a single image file with the given model.

    Args:
        image_dir: Directory prefix. It is concatenated directly with
            ``img_id``, so it must end with a path separator.
        img_id: File name of the image inside ``image_dir``.
        model: Object exposing ``predict``; called on a batch of one image.

    Returns:
        Whatever ``model.predict`` returns for the one-image batch.
    """
    # inception_load_image already returns a preprocessed 4-D batch of one
    # image. Running preprocess_input on it a second time (as this function
    # used to) rescales already-scaled pixels and corrupts the encoding, and
    # the old reshape was a no-op, so both are gone.
    batch = inception_load_image(image_dir + img_id)
    # Single-image predict; batching would be needed to make use of a GPU.
    return model.predict(batch)

### Encoding & Pickling:

For each model, build the network, encode the images, and save the encodings using pickle serialization.

In [37]:
# Encode every file in the image folder with the Inception transfer model and
# pickle both the encodings and the file names they belong to.
inception_tx_model = transfer_inception_model()

os.chdir(imgs_dir) # change to training directory
try:
    print(os.getcwd())

    # List the folder once and keep the result: this is the order the
    # encodings are produced in, so it doubles as the index saved below.
    # (Previously the index was assigned the return value of os.chdir, i.e.
    # None, so the pickled index file carried no information.)
    inception_enc_idx = os.listdir()
    print(len(inception_enc_idx))

    # inception_encode_image concatenates directory and file name, so hand it
    # the directory with a trailing separator (portable form of cwd + '\\').
    img_prefix = os.path.join(os.getcwd(), '')

    inception_encoded = []
    for img in tqdm(inception_enc_idx):
        inception_encoded.append(
            inception_encode_image(img_prefix, img, inception_tx_model))

    print('Encoding finished. Pickling.')
    if not FL_INCEP:
        # `with` closes each file so the pickles are fully flushed to disk.
        with open(os.path.join(exports_dir, 'enc_inception.pkl'), 'wb') as pkl:
            dump(inception_encoded, pkl)
        with open(os.path.join(exports_dir, 'inception_enc_idx.pkl'), 'wb') as pkl:
            dump(inception_enc_idx, pkl)
        FL_INCEP = True # we dumped data.
finally:
    os.chdir(root_dir) # return back to old directory, even after a failure

Encoding finished. Pickling.


### Next Steps: 
In the next step, we will import these encodings to another notebook and construct an RNN model to do actual image captioning.