In [2]:
import ast
import json
import pickle
import re
import string
from time import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from nltk.corpus import stopwords

from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50,preprocess_input,decode_predictions,decode_predictions
from keras.preprocessing import image
from keras.models import Model,load_model
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Input,Dense,Dropout,Embedding,LSTM
from keras.layers.merge import add

### Image Preprocessing

In [4]:
# Build ResNet50 with ImageNet weights for 224x224 three-channel inputs,
# then print its layer table.
model = ResNet50(
    input_shape=(224, 224, 3),
    weights="imagenet",
)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
______________________________________________________________________________________________

In [5]:
# Feature extractor: takes the same input as `model`, but its output is read
# from the second-to-last layer (layers[-2]) instead of the final layer.
# NOTE(review): presumably this is the pooled feature layer just before the
# classifier - confirm against the full model.summary() output.
model_new = Model(model.input,model.layers[-2].output)

In [12]:
def preprocess_img(img_pth):
    """Load an image file and prepare it as a single-image input batch.

    The file at ``img_pth`` is loaded with a 224x224 target size, converted
    to an array, given a new leading axis, and finally passed through
    ``preprocess_input``.

    Returns the array produced by ``preprocess_input``.
    """
    pixels = image.img_to_array(
        image.load_img(img_pth, target_size=(224, 224))
    )
    # The model predicts on batches, so add a batch axis in front.
    batch = np.expand_dims(pixels, axis=0)
    return preprocess_input(batch)  # normalisation

In [13]:
def encode_image(img):
    """Return the flattened ``model_new`` prediction for one image.

    ``img`` is handed to ``preprocess_img`` unchanged; the resulting batch is
    run through ``model_new.predict`` and the prediction is reshaped to 1-D.
    """
    batch = preprocess_img(img)
    features = model_new.predict(batch)
    # Collapse all axes into a single flat vector.
    return features.reshape(-1)

In [35]:
# encode_image("Flickr8k_Dataset/Flicker8k_Dataset/2513260012_03d33305cf.jpg").shape

In [15]:
# Directory that holds the image files. The trailing "/" is required:
# encoding_data builds each file path by plain string concatenation
# (imagePath + img_id + ".jpg").
imagePath = "Flickr8k_Dataset/Flicker8k_Dataset/"

In [37]:
def encoding_data(data):
    """Encode every image id in ``data`` and return ``{img_id: encoding}``.

    Each id is expanded to ``imagePath + img_id + ".jpg"`` and passed to
    ``encode_image``. A progress line is printed on every 200th item
    (including the first), and the total elapsed time is printed at the end.
    """
    began = time()
    features = {}
    for step, name in enumerate(data):
        features[name] = encode_image(imagePath + name + ".jpg")
        if step % 200:
            continue
        print("Encoding in Progress Time step %d " % step)

    finished = time()
    print("Total Time Taken :", finished - began)
    return features

In [38]:
# import_ipynb must be imported first; presumably it is what allows the
# Data_cleaning_and_collection notebook to be imported like a module.
import import_ipynb
import Data_cleaning_and_collection
# `import Data_cleaning_and_collection` binds only the module name, so the
# helper has to be imported explicitly - otherwise the calls below raise
# NameError on a fresh kernel.
# NOTE(review): assumes readTxtFile is defined in that notebook - confirm.
from Data_cleaning_and_collection import readTxtFile

train_data = readTxtFile("Flickr8k_text/Flickr_8k.trainImages.txt")
test_data = readTxtFile("Flickr8k_text/Flickr_8k.testImages.txt")

# One image file name per line; keep the part before the first "." as the id.
# splitlines() plus a blank-line filter keeps the final entry even when the
# file does not end with a newline (split("\n")[:-1] would silently drop it).
train = [row.split(".")[0] for row in train_data.splitlines() if row.strip()]
test = [row.split(".")[0] for row in test_data.splitlines() if row.strip()]

In [39]:
# train[0]

In [40]:
# Encode every train and test image. Each call returns a dict that maps an
# image id to the vector returned by encode_image, and prints progress as it goes.
encoding_train = encoding_data(train)
encoding_test = encoding_data(test)

Encoding in Progress Time step 0 
Encoding in Progress Time step 200 
Encoding in Progress Time step 400 
Encoding in Progress Time step 600 
Encoding in Progress Time step 800 
Encoding in Progress Time step 1000 
Encoding in Progress Time step 1200 
Encoding in Progress Time step 1400 
Encoding in Progress Time step 1600 
Encoding in Progress Time step 1800 
Encoding in Progress Time step 2000 
Encoding in Progress Time step 2200 
Encoding in Progress Time step 2400 
Encoding in Progress Time step 2600 
Encoding in Progress Time step 2800 
Encoding in Progress Time step 3000 
Encoding in Progress Time step 3200 
Encoding in Progress Time step 3400 
Encoding in Progress Time step 3600 
Encoding in Progress Time step 3800 
Encoding in Progress Time step 4000 
Encoding in Progress Time step 4200 
Encoding in Progress Time step 4400 
Encoding in Progress Time step 4600 
Encoding in Progress Time step 4800 
Encoding in Progress Time step 5000 
Encoding in Progress Time step 5200 
Encoding

In [45]:
# Persist both encoding dicts to disk as pickle files.
for out_path, encoded in (
    ("Encoding/encoded_train_features.pkl", encoding_train),
    ("Encoding/encoded_test_features.pkl", encoding_test),
):
    with open(out_path, "wb") as f:
        pickle.dump(encoded, f)

### Data Pre-Processing for Captions

In [72]:
# Read the raw text of both word-list files; the contents are still plain
# strings at this point.
with open("total_word.txt", "r") as words_file, open("vocab.txt", "r") as vocab_file:
    total_words = words_file.read()
    vocab = vocab_file.read()

In [73]:
def parse_word_list(text):
    """Split the comma-separated text of a saved word collection into words.

    Surrounding whitespace, quotes and the opening/closing bracket or brace
    are stripped from every token. The previous fixed slice ``wrd[2:-1]``
    left a stray quote on the last token of a ``str(list)``-style file
    (``" 'c']"[2:-1] == "c'"``); stripping handles the first, middle and last
    tokens uniformly. Empty tokens are discarded.

    If ``text`` is not a string (the cell was already run), it is returned as
    a list unchanged so that re-running the cell is safe.
    """
    if not isinstance(text, str):
        return list(text)
    strip_chars = " \t\r\n[]{}'\""
    tokens = (tok.strip(strip_chars) for tok in text.split(","))
    return [tok for tok in tokens if tok]


total_words = parse_word_list(total_words)
vocab = parse_word_list(vocab)

In [75]:
# print(len(vocab),len(total_words))

In [76]:
# Number the words from 1 in list order and keep both lookup directions.
# Index 0 is never assigned here (presumably reserved for padding - confirm).
word_to_idx = {word: i for i, word in enumerate(total_words, start=1)}
idx_to_word = {i: word for i, word in enumerate(total_words, start=1)}

In [77]:
# Two special words ('ss' and 'es' - presumably start/end-of-sequence
# markers, confirm against the caption-building code).
# Their indices are derived from the current largest index instead of the
# hard-coded 1846/1847, which were only correct for a vocabulary of exactly
# 1845 words. The membership check makes re-running this cell a no-op.
for token in ("ss", "es"):
    if token not in word_to_idx:
        next_idx = max(idx_to_word, default=0) + 1
        word_to_idx[token] = next_idx
        idx_to_word[next_idx] = token

# +1 because index 0 is not assigned to any word.
vocab_size = len(word_to_idx) + 1
print("Vocab Size",vocab_size)

Vocab Size 1848


In [78]:
# Load the saved image-id -> captions mapping.
train_img_Cap_map = None
with open("train_img_cap_map.txt") as f:
    raw_cap_map = f.read()

# The file is parsed as a Python literal first: blindly swapping every ' for "
# and feeding the result to json.loads breaks as soon as a caption contains an
# apostrophe or a double quote. The old conversion is kept as a fallback for
# content that is not a valid Python literal.
try:
    train_img_Cap_map = ast.literal_eval(raw_cap_map)
except (ValueError, SyntaxError):
    json_accept_str = raw_cap_map.replace("'","\"")
    train_img_Cap_map = json.loads(json_accept_str)

In [81]:
# Longest caption, counted in whitespace-separated tokens (0 when there are
# no captions at all).
max_len = max(
    (len(cap.split()) for caps in train_img_Cap_map.values() for cap in caps),
    default=0,
)

print(max_len)

35


In [83]:
# Write each lookup table to its own text file as the str() of the dict.
for out_name, mapping in (
    ("word_to_idx.txt", word_to_idx),
    ("idx_to_word.txt", idx_to_word),
):
    with open(out_name, "w") as f:
        f.write(str(mapping))