In [1]:
import pandas as pd
import numpy as np
import pickle
from pickle import dump, load
import matplotlib.pyplot as plt

# TEXT ENCODING

In [2]:
import numpy as np
# load doc into memory
def load_doc(filename):
    # open the file as read only
    file = open(filename, 'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text

def load_descriptions(doc):
    mapping = dict()
    # process lines
    for line in doc.split('\n'):
        # split line by white space
        tokens = line.split()
        if len(line) < 2:
            continue
        # take the first token as the image id, the rest as the description
        image_id, image_desc = tokens[0], tokens[1:]
        # extract filename from image id
        image_id = image_id.split('.')[0]
        # convert description tokens back to string
        image_desc = ' '.join(image_desc)
        # create the list if needed
        if image_id not in mapping:
            mapping[image_id] = list()
        # store description
        mapping[image_id].append(image_desc)
    return mapping

# load a pre-defined list of photo identifiers
def load_set(filename):
    doc = load_doc(filename)
    dataset = list()
    # process line by line
    for line in doc.split('\n'):
        # skip empty lines
        if len(line) < 1:
            continue
        # get the image identifier
        identifier = line.split('.')[0]
        dataset.append(identifier)
    return set(dataset)



# load clean descriptions into memory
def load_clean_descriptions(filename, dataset):
    
    # load document
    doc = load_doc(filename)
    descriptions = dict()
    
    i=0
    
    descriptions["id"] = list()
    descriptions["label"] = list()
    descriptions["image_id"] = list()
    descriptions["desc"] = list()
    
    # As the dataset is not prepared for a multimodal binary classification
    # let's mix some instances and make the label '0' means that the image 
    # and the text are similar and the label '1' that are not similar
    lines = doc.split('\n')
    total_instances = len(dataset)
    positivos = int(0.8 * total_instances)
    negativos = total_instances - positivos

    for j, line in enumerate(lines):        
        # split line by white space
        tokens = line.split()
        
        # split id from description
        image_id, image_desc = tokens[0], tokens[1:]
        
        desc = ""
        for w in image_desc:
            desc = desc + " " + w
            
        if image_id in dataset:
            
            # create list
            descriptions["id"].append(i)
            descriptions["desc"].append(desc)
            
            if i<positivos*5:
                descriptions["image_id"].append(image_id)
                descriptions["label"].append(0)

            else:
                original_state = j
                j = j-10
                tokens = lines[j].split()
                
                # split id from description
                image_id, image_desc = tokens[0], tokens[1:]
                
                while image_id not in dataset:
                    j = j-5
                    tokens = lines[j].split()
                    # split id from description
                    image_id, image_desc = tokens[0], tokens[1:]
                
                descriptions["image_id"].append(image_id)
                descriptions["label"].append(1)  
                    
                j = original_state
                
            i+=1

    return descriptions

In [3]:
desc_path = "C:/Users/rubio/Jupyter Notebook/Adecuacion Imagen Texto/Image_Text_Matching_Model/Image_Text_Adequacy_ELMO/descriptions.txt"
doc = load_doc(desc_path)
doc = load_descriptions(doc)

train_set = load_set("E://TFM/Flickr8k/Flickr8k_text/Flickr_8k.trainImages.txt")
train_set = load_clean_descriptions(desc_path,train_set)

test_set = load_set("E://TFM/Flickr8k/Flickr8k_text/Flickr_8k.testImages.txt")
test_set = load_clean_descriptions(desc_path,test_set)

val_set = load_set("E://TFM/Flickr8k/Flickr8k_text/Flickr_8k.devImages.txt")
val_set = load_clean_descriptions(desc_path,val_set)

df = pd.DataFrame(data=doc)
df_train = pd.DataFrame(data=train_set)
df_test = pd.DataFrame(data=test_set)
df_val = pd.DataFrame(data=val_set)

In [4]:
elmo_train_path = "C:/Users/rubio/Jupyter Notebook/Adecuacion Imagen Texto/Image_Text_Matching_Model/Image_Text_Adequacy_ELMO/elmo_train_03032019.pickle"
elmo_test_path = "C:/Users/rubio/Jupyter Notebook/Adecuacion Imagen Texto/Image_Text_Matching_Model/Image_Text_Adequacy_ELMO/elmo_test_03032019.pickle"

# load elmo_train_new
pickle_in = open(elmo_train_path, "rb")
elmo_train_new = pickle.load(pickle_in)

# load elmo_train_new
pickle_in = open(elmo_test_path, "rb")
elmo_test_new = pickle.load(pickle_in)

In [5]:
df_train["desc_encoded"] = list(elmo_train_new)
df_test["desc_encoded"] = list(elmo_test_new)

In [6]:
df_train.head()

Unnamed: 0,id,label,image_id,desc,desc_encoded
0,0,0,1000268201_693b08cb0e,child in pink dress is climbing up set of sta...,"[-0.13638557, 0.08900155, -0.15587759, -0.1003..."
1,1,0,1000268201_693b08cb0e,girl going into wooden building,"[-0.13405389, -0.110273264, -0.062568694, -0.0..."
2,2,0,1000268201_693b08cb0e,little girl climbing into wooden playhouse,"[-0.14551371, -0.031122122, -0.009935513, 0.03..."
3,3,0,1000268201_693b08cb0e,little girl climbing the stairs to her playhouse,"[-0.15659836, -0.022028767, 0.07566264, 0.0556..."
4,4,0,1000268201_693b08cb0e,little girl in pink dress going into wooden c...,"[-0.16013935, -0.12621516, 0.012294158, -0.102..."


In [7]:
df_test.head()

Unnamed: 0,id,label,image_id,desc,desc_encoded
0,0,0,1056338697_4f7d7ce270,blond woman in blue shirt appears to wait for...,"[-0.10440717, -0.10254673, -0.083271496, -0.23..."
1,1,0,1056338697_4f7d7ce270,blond woman is on the street hailing taxi,"[-0.1566839, -0.075696446, -0.07703825, -0.062..."
2,2,0,1056338697_4f7d7ce270,woman is signaling is to traffic as seen from...,"[-0.13784637, 0.219979, -0.18334195, 0.0646510..."
3,3,0,1056338697_4f7d7ce270,woman with blonde hair wearing blue tube top ...,"[-0.010734234, -0.23333493, -0.34981498, -0.13..."
4,4,0,1056338697_4f7d7ce270,woman in the blue dress is holding out her ar...,"[0.014249746, -0.05105914, -0.0928962, 0.00147..."


In [8]:
train_features = load(open("E://TFM/Pickle/encoded_train_images.pkl", "rb"))
print('Photos: train=%d' % len(train_features))

test_features = load(open("E://TFM/Pickle/encoded_test_images.pkl", "rb"))
print('Photos: test=%d' % len(test_features))

Photos: train=6000
Photos: test=1000


In [9]:
def add_images_encoded(df, features):
    img_encoded = list()
    for n in df['image_id']:
        name = n + ".jpg"
        img_encoded.append(features[name])

    df['image_encoded'] = img_encoded

In [10]:
add_images_encoded(df_train, train_features)
df_train.head()

Unnamed: 0,id,label,image_id,desc,desc_encoded,image_encoded
0,0,0,1000268201_693b08cb0e,child in pink dress is climbing up set of sta...,"[-0.13638557, 0.08900155, -0.15587759, -0.1003...","[0.12277614, 0.33294913, 0.752717, 0.28675896,..."
1,1,0,1000268201_693b08cb0e,girl going into wooden building,"[-0.13405389, -0.110273264, -0.062568694, -0.0...","[0.12277614, 0.33294913, 0.752717, 0.28675896,..."
2,2,0,1000268201_693b08cb0e,little girl climbing into wooden playhouse,"[-0.14551371, -0.031122122, -0.009935513, 0.03...","[0.12277614, 0.33294913, 0.752717, 0.28675896,..."
3,3,0,1000268201_693b08cb0e,little girl climbing the stairs to her playhouse,"[-0.15659836, -0.022028767, 0.07566264, 0.0556...","[0.12277614, 0.33294913, 0.752717, 0.28675896,..."
4,4,0,1000268201_693b08cb0e,little girl in pink dress going into wooden c...,"[-0.16013935, -0.12621516, 0.012294158, -0.102...","[0.12277614, 0.33294913, 0.752717, 0.28675896,..."


In [11]:
add_images_encoded(df_test, test_features)
df_test.head()

Unnamed: 0,id,label,image_id,desc,desc_encoded,image_encoded
0,0,0,1056338697_4f7d7ce270,blond woman in blue shirt appears to wait for...,"[-0.10440717, -0.10254673, -0.083271496, -0.23...","[0.45309222, 0.25752306, 0.11301514, 0.2944892..."
1,1,0,1056338697_4f7d7ce270,blond woman is on the street hailing taxi,"[-0.1566839, -0.075696446, -0.07703825, -0.062...","[0.45309222, 0.25752306, 0.11301514, 0.2944892..."
2,2,0,1056338697_4f7d7ce270,woman is signaling is to traffic as seen from...,"[-0.13784637, 0.219979, -0.18334195, 0.0646510...","[0.45309222, 0.25752306, 0.11301514, 0.2944892..."
3,3,0,1056338697_4f7d7ce270,woman with blonde hair wearing blue tube top ...,"[-0.010734234, -0.23333493, -0.34981498, -0.13...","[0.45309222, 0.25752306, 0.11301514, 0.2944892..."
4,4,0,1056338697_4f7d7ce270,woman in the blue dress is holding out her ar...,"[0.014249746, -0.05105914, -0.0928962, 0.00147...","[0.45309222, 0.25752306, 0.11301514, 0.2944892..."


In [12]:
combined_input = []
for i in df_train[['desc_encoded', 'image_encoded']].values:
    combined_input.append(np.concatenate((i[0],i[1])))
    
df_train['combined_input'] = combined_input
df_train.head()

Unnamed: 0,id,label,image_id,desc,desc_encoded,image_encoded,combined_input
0,0,0,1000268201_693b08cb0e,child in pink dress is climbing up set of sta...,"[-0.13638557, 0.08900155, -0.15587759, -0.1003...","[0.12277614, 0.33294913, 0.752717, 0.28675896,...","[-0.13638557, 0.08900155, -0.15587759, -0.1003..."
1,1,0,1000268201_693b08cb0e,girl going into wooden building,"[-0.13405389, -0.110273264, -0.062568694, -0.0...","[0.12277614, 0.33294913, 0.752717, 0.28675896,...","[-0.13405389, -0.110273264, -0.062568694, -0.0..."
2,2,0,1000268201_693b08cb0e,little girl climbing into wooden playhouse,"[-0.14551371, -0.031122122, -0.009935513, 0.03...","[0.12277614, 0.33294913, 0.752717, 0.28675896,...","[-0.14551371, -0.031122122, -0.009935513, 0.03..."
3,3,0,1000268201_693b08cb0e,little girl climbing the stairs to her playhouse,"[-0.15659836, -0.022028767, 0.07566264, 0.0556...","[0.12277614, 0.33294913, 0.752717, 0.28675896,...","[-0.15659836, -0.022028767, 0.07566264, 0.0556..."
4,4,0,1000268201_693b08cb0e,little girl in pink dress going into wooden c...,"[-0.16013935, -0.12621516, 0.012294158, -0.102...","[0.12277614, 0.33294913, 0.752717, 0.28675896,...","[-0.16013935, -0.12621516, 0.012294158, -0.102..."


In [13]:
np.array(df_train['combined_input'].values.tolist())

array([[-0.13638557,  0.08900155, -0.15587759, ...,  0.21939674,
         0.30216393,  0.40283266],
       [-0.13405389, -0.11027326, -0.06256869, ...,  0.21939674,
         0.30216393,  0.40283266],
       [-0.14551371, -0.03112212, -0.00993551, ...,  0.21939674,
         0.30216393,  0.40283266],
       ...,
       [-0.0970743 , -0.03234397, -0.06601471, ...,  0.5747466 ,
         0.3907249 ,  0.5886121 ],
       [-0.0105999 ,  0.19849378,  0.00790932, ...,  0.5747466 ,
         0.3907249 ,  0.5886121 ],
       [ 0.01224608,  0.05317316,  0.02862476, ...,  0.5747466 ,
         0.3907249 ,  0.5886121 ]], dtype=float32)

In [14]:
combined_input = list()
for i in df_test[['desc_encoded', 'image_encoded']].values:
    combined_input.append(np.concatenate((i[0],i[1])).tolist())
    
df_test['combined_input'] = combined_input
df_test.head()

Unnamed: 0,id,label,image_id,desc,desc_encoded,image_encoded,combined_input
0,0,0,1056338697_4f7d7ce270,blond woman in blue shirt appears to wait for...,"[-0.10440717, -0.10254673, -0.083271496, -0.23...","[0.45309222, 0.25752306, 0.11301514, 0.2944892...","[-0.1044071689248085, -0.10254672914743423, -0..."
1,1,0,1056338697_4f7d7ce270,blond woman is on the street hailing taxi,"[-0.1566839, -0.075696446, -0.07703825, -0.062...","[0.45309222, 0.25752306, 0.11301514, 0.2944892...","[-0.15668390691280365, -0.0756964460015297, -0..."
2,2,0,1056338697_4f7d7ce270,woman is signaling is to traffic as seen from...,"[-0.13784637, 0.219979, -0.18334195, 0.0646510...","[0.45309222, 0.25752306, 0.11301514, 0.2944892...","[-0.13784636557102203, 0.21997900307178497, -0..."
3,3,0,1056338697_4f7d7ce270,woman with blonde hair wearing blue tube top ...,"[-0.010734234, -0.23333493, -0.34981498, -0.13...","[0.45309222, 0.25752306, 0.11301514, 0.2944892...","[-0.010734234005212784, -0.23333492875099182, ..."
4,4,0,1056338697_4f7d7ce270,woman in the blue dress is holding out her ar...,"[0.014249746, -0.05105914, -0.0928962, 0.00147...","[0.45309222, 0.25752306, 0.11301514, 0.2944892...","[0.01424974575638771, -0.051059141755104065, -..."


In [15]:
df_train.to_pickle("df_train_combined.pkl")
df_test.to_pickle("df_test_combined.pkl")