In [12]:
from PIL import Image
import requests as rq
from io import BytesIO
import pandas as pd
import numpy as np
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from keras.layers import (LSTM, Embedding, Input, Dense, Dropout)
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import img_to_array, load_img
from keras.preprocessing.text import Tokenizer
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers.merge import add
from random import randint, sample
import csv
from keras.callbacks import LambdaCallback

In [3]:
# Model to pre-process images
cnn_model = VGG16()

# re-structure the model
cnn_model.layers.pop()
cnn_model = Model(inputs=cnn_model.inputs, outputs=cnn_model.layers[-1].output)

# Read in twitter data [cols: images, captions]
data = pd.read_csv("masterdata.csv")
l = len(data)

# List to hold the images' feature vectors
features = []

# List to hold the captions
captions = []

# List to hold the indices explored
indices = []

# Dimension of images
image_dim = 224

# Number of image-caption pairs to extract
num_images = 10000

# Maximum number of characters that a caption can be
longest_caption = 60

while len(features) < num_images:
    try:
        i = randint(0, l)
        if i in indices:
            continue
        elif len(data.caption[i]) > longest_caption:
            continue
        try:
            # Get image from URL and run it through VGG16 to get feature vector
            url = data.photo[i]
            response = rq.get(url)
            img = Image.open(BytesIO(response.content)).resize((image_dim,image_dim))
            x = image.img_to_array(img)
            x = x.reshape((1, x.shape[0], x.shape[1], x.shape[2]))
            x = preprocess_input(x)
            f = cnn_model.predict(x)
            features.append(f)
            
            # Append caption
            captions.append(data.caption[i])
            
            # Append index
            indices.append(i)
        except:
            continue
    except:
        print(data.caption[i])
        continue
        
    # Print statement to check in on progress
    if (len(features) % 100 == 0):
        print(str(len(features)) + "\tpreprocessed")
    
    

100	preprocessed
200	preprocessed
300	preprocessed
400	preprocessed
500	preprocessed
600	preprocessed
700	preprocessed
800	preprocessed
900	preprocessed
1000	preprocessed
1100	preprocessed
1200	preprocessed
1300	preprocessed
1400	preprocessed
1500	preprocessed
1600	preprocessed
1700	preprocessed
1800	preprocessed
1900	preprocessed
2000	preprocessed
2100	preprocessed
2200	preprocessed
2300	preprocessed
2400	preprocessed
2500	preprocessed
2600	preprocessed
2700	preprocessed
2800	preprocessed
2900	preprocessed
3000	preprocessed
3100	preprocessed
3200	preprocessed
3300	preprocessed
3400	preprocessed
3500	preprocessed
3600	preprocessed
3700	preprocessed
3800	preprocessed
3900	preprocessed
4000	preprocessed
4100	preprocessed
4200	preprocessed
4300	preprocessed
4400	preprocessed
4500	preprocessed
4600	preprocessed
4700	preprocessed
4800	preprocessed
4900	preprocessed
5000	preprocessed
5100	preprocessed
5200	preprocessed
5300	preprocessed
5400	preprocessed
5500	preprocessed
5600	preprocessed
5

In [4]:
# Replace all newline characters in captions
captions = [c.replace('\n', ' ') for c in captions]

In [5]:
# Tokenize captions: leave punctuation and upper-case letters, tokenize on a char level
tokenizer = Tokenizer(lower=False, char_level=True,filters='\t\n')
tokenizer.fit_on_texts(captions)
encoded_captions = tokenizer.texts_to_sequences(captions)
start = len(tokenizer.word_index) + 1
stop = start + 1
vocab_size = stop + 1

# Insert start and stop sequences to the encoded caption size
encoded_captions = [([start] + c) for c in encoded_captions]
encoded_captions = [(c + [stop]) for c in encoded_captions]


In [None]:
# Write extracted data to file

import csv

with open("encoded.csv","w") as f:
    wr = csv.writer(f)
    wr.writerows(encoded_captions)
    
with open("captions.csv","w") as f:
    wr = csv.writer(f)
    wr.writerows(captions)

with open("features.csv","w") as f:
    wr = csv.writer(f)
    wr.writerows(features)

In [6]:
# Create input-output vectors
# Input: image feature vector, first n characters in caption
# Output: n+1 character

max_cap = max(len(c) for c in encoded_captions)
X1 = []
X2 = []
y = []

for i in range(len(encoded_captions)):
    c = encoded_captions[i]
    for j in range(len(c)):
        in_seq, out_seq = c[:j], c[j]
        in_seq = pad_sequences([in_seq], max_cap)[0]
        out_seq = to_categorical(out_seq, num_classes = vocab_size)
        X1.append(features[i])
        X2.append(in_seq)
        y.append(out_seq)
X1 = np.reshape(X1,(np.shape(X1)[0], np.shape(X1)[2]))

In [7]:
# feature extractor model
inputs1 = Input(shape=(4096,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)

# sequence model
inputs2 = Input(shape=(max_cap,))
se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)

# decoder model
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

# tie it together [image, seq] [word]
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', 
              metrics = ['accuracy'])

# summarize model
print(model.summary())

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 62)           0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4096)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 62, 256)      269312      input_3[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 4096)         0           input_2[0][0]                   

In [9]:
# Function to convert a token to a letter
def to_letter(yhat):
    for k, v in tokenizer.word_index.items():
        if v == yhat:
            return k

In [24]:
import sys

# Function to test model at the end of each epoch -- might need a little work
def on_epoch_end(epoch, _):
    """Function invoked at end of each epoch. Prints generated text."""
    print()
    print('Generating caption after epoch %d' % epoch)

    i = randint(0, num_images)
    generated = ''
    print('-------- Real caption: "' + captions[i] + '"')
    x_pred = np.zeros((1, max_cap))
    x_pred[0] = start
    f = np.array(features[i])
    
    for i in range(max_cap):
        preds = np.argmax(model.predict([f, x_pred], verbose=0))
        if preds == start or preds == stop:
            break
        next_char = to_letter(preds)
        generated += next_char
    print("--- Generated caption: \"" + generated + "\"")
    print()

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [25]:
# Train on 80% of the data, test on 20%
amt_data = int(len(X1) * 4 / 5)

# Create training and testing vectors
X1train = np.array(X1[:amt_data])
X1test = np.array(X1[amt_data:])

X2train = np.array(X2[:amt_data])
X2test = np.array(X2[amt_data:])

ytrain = np.array(y[:amt_data])
ytest = np.array(y[amt_data:])

# Fit the model
model.fit([X1train, X2train], ytrain, epochs=15, verbose=1, validation_data=([X1test, X2test], ytest), callbacks=[print_callback])


Train on 243540 samples, validate on 60886 samples
Epoch 1/15

Generating caption after epoch 0
----- Real caption: "Did I even wear heels?"
                                                              

Epoch 2/15

Generating caption after epoch 1
----- Real caption: "🗝 to my garden"


Epoch 3/15

Generating caption after epoch 2
----- Real caption: "Still waiting for my AARP card."


Epoch 4/15

Generating caption after epoch 3
----- Real caption: "dreamy🌸🌿"
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII

Epoch 5/15

Generating caption after epoch 4
----- Real caption: "Seeing double!!"
                                                              

Epoch 6/15

Generating caption after epoch 5
----- Real caption: "Late, but, it was quality night"
                                                              

Epoch 7/15

Generating caption after epoch 6
----- Real caption: "The rough life ft. Heidinor"
ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss

E

<keras.callbacks.callbacks.History at 0x63e57f940>

In [28]:
# Test fitted model on 100 random images
for x in range(100):
    i = randint(0, num_images)
    print(i)
    print("Real:")
    print("\t" + captions[i])
    pred_encoded = [start]
    f = np.array(features[i])
    yhat = 0
    pred_capt = ""
    
    # Loop to continue feeding generated string in until we hit the stop sequence
    while yhat != stop and len(pred_capt) < 100:
        pred_pad = np.array(pad_sequences([pred_encoded], max_cap))
        yhat = np.argmax(model.predict([f, pred_pad]))
        if yhat == start or yhat == stop:
            break
        try:
            pred_encoded.append(yhat)
            pred_capt += to_letter(yhat)
        except:
            print("OH NO")
    print("Predicted:")
    print("\t" + pred_capt)
    print()

3155
Real:
	sup dog
Predicted:
	The world with the best weekend #thetan

6545
Real:
	Conociendo a personas increibles,caminando 18 km hoy.
Predicted:
	The world with the best weekend #thetan

6219
Real:
	sofabulus
Predicted:
	The world with the best weekend #thetan

8987
Real:
	Love my girls 👩‍👧‍👧 @soph512
Predicted:
	The world with the best weekend #thetan

5783
Real:
	My new friend, Richard Parker.
Predicted:
	The world with the best weekend #thetan

6376
Real:
	Someone really loves Twitter. @joeybrunk
Predicted:
	The world with the best weekend #thetan

1293
Real:
	Wednesday's are for me and my friend Paige
Predicted:
	The world with the best weekend #thetan

1840
Real:
	🏠 yayy
Predicted:
	The world with the best weekend #thetan

4544
Real:
	Halloween was a knock out 💥
Predicted:
	The world with the best weekend #thetan

4056
Real:
	My people ❤️ (don’t mind my awkward hand)
Predicted:
	The world with the best weekend #thetan

1490
Real:
	Momma-son brunch!
Predicted:
	The world with 

Predicted:
	The world with the best weekend #thetan

106
Real:
	Yesterday mornings sunrise from the Gorge 🌅
Predicted:
	The world with the best weekend #thetan

2105
Real:
	P(l)(r)aying among fallen giants.. In socks😵
Predicted:
	The world with the best weekend #thetan

2
Real:
	•just beachy• (ft the remnants of my henna)
Predicted:
	The world with the best weekend #thetan

9253
Real:
	Salt & pepper bitch
Predicted:
	The world with the best weekend #thetan

6540
Real:
	#loextrañomucho #loamo
Predicted:
	The world with the best weekend #thetan

8132
Real:
	There are two kinds of people
Predicted:
	The world with the best weekend #thetan

5657
Real:
	sparkle summit 2k19: the year of the sparkle stamps
Predicted:
	The world with the best weekend #thetan

1710
Real:
	Ariana Grande with my favorite rats 😻
Predicted:
	The world with the best weekend #thetan

2392
Real:
	Cape May #wuht
Predicted:
	The world with the best weekend #thetan

3669
Real:
	Roy Molloy in action. #respect
Predicted:
	