In [None]:
import numpy as np
import pandas as pd
from keras.applications import EfficientNetB0
from sklearn.metrics import accuracy_score,precision_score,f1_score
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.layers import Dense, Input, LSTM, concatenate, Embedding
from keras.models import Model
from keras.utils import img_to_array, load_img
from keras.utils import pad_sequences
from keras.utils import to_categorical

Load text data

In [None]:
# Read the train / test / dev annotation tables; the three frames are bound in
# the same order as the paths listed here.
split_files = [
    'task_informative_text_img_train.csv',
    'task_informative_text_img_test.csv',
    'task_informative_text_img_dev.csv',
]
train_data, test_data, val_data = map(pd.read_csv, split_files)

In [None]:
# Keep only the training rows whose text label equals their image label, so
# every retained tweet carries a single target shared by both modalities.
# A boolean mask is used instead of a loop over a hard-coded row count: it
# covers exactly the rows present in the CSV (no KeyError on a shorter file,
# no silently ignored rows on a longer one).
train_text_rows = train_data[train_data['label_text'] == train_data['label_image']]
X1_text = train_text_rows['tweet_text'].tolist()
Y1_text = train_text_rows['label_text'].tolist()

# Containers for the test (2) and validation (3) splits; filled by later cells.
X2_text = []
Y2_text = []
X3_text = []
Y3_text = []

In [None]:
# Test split: keep rows where the text and image labels agree.
# The lists are rebuilt here (not appended to), so re-running this cell cannot
# duplicate samples, and the mask adapts to however many rows the CSV holds.
test_text_rows = test_data[test_data['label_text'] == test_data['label_image']]
X2_text = test_text_rows['tweet_text'].tolist()
Y2_text = test_text_rows['label_text'].tolist()

In [None]:
# Validation split: keep rows where the text and image labels agree.
# Rebuilding the lists makes the cell re-runnable, and the mask removes the
# dependence on a row count copied from the test split.
val_text_rows = val_data[val_data['label_text'] == val_data['label_image']]
X3_text = val_text_rows['tweet_text'].tolist()
Y3_text = val_text_rows['label_text'].tolist()
# Tokenize text data
# Vocabulary cap and fixed sequence length shared by all three splits.
max_words = 10000
max_len = 100

# The vocabulary is fitted on the training tweets only; the same tokenizer is
# then applied to the test and validation tweets.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X1_text)

sequences_train, sequences_test, sequences_val = (
    tokenizer.texts_to_sequences(tweets)
    for tweets in (X1_text, X2_text, X3_text)
)

# Pad / truncate every sequence to max_len tokens.
x_train, x_test, x_val = (
    pad_sequences(token_ids, maxlen=max_len)
    for token_ids in (sequences_train, sequences_test, sequences_val)
)

In [None]:
# Keep only the training rows whose text label equals their image label; this
# is the same condition used for the text lists, so the two stay row-aligned.
# A boolean mask replaces the loop over a hard-coded row count and therefore
# covers exactly the rows present in the CSV.
train_image_rows = train_data[train_data['label_text'] == train_data['label_image']]
X1_image = train_image_rows['image'].tolist()
Y1_image = train_image_rows['label_image'].tolist()

# Containers for the test (2) and validation (3) splits; filled by later cells.
X2_image = []
Y2_image = []
X3_image = []
Y3_image = []

In [None]:
# Test split: image paths and labels for rows where both labels agree.
# The lists are rebuilt here (not appended to), so re-running this cell cannot
# duplicate samples, and the mask adapts to however many rows the CSV holds.
test_image_rows = test_data[test_data['label_text'] == test_data['label_image']]
X2_image = test_image_rows['image'].tolist()
Y2_image = test_image_rows['label_image'].tolist()

In [None]:
# Validation split: image paths and labels for rows where both labels agree.
# Rebuilding the lists makes the cell re-runnable, and the mask removes the
# dependence on a row count copied from the test split.
val_image_rows = val_data[val_data['label_text'] == val_data['label_image']]
X3_image = val_image_rows['image'].tolist()
Y3_image = val_image_rows['label_image'].tolist()
# Load image data
# Target size every image is resized to, and the number of output classes.
img_width = 100
img_height = 100
num_classes = 2

# One (initially empty) buffer of decoded images per split.
img_train, img_test, img_val = [], [], []

In [None]:
# Decode every training image, resized to the configured target size, into an
# array. The list is built from scratch in this cell rather than appended to a
# list created elsewhere: re-running therefore cannot duplicate images, and it
# still works after img_train has been converted to a NumPy array (which has
# no .append).
img_train = [
    img_to_array(load_img(filename, target_size=(img_width, img_height)))
    for filename in X1_image
]

In [None]:
# Decode every test image, resized to the configured target size, into an
# array. Rebuilding the list here keeps the cell idempotent: re-running cannot
# duplicate images and does not fail once img_test has become a NumPy array.
img_test = [
    img_to_array(load_img(filename, target_size=(img_width, img_height)))
    for filename in X2_image
]

In [None]:
# Decode every validation image, resized to the configured target size, into
# an array. Rebuilding the list here keeps the cell idempotent: re-running
# cannot duplicate images and does not fail once img_val has become a NumPy
# array.
img_val = [
    img_to_array(load_img(filename, target_size=(img_width, img_height)))
    for filename in X3_image
]

In [None]:
# Stack each split's decoded images into a single NumPy array.
img_train, img_test, img_val = (
    np.array(image_batch) for image_batch in (img_train, img_test, img_val)
)

Define LSTM model

In [None]:
# Text branch: padded token ids -> 128-d embeddings -> 64-unit LSTM (with
# input and recurrent dropout of 0.2) -> one sigmoid score per class.
lstm_input = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=max_words, output_dim=128)(lstm_input)
lstm_layer = LSTM(
    units=64,
    dropout=0.2,
    recurrent_dropout=0.2,
)(embedding_layer)
lstm_output = Dense(units=num_classes, activation='sigmoid')(lstm_layer)
lstm_model = Model(inputs=lstm_input, outputs=lstm_output)

Define the EfficientNetB0 image model

In [None]:
# Image branch: an EfficientNetB0 backbone (ImageNet weights, no classification
# top, pooling='max') followed by one sigmoid score per class.
effnet_input = Input(shape=(img_width, img_height, 3))
effnet_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_tensor=effnet_input,
    pooling='max',
)
effnet_output = Dense(units=num_classes, activation='sigmoid')(effnet_model.output)
# Rebind the name: from here on effnet_model is the backbone plus the new head.
effnet_model = Model(inputs=effnet_input, outputs=effnet_output)

Combine the LSTM and EfficientNet models by concatenating their per-class outputs and adding a dense fusion layer

In [None]:
# Show the output shape of each branch before they are joined.
branch_outputs = [lstm_model.output, effnet_model.output]
for branch_output in branch_outputs:
    print(np.shape(branch_output))

# Concatenate the two branches' class scores and map them to the final
# per-class sigmoid scores with one dense layer.
combined_input = concatenate(branch_outputs)
fusion_output = Dense(units=num_classes, activation='sigmoid')(combined_input)
fusion_model = Model(
    inputs=[lstm_model.input, effnet_model.input],
    outputs=fusion_output,
)

Compile the fusion model

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
# NOTE(review): with a binary cross-entropy loss, Keras may resolve 'accuracy'
# to an element-wise binary accuracy rather than an argmax accuracy — confirm.
fusion_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Train the fusion model

In [None]:
# Empty integer-label buffers for the train / test / dev splits.
tempTr, tempTe, tempDe = [], [], []

In [None]:
# Encode the string labels as integers: 'informative' -> 0, anything else -> 1.
# Each list is rebuilt from scratch instead of being appended to a list created
# in another cell, so re-running this cell cannot double the labels and leave
# them out of step with the model inputs.
tempTr = [0 if label == 'informative' else 1 for label in Y1_text]
tempTe = [0 if label == 'informative' else 1 for label in Y2_text]
tempDe = [0 if label == 'informative' else 1 for label in Y3_text]

labels_train = tempTr
labels_test = tempTe
labels_val = tempDe

# One-hot encode the integer labels to match the num_classes-wide model output.
y_train = to_categorical(labels_train, num_classes=num_classes)
y_test = to_categorical(labels_test, num_classes=num_classes)
y_val = to_categorical(labels_val, num_classes=num_classes)

In [None]:
# Train on (text, image) input pairs for 15 epochs with batches of 40,
# evaluating on the validation split after every epoch.
history = fusion_model.fit(
    x=[x_train, img_train],
    y=y_train,
    batch_size=40,
    epochs=15,
    validation_data=([x_val, img_val], y_val),
)

Evaluate the fusion model

In [None]:
# Silent evaluation on the held-out test split; `score` holds the loss and the
# compiled metric value(s) returned by Keras.
score = fusion_model.evaluate(
    x=[x_test, img_test],
    y=y_test,
    verbose=0,
)

Print accuracy, precision, and F1 score

In [None]:
# Predicted class = index of the highest per-class score.
y_pred = np.argmax(fusion_model.predict([x_test, img_test]), axis=1)

# Collapse the one-hot targets to class indices only while they are still
# 2-D. Without this guard a second run of the cell would call argmax(axis=1)
# on the already-collapsed 1-D array and raise an AxisError.
if np.ndim(y_test) > 1:
    y_test = np.argmax(y_test, axis=1)

# Weighted averaging weights each class's score by its support.
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
print("Accuracy: {:.2f}%".format(acc*100))
print("Precision: {:.2f}%".format(precision*100))
print("F1 Score: {:.2f}%".format(f1*100))

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 table for the test predictions; expects
# y_test to already hold class indices (as produced by the metrics cell above).
report_text = classification_report(y_test, y_pred)
print(report_text)