In [1]:
import pickle
import fastText # For text classifier
import numpy as np
from sklearn import svm
import re
import string
from sklearn.metrics import classification_report,accuracy_score


In [2]:
## Load the text classifier (fastText model stored as a .bin file)
text_classifier = fastText.load_model(
    '../../models/text_classfier.bin'
)

In [3]:
## Load Image classifier
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load model files that come from a trusted source.
# The `with` block closes the file handle as soon as the model has been read.
with open('../../models/image_classifier_71precision.pkl', 'rb') as model_file:
    image_classifier = pickle.load(model_file)
# Display the class labels; the late-fusion cell maps argmax indices back
# through this array.
image_classifier.classes_

array(['creepy', 'gore', 'happy', 'rage'], dtype='<U6')

In [4]:
## Load Test set
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load data files that come from a trusted source.
# Each file is opened in a `with` block so its handle is closed right after loading.
with open('../../processed_data/image_testing_features.pkl', 'rb') as features_file:
    image_features_test = pickle.load(features_file)
with open('../../processed_data/text_testing.pkl', 'rb') as text_file:
    text_test = pickle.load(text_file)
with open('../../processed_data/testing_labels.pkl', 'rb') as labels_file:
    labels_test = pickle.load(labels_file)

# To evaluate on the validation split instead, load these files into the
# same three variables:
#   ../../processed_data/image_validation_features.pkl
#   ../../processed_data/text_validation.pkl
#   ../../processed_data/validation_labels.pkl

In [5]:
## Text classifier prediction (smoke test on the first test sample)
# Ask for the 4 best labels; the result is a (labels, probabilities) pair.
# The raw text is passed as-is here, without preprocessText.
first_text = text_test[0][0]
labels = text_classifier.predict(first_text, k=4)
print(labels)

(("__label__b'gore'", "__label__b'creepy'", "__label__b'rage'", "__label__b'happy'"), array([0.58987284, 0.40007737, 0.00524552, 0.0048442 ]))


In [6]:
## Image classifier prediction (smoke test on the first test sample)
# Row 0 of the feature matrix is reshaped to a single-row 2-D input
# before it is handed to predict_proba.
first_image_features = image_features_test[0,:].reshape(1, -1)
image_classifier.predict_proba(first_image_features)

array([[0.84261397, 0.00709259, 0.06727856, 0.08301487]])

In [7]:
def reorder_text_prob(labels, classes=('creepy', 'gore', 'happy', 'rage')):
    """Re-order text-classifier probabilities into a fixed class order.

    Parameters
    ----------
    labels : tuple
        ``(label_names, probabilities)`` pair as returned by
        ``text_classifier.predict``, e.g.
        ``(("__label__b'gore'", "__label__b'creepy'"), array([0.59, 0.40]))``.
    classes : sequence of str, optional
        Target class order. Defaults to ``('creepy', 'gore', 'happy', 'rage')``,
        the order displayed for ``image_classifier.classes_``.

    Returns
    -------
    list
        One entry per element of ``classes``: the probability as ``np.float32``,
        or ``np.nan`` when that class does not appear in ``labels``.
        Labels that are not in ``classes`` are ignored.
    """
    # Map each class name to its slot in the output vector.
    position = {str(name): slot for slot, name in enumerate(classes)}
    prob_vector = [np.nan] * len(classes)

    # Pair every label with its probability directly instead of coercing the
    # mixed (str, float) tuple into a NumPy string array and parsing it back.
    for raw_label, prob in zip(labels[0], labels[1]):
        # "__label__b'gore'" -> "'gore'" -> "gore"
        name = re.sub('__label__b', '', raw_label)
        name = re.sub(r'\W+', '', name)
        if name in position:
            prob_vector[position[name]] = np.float32(prob)
    return prob_vector

In [8]:
def preprocessText(text):
    """Clean a raw text sample before it is passed to the text classifier.

    Steps, in order: drop non-ASCII characters, lower-case, remove links,
    remove @usernames, unwrap #hashtags, collapse whitespace, strip
    punctuation, drop words shorter than 3 characters, trim both ends.

    Parameters
    ----------
    text : str or None
        Raw text.

    Returns
    -------
    str
        The cleaned text, or ``""`` when ``text`` is None.
    """
    if text is None:
        return ""

    # Drop every non-ASCII character
    text = (text.encode('ascii', 'ignore')).decode('utf-8')
    # Lower-case the text
    text = text.lower()
    # Remove links (raw strings keep the regex escapes from being read as
    # invalid string escape sequences)
    text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', text)
    # Remove usernames
    text = re.sub(r'@[^\s]+', '', text)
    # Replace hashtags by just the word
    text = re.sub(r'#([^\s]+)', r'\1', text)
    # Collapse every run of whitespace into a single space
    text = re.sub(r'[\s]+', ' ', text)
    # Remove all punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Additional clean-up: remove words shorter than 3 characters (together
    # with the non-word characters in front of them), then trim the ends
    text = re.sub(r'\W*\b\w{1,2}\b', '', text)
    text = text.strip()

    return text

In [9]:
## Late fusion: multiply the class probabilities of the image and text classifiers
# One predict_proba call for the whole test set instead of one call per sample;
# row i holds the image class probabilities of test sample i.
image_prob_matrix = image_classifier.predict_proba(image_features_test)

late_fusion_prob_array = []
text_pred = []
for i in range(len(labels_test)):
    vect_prob_img = image_prob_matrix[i]
    text_processed = preprocessText(text_test[i][0])
    vect_prob_text = text_classifier.predict(text_processed, k = 4)
    # Top text label: "__label__b'gore'" -> "'gore'" -> "gore"
    temp = re.sub('__label__b', '', vect_prob_text[0][0])
    text_pred.append(re.sub(r'\W+', '', temp))
    # Align the text probabilities with the image class order. A class the
    # text classifier did not return comes back as NaN; np.argmax would pick
    # a NaN entry, so treat it as probability 0 before fusing.
    vect_prob_text = np.nan_to_num(reorder_text_prob(vect_prob_text))
    late_fusion_prob = np.multiply(vect_prob_img, vect_prob_text)
    late_fusion_prob_array.append(late_fusion_prob)
text_pred = np.array(text_pred).reshape(-1,1)
late_fusion_prob_array = np.array(late_fusion_prob_array)
# Fused prediction = class with the highest product of the two probabilities
idx = np.argmax(late_fusion_prob_array, axis=1)
late_fusion_prediction = np.take(image_classifier.classes_,idx)

image_pred = image_classifier.predict(image_features_test).reshape(-1,1)
print("Text Accuracy: "+str(accuracy_score(labels_test, text_pred)))
print("Image Accuracy: "+str(accuracy_score(labels_test, image_pred)))
print("Late Fusion Accuracy: "+str(accuracy_score(labels_test, late_fusion_prediction)))

Text Accuracy: 0.6588419405320813
Image Accuracy: 0.7120500782472613
Late Fusion Accuracy: 0.7949921752738655
