In [1]:
import pandas as pd
import numpy as np

df_items = pd.read_csv('items-Copy1.csv') # read dataset into pandas dataframe

# replace field that's entirely space (or empty) with NaN
df_items = df_items.replace(np.nan, '', regex=True)

In [2]:
labels_list_word = [] # holds list of labels in word form
labels_list_numerical = [] # holds list of labels in numerical form
image_list = [] # holds list of descriptions

# binary classification so either anger or not
for i, row in df_items.iterrows():
    row['tag'] = row['tag'].lower() # convert tags to lowercase
    if 'fear' in row['tag']:
        labels_list_word.append('both') # anger
        image_list.append(row['image']) # add description
        labels_list_numerical.append(1) # 1
    else:
        labels_list_word.append('none') # not anger
        image_list.append(row['image']) # add description
        labels_list_numerical.append(0) # 0

df_fear = pd.DataFrame() # create empty dataframe
df_fear['label word'] = labels_list_word # append labels_list_word to df with column header 'label word'
df_fear['label numerical'] = labels_list_numerical # append labels_list_numerical to df with column header 'label numerical'
df_fear['image'] = image_list # append descriptions_list to df with column header 'description'
df_fear.loc[df_fear['label word'] == 'fear']

Unnamed: 0,label word,label numerical,image


In [3]:
df_fear

Unnamed: 0,label word,label numerical,image
0,both,1,https://mith.umd.edu/irads/files/original/f255...
1,none,0,https://mith.umd.edu/irads/files/original/f72d...
2,none,0,https://mith.umd.edu/irads/files/original/ccce...
3,none,0,https://mith.umd.edu/irads/files/original/11f2...
4,none,0,https://mith.umd.edu/irads/files/original/f178...
...,...,...,...
3007,none,0,https://mith.umd.edu/irads/files/original/5c5e...
3008,none,0,https://mith.umd.edu/irads/files/original/4e71...
3009,none,0,https://mith.umd.edu/irads/files/original/d84a...
3010,none,0,https://mith.umd.edu/irads/files/original/dc10...


In [4]:
# initialize the number of epochs to train for, initial learning rate,
# batch size, and image dimensions
EPOCHS = 300
INIT_LR = 1e-2
BS = 32
IMAGE_DIMS = (224, 224, 3)

In [5]:
import urllib.request
from PIL import Image
import cv2
from keras.preprocessing.image import img_to_array

data = []
labels = []


for i, row in df_fear.iterrows():
    print('image # : ' + str(i) + ' ' + row['image'])
    print('numerical label: ' + str(row['label numerical']))
    

    with urllib.request.urlopen(row['image']) as url:
        with open('temp.jpg', 'wb') as f:
            f.write(url.read())

    image = Image.open('temp.jpg')

    image = cv2.resize(np.float32(image), (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = img_to_array(image)
    data.append(image)
    labels.append(row['label numerical'])

Using TensorFlow backend.


image # : 0 https://mith.umd.edu/irads/files/original/f2553aadda4c253205d6f7b019093782.png
numerical label: 1
image # : 1 https://mith.umd.edu/irads/files/original/f72dcc230e06e82f2594ffa7fde59afa.png
numerical label: 0
image # : 2 https://mith.umd.edu/irads/files/original/ccce26606c77044ce7688abff51c7709.png
numerical label: 0
image # : 3 https://mith.umd.edu/irads/files/original/11f221b5426c75559337eb9d1f722ef5.png
numerical label: 0
image # : 4 https://mith.umd.edu/irads/files/original/f1789844c00190b45d5a6fa04f3c6631.png
numerical label: 0
image # : 5 https://mith.umd.edu/irads/files/original/9ccded30f9c79cf3fe582d74bafb3963.png
numerical label: 0
image # : 6 https://mith.umd.edu/irads/files/original/5c74410e353a67c44e8b6be13351cb54.png
numerical label: 0
image # : 7 https://mith.umd.edu/irads/files/original/aed1ba13168b8d355d938d54fce3fb18.png
numerical label: 0
image # : 8 https://mith.umd.edu/irads/files/original/fce96313055ed9df3b383a61d3916fc1.png
numerical label: 0
image # : 

KeyboardInterrupt: 

In [None]:
import numpy as np

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
print("[INFO] data matrix: {:.2f}MB".format(
    data.nbytes / (1024 * 1000.0)))

In [None]:
data[0]
labels[0]

In [None]:
from keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import resnet50
from keras.models import load_model

class Img2Vec(object):

    def __init__(self):
        # load model
        model = load_model(r'C:\Users\tobby\Documents\russian_disinformation\fear_classification_duplicated_resnet50_300_epochs\fear_minority_duplicated_resnet50_300')
        layer_name = 'avg_pool'
        self.intermediate_layer_model = Model(inputs=model.input, 
                                              outputs=model.get_layer(layer_name).output)


    def get_vec(self, images):
        intermediate_output = self.intermediate_layer_model.predict(images)

        return intermediate_output

In [None]:
img2vec = Img2Vec()

In [None]:
x = img2vec.get_vec(data)

In [None]:
x.shape 

In [None]:
x[0]

In [None]:
list_to_save = []

dataset_order = 0

for i, arr in enumerate(x):
    list_to_save.append(np.array([[dataset_order], [labels[i]], [arr]], dtype=object))
    
    dataset_order += 1

In [None]:
list_to_save

In [None]:
np.savez("fear_resnet50_image_embeddings.npz", *list_to_save)

In [None]:
data = np.load("fear_resnet50_image_embeddings.npz", allow_pickle=True)

data.files
data['arr_65']

In [None]:
np.array(data.files).shape