In [204]:
# ! pip install -q tensorflow numpy pandas scikit-learn mlxtend dataprep
# ! cp drive/My\ Drive/Colab\ Notebooks/*.csv ./

In [205]:
import pandas as pd

# Read the two training artefacts from the working directory.
# The embeddings file is read with header=None, so its columns are
# labelled with integers rather than names.
embedding = pd.read_csv('dankmemes_task1_train_embeddings.csv', header=None)
df = pd.read_csv('dankmemes_task1_train.csv')

In [206]:
import tensorflow as tf
from tensorflow.keras import Model

class MyModel(Model):
  """Fully connected binary classifier.

  Layer stack: Dense(1024, relu) -> Dropout(0.5) -> Dense(512, relu)
  -> Dense(128, relu) -> Dense(32, relu) -> Dense(1, sigmoid).
  """

  def __init__(self):
    super().__init__()
    dense = tf.keras.layers.Dense
    relu = tf.keras.activations.relu

    # Layers are created in the same order in which `call` applies them.
    self.dense1 = dense(1024, activation=relu)
    self.drop = tf.keras.layers.Dropout(rate=0.5)
    self.dense2 = dense(512, activation=relu)
    self.dense3 = dense(128, activation=relu)
    self.dense4 = dense(32, activation=relu)
    # Single sigmoid unit: the output is a probability, not a logit.
    self.dense5 = dense(1, activation=tf.keras.activations.sigmoid)

  def call(self, x):
    """Push `x` through the hidden stack and return the sigmoid output."""
    hidden = x
    for layer in (self.dense1, self.drop, self.dense2, self.dense3, self.dense4):
      hidden = layer(hidden)
    return self.dense5(hidden)


model = MyModel()

In [207]:
# RMSprop with its default settings drives the weight updates.
optimizer = tf.keras.optimizers.RMSprop()
# from_logits=False: the loss is told its inputs are probabilities
# (the model's last layer applies a sigmoid).
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=False)

In [208]:
def _metric_pair(loss_name, accuracy_name):
  """Build a (running-mean loss, binary accuracy) pair of Keras metrics."""
  return (tf.keras.metrics.Mean(name=loss_name),
          tf.keras.metrics.BinaryAccuracy(name=accuracy_name))


# One pair of accumulators per data split.
train_loss, train_accuracy = _metric_pair('train_loss', 'train_accuracy')
test_loss, test_accuracy = _metric_pair('test_loss', 'test_accuracy')

In [209]:
@tf.function
def train_step(images, labels):
  """Run one optimisation step on a batch and update the training metrics.

  Uses the module-level `model`, `loss_object`, `optimizer`,
  `train_loss` and `train_accuracy`. Returns nothing.
  """
  with tf.GradientTape() as grad_tape:
    # The forward pass is recorded on the tape; the model is called in training mode.
    batch_preds = model(images, training=True)
    batch_loss = loss_object(labels, batch_preds)

  weights = model.trainable_variables
  grads = grad_tape.gradient(batch_loss, weights)
  optimizer.apply_gradients(zip(grads, weights))

  # Fold this batch into the running aggregates.
  train_loss(batch_loss)
  train_accuracy(labels, batch_preds)

In [210]:
@tf.function
def test_step(images, labels):
  """Score one batch in inference mode and update the test metrics.

  Uses the module-level `model`, `loss_object`, `test_loss` and
  `test_accuracy`. No gradients are computed and nothing is returned.
  """
  batch_preds = model(images, training=False)
  test_loss(loss_object(labels, batch_preds))
  test_accuracy(labels, batch_preds)

In [211]:
def ds_from_df(df, shuffle=True, batch_size=32):
  """Turn a DataFrame with a 'Meme' label column into a batched tf.data.Dataset.

  Args:
    df: source DataFrame; it must contain a 'Meme' column, which is used as
      the label. The caller's frame is not modified (a copy is taken).
    shuffle: when true, shuffle with a buffer as large as the frame.
    batch_size: number of rows per batch.

  Returns:
    A dataset of (features, labels) batches, where features is a dict keyed
    by the remaining column names.

  Raises:
    KeyError: if `df` has no 'Meme' column.
  """
  # The original body read an undefined local `dataframe` instead of the `df`
  # parameter, so every call failed; work on a copy of the argument instead.
  features = df.copy()
  labels = features.pop('Meme')
  ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(features))
  ds = ds.batch(batch_size)
  return ds

In [212]:
import numpy as np

# Column 1 of the embeddings file holds one whitespace-separated vector of
# numbers per row; split each string and convert the result to a float matrix.
# Take as many rows as there are labelled samples instead of a hard-coded count.
X = np.array([vector.split() for vector in embedding[1].iloc[:len(df)]]).astype(float)
# Labels are kept 2-D (one column) by selecting with a list of column names.
y = df[['Meme']].values

In [213]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

# Pre-processing helpers, all with default settings:
#   ssc - standardisation, mms - min-max scaling, ohe - one-hot encoding.
ssc, mms, ohe = StandardScaler(), MinMaxScaler(), OneHotEncoder()

In [214]:
from datetime import date

# Append the tabular features to the embedding matrix X, one column block at a time.
# NOTE(review): every scaler/encoder below is fitted on the full dataset, i.e.
# before the train/test split done later in the notebook -- consider fitting on
# the training rows only so that test-set statistics do not leak into training.

# 'Date' strings are split on '-' into year, month, day and converted to the
# number of days elapsed since 2015-01-01.
reference_day = date(2015, 1, 1)
days_elapsed = np.array([
    (date(*(int(part) for part in stamp.split('-')[:3])) - reference_day).days
    for stamp in df['Date']
])

# reshape(-1, 1) lets NumPy infer the row count instead of hard-coding it.
date_block = mms.fit_transform(days_elapsed.reshape(-1, 1))
engagement_block = ssc.fit_transform(df[['Engagement']].values)
# The encoder returns a sparse matrix; densify it so it can be stacked.
visual_block = ohe.fit_transform(df[['Visual']].values).toarray().astype(float)
manipulation_block = df[['Manipulation']].values

# Same order and same printed shapes as before: date, engagement, visual, manipulation.
for temp in (date_block, engagement_block, visual_block, manipulation_block):
  print(temp.shape)
  X = np.hstack((X, temp))
  print(X.shape)

(1600, 1)
(1600, 2049)
(1600, 1)
(1600, 2050)
(1600, 153)
(1600, 2203)
(1600, 1)
(1600, 2204)


In [215]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation, keeping the label proportions
# equal in both parts (stratify=y). A fixed seed makes the partition the same
# on every run, so results can be compared across executions.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SPLIT_SEED)

# Wrap each part as a dataset of (feature row, label) pairs.
ds_train = tf.data.Dataset.from_tensor_slices((X_train, y_train))
ds_test = tf.data.Dataset.from_tensor_slices((X_test, y_test))

In [216]:
# Show the element spec of both datasets, one per line.
print(ds_train, ds_test, sep='\n')

<TensorSliceDataset shapes: ((2204,), (1,)), types: (tf.float64, tf.int64)>
<TensorSliceDataset shapes: ((2204,), (1,)), types: (tf.float64, tf.int64)>


In [217]:
# Training hyper-parameters.
num_epochs = 100       # intended number of passes over the training set
batch_size = 64        # rows per batch for both datasets
buffer_size = 10_000   # size of the shuffle buffer for the training set

In [218]:
# The test set is only batched.
ds_test = ds_test.batch(batch_size)
# The training set is shuffled first and then batched; with
# reshuffle_each_iteration=False the shuffled order is not re-drawn on
# later passes over the dataset.
ds_train = ds_train.shuffle(
    buffer_size=buffer_size,
    reshuffle_each_iteration=False,
).batch(batch_size)

In [219]:
# Custom training loop: one pass over the training batches followed by one
# pass over the test batches per epoch, then a one-line progress report.

# The report format does not change between epochs, so build it once.
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

# Use the configured epoch count rather than a second, hard-coded 100.
for epoch in range(num_epochs):
  # Start every epoch with empty accumulators so the figures are per-epoch.
  for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
    metric.reset_states()

  for features, labels in ds_train:
    train_step(features, labels)

  for test_features, test_labels in ds_test:
    test_step(test_features, test_labels)

  # Accuracies are reported as percentages.
  print(template.format(epoch + 1,
                        train_loss.result(),
                        train_accuracy.result() * 100,
                        test_loss.result(),
                        test_accuracy.result() * 100))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1, Loss: 1.4627102613449097, Accuracy: 55.78125, Test Loss: 0.8230292201042175, Test Accuracy: 49.6875
Epoch 2, Loss: 0.6329343914985657, Accuracy: 67.65625, Test Loss: 0.645976185798645, Test Accuracy: 67.1875
Epoch 3, Loss: 0.5864748358726501, Accuracy: 70.234375, Test Loss: 0.5516324043273926, Test Accuracy: 72.8125
Epoch 4, Loss: 0.5742389559745789, Accuracy: 72.03125, Test Loss: 0.5645105242729187, Test Accuracy: 70.625
Epoch 5, Loss: 0.5317530632019043, Accuracy: 74.765625, Test Loss: 0.5221964120864868, Test Accuracy: 72.8125
Epoch 6, Loss: 0.5303512811660767, Accuracy: 74.921875, Test Loss: 0.5291194319725037, Test Accuracy: 72.5
Epoch 7, Loss: 0.5025731325149536, Accuracy: 76