In [1]:
! pip install -q tensorflow numpy pandas scikit-learn mlxtend
! cp drive/My\ Drive/Colab\ Notebooks/*.csv ./

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from datetime import date
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

In [3]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32, label_column='Meme'):
  """Turn a pandas DataFrame into a batched ``tf.data.Dataset``.

  The frame is copied first, so the caller's DataFrame is left untouched.

  Args:
    dataframe: Source frame; must contain ``label_column``.
    shuffle: When true, shuffle with a buffer covering every row.
    batch_size: Number of rows per emitted batch.
    label_column: Name of the column used as the label. Defaults to
      ``'Meme'`` so existing calls behave as before.

  Returns:
    A dataset yielding ``(features_dict, labels)`` batches, where
    ``features_dict`` maps each remaining column name to its values.

  Raises:
    KeyError: If ``label_column`` is not a column of ``dataframe``.
  """
  features = dataframe.copy()
  labels = features.pop(label_column)
  ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  # A shuffle buffer of size 0 is rejected by tf.data, so skip shuffling
  # entirely when the frame has no rows.
  if shuffle and len(features) > 0:
    ds = ds.shuffle(buffer_size=len(features))
  return ds.batch(batch_size)

In [4]:
# Load the task-1 training table and its companion embeddings file. The
# embeddings CSV is read with header=None, so its columns are integer-labelled.
df, embedding = (
    pd.read_csv('dankmemes_task1_train.csv'),
    pd.read_csv('dankmemes_task1_train_embeddings.csv', header=None),
)

In [None]:
# Show the distinct values present in the 'Visual' column.
visual_values = df[['Visual']].values
print(np.unique(visual_values))

In [9]:
# Column 1 of the embeddings file holds one whitespace-separated vector per row.
# Iterating over the column itself (instead of range(1600)) keeps this cell
# correct if the number of rows in the file changes.
X = np.array([row.split() for row in embedding[1]]).astype(float)
# Labels as an (n_rows, 1) array taken from the 'Meme' column.
# NOTE(review): X and y are assumed to be aligned row-for-row — confirm the two
# CSVs share the same ordering.
y = df[['Meme']].values
print(X.shape)

(1600, 2048)


In [8]:
# Keep only the tabular metadata columns.
# NOTE(review): this rebinds X, which the embedding cell also assigns; whichever
# cell ran last decides what X holds.
metadata_columns = ['Engagement', 'Date', 'Manipulation', 'Visual']
X = df[metadata_columns]
print(X)

Index(['File', 'Engagement', 'Date', 'Manipulation', 'Visual', 'Text', 'Meme'], dtype='object')


In [None]:
# Accumulator that later cells append their features to; starts out empty.
feature_columns = list()

In [None]:
# Register one numeric feature column per header. The factory lives under the
# `tf` namespace; the bare name `feature_column` is never imported in this
# notebook and would raise NameError.
# NOTE(review): the column listing printed earlier shows no 'Embedding' column
# in df, and 'Date' is parsed elsewhere as a 'Y-M-D' string — confirm both are
# valid numeric inputs before training.
for header in ['Embedding', 'Engagement', 'Date', 'Manipulation']:
  feature_columns.append(tf.feature_column.numeric_column(header))

In [None]:
# One preprocessor per feature family: standardisation, min-max scaling and
# one-hot encoding. Each is fitted in a later cell.
ssc, mms, ohe = StandardScaler(), MinMaxScaler(), OneHotEncoder()

In [None]:
# Day from which every date offset is measured.
reference_day = date(2015, 1, 1)

# Convert each 'Y-M-D' string into the number of days since reference_day.
# The string is split once per row, and reshape(-1, 1) derives the row count
# from the data instead of assuming exactly 1600 rows.
days_df = np.array([
    (date(*(int(part) for part in day.split('-')[:3])) - reference_day).days
    for day in df['Date']
]).reshape(-1, 1)
# Rescale the day offsets with the min-max scaler (fitted on this same column).
days_df_mms = mms.fit_transform(days_df)

In [None]:
print(days_df.shape)
print(days_df_mms)
print(days_df_mms.shape)
# Append the min-max scaled offsets rather than the raw day counts: the scaled
# array was computed for this purpose and was otherwise never used.
# NOTE(review): feature_columns is later handed to DenseFeatures, which expects
# feature-column objects rather than NumPy arrays — confirm how these arrays
# are meant to reach the model.
feature_columns.append(days_df_mms)

In [None]:
# Standardise the 'Engagement' column (kept 2-D, as the scaler requires) and
# add the result to the feature accumulator.
engagement_raw = df[['Engagement']].values
engagement_df = ssc.fit_transform(engagement_raw)
feature_columns.append(engagement_df)

In [None]:
# 'Manipulation' is used as-is (no scaling); keep it as a 2-D column array.
manipulation_df = df.loc[:, ['Manipulation']].values
feature_columns.append(manipulation_df)

In [None]:
# One-hot encode the 'Visual' column and densify the sparse result.
visual_column = df[['Visual']].values.reshape(-1, 1)
visual_sparse = ohe.fit_transform(visual_column)
visual_df = visual_sparse.toarray()
print(visual_df)
feature_columns.append(visual_df)

In [None]:
# Sanity check: how many entries have been accumulated so far.
n_feature_entries = len(feature_columns)
print(n_feature_entries)

In [None]:
# Input layer that assembles the accumulated feature columns into one tensor.
feature_layer = tf.keras.layers.DenseFeatures(feature_columns=feature_columns)

In [None]:
# Fixed seed so the three splits are identical on every Restart & Run All.
SPLIT_SEED = 42

# Hold out 20% for testing, then 20% of the remainder for validation.
train_val, test = train_test_split(df, test_size=0.2, random_state=SPLIT_SEED)
train, val = train_test_split(train_val, test_size=0.2, random_state=SPLIT_SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

In [None]:
batch_size = 32
# Only the training split is shuffled; validation and test keep their row order.
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds, test_ds = (
    df_to_dataset(split, shuffle=False, batch_size=batch_size)
    for split in (val, test)
)

In [None]:
# Binary classifier: three ReLU hidden layers with dropout, then a single
# sigmoid unit that outputs a probability.
model = tf.keras.Sequential([
  feature_layer,
  tf.keras.layers.Dense(1024, activation=tf.keras.activations.relu),
  tf.keras.layers.Dropout(rate=0.5),
  tf.keras.layers.Dense(512, activation=tf.keras.activations.relu),
  tf.keras.layers.Dropout(rate=0.5),
  tf.keras.layers.Dense(512, activation=tf.keras.activations.relu),
  tf.keras.layers.Dropout(rate=0.5),
  tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid)])

# The output layer already applies a sigmoid, so the loss must treat the
# predictions as probabilities (from_logits=False); from_logits=True would
# apply a second sigmoid inside the loss.
# The metric is passed as an instance, not the class, and `learning_rate`
# replaces the deprecated `lr` keyword.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.BinaryAccuracy()])

# Train on the shuffled training batches, validating after every epoch.
hist = model.fit(train_ds,
                 validation_data=val_ds,
                 epochs=100)