In [None]:
import sys
import time
import random

In [None]:
import keras
from keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape
from keras.layers import BatchNormalization
from keras.models import Model
from keras.datasets import mnist
from keras.losses import binary_crossentropy
from keras import backend as K

In [None]:
import numpy as np
import pandas as pd

In [None]:
import matplotlib.pyplot as plt

***

In [None]:
# Load the genuine-account profiles and their tweets from the local dataset folder
gen_users = pd.read_csv(filepath_or_buffer='dataset/users.csv')
gen_tweets = pd.read_csv(filepath_or_buffer='dataset/tweets.csv')

In [None]:
# Preview only the first rows instead of rendering the whole tweets table
gen_tweets.head()

In [None]:
def create_digital_dna_from_profile(users_df):
    '''Return a copy of users_df with 'description' and 'url' replaced by presence flags.

    Parameters:
        users_df: DataFrame with at least the columns 'description' and 'url'.

    Returns:
        A new DataFrame with the same rows and columns as users_df, where
        'description' holds "D" (available) or "E" (not available) and
        'url' holds "U" (available) or "V" (not available).
        The input frame is left untouched.
    '''
    # Work on a copy so the caller's frame is not overwritten in place.
    df = users_df.copy()

    # D - Description is available, E - Description not available
    df['description'] = np.where(users_df['description'].isnull(), "E", "D")
    # U - URL is available, V - URL is not available
    df['url'] = np.where(users_df['url'].isnull(), "V", "U")

    return df

def create_digital_dna_from_tweets(tweets_df):
    '''For each user id in tweets_df return a digital DNA string based on posting behaviour.

    Parameters:
        tweets_df: DataFrame with the columns 'user_id', 'timestamp',
            'retweeted_status_id' and 'in_reply_to_status_id'. A status id of 0
            (or a missing value) means "not a retweet" / "not a reply".

    Returns:
        A Series named 'DNA', indexed by user_id, whose values are the user's
        posts in timestamp order encoded as one letter each:
        A = plain tweet, C = retweet, T = reply. A post that is both a retweet
        and a reply is encoded as C.
        The input frame is not modified.
    '''
    retweet_ids = tweets_df['retweeted_status_id']
    reply_ids = tweets_df['in_reply_to_status_id']

    # A missing id must not count as a retweet/reply (NaN == 0 is False, so a
    # plain "== 0" test would classify every missing value as a retweet).
    is_retweet = retweet_ids.notna() & (retweet_ids != 0)
    is_reply = reply_ids.notna() & (reply_ids != 0)

    # DNA alphabet for tweet (A), retweet (C) and reply (T).
    # .assign builds a new frame, so no helper columns leak into tweets_df.
    dna_frame = tweets_df[['user_id', 'timestamp']].assign(
        DNA=np.where(is_retweet, 'C', np.where(is_reply, 'T', 'A'))
    )

    # Sort tweets by timestamp; mergesort is stable, so posts sharing a
    # timestamp keep their input order and the result is deterministic.
    dna_frame = dna_frame.sort_values(by=['timestamp'], kind='mergesort')

    # Create digital DNA string for account.
    return dna_frame.groupby(by=['user_id'])['DNA'].agg(''.join)

In [None]:
# Report the dimensions of the raw inputs
print(f'Users shape: {gen_users.shape}')
print(f'Tweets shape: {gen_tweets.shape}')

In [None]:
# Keep only the profile columns that the later cells work with
filtered_user_data = gen_users.filter(items=[
    "id",
    "statuses_count",
    "followers_count",
    "friends_count",
    "favourites_count",
    "listed_count",
    "url",
    "description",
    "timestamp",
    "updated",
])

In [None]:
# Processing user data and tweets of users
processed_user_data = create_digital_dna_from_profile(filtered_user_data)
processed_tweets = create_digital_dna_from_tweets(gen_tweets)

# Compile user data with dna tweets.
# processed_tweets is indexed by user id, so Series.map looks up each account's
# DNA string in one vectorised step. This replaces a per-row chained assignment
# (frame['tweets_dna'][i] = ...), which triggers SettingWithCopyWarning and does
# not write through when pandas Copy-on-Write is enabled. Accounts without any
# tweets get an empty DNA string instead of raising KeyError.
processed_user_data['tweets_dna'] = (
    processed_user_data['id'].map(processed_tweets).fillna("")
)

print('(Processed) Users shape:', processed_user_data.shape)
print('(Processed) Tweets shape:', processed_tweets.shape)

In [None]:
# Model features: every processed column except the account id
twitter_account = processed_user_data.filter(items=[
    "statuses_count",
    "followers_count",
    "friends_count",
    "favourites_count",
    "listed_count",
    "url",
    "description",
    "timestamp",
    "updated",
    "tweets_dna",
])

In [None]:
# Number of channels of the model input; also read by the decoder's output
# layer (filters=num_channels), so it has to be defined before that cell runs.
num_channels = 1

# Treat every account as a 1 x num_features single-channel "image": Conv2D
# needs a (height, width, channels) sample shape, not a flat (1, n) one.
num_features = twitter_account.shape[1]
input_shape = (1, num_features, num_channels)
input_train = twitter_account.values.reshape((twitter_account.shape[0],) + input_shape)
# NOTE(review): 'url', 'description' and 'tweets_dna' hold strings at this point,
# so input_train has to be encoded numerically before it can be used for
# training - TODO confirm where that encoding is meant to happen.

# Training hyper-parameters
batch_size = 20
no_epochs = 50
validation_split = 0.1
verbosity = 1

# Size of the latent space
latent_dim = 1

In [None]:
# Display the shape of the training array built above
input_train.shape

## Encoder

In [None]:
# Encoder graph: two stride-2 convolutions (8, then 16 filters), each followed by
# batch normalisation, then a 20-unit dense layer feeding two latent_dim-wide heads.
# NOTE(review): Conv2D is applied directly to `i`, so input_shape presumably has
# to include a channel axis (height, width, channels) - confirm against input_shape.
i       = Input(shape=input_shape, name='encoder_input')
cx      = Conv2D(filters=8, kernel_size=3, strides=2, padding='same', activation='relu')(i)
cx      = BatchNormalization()(cx)
cx      = Conv2D(filters=16, kernel_size=3, strides=2, padding='same', activation='relu')(cx)
cx      = BatchNormalization()(cx)
# `cx` is kept separate from `x`: its shape is read afterwards via K.int_shape(cx).
x       = Flatten()(cx)
x       = Dense(20, activation='relu')(x)
x       = BatchNormalization()(x)
# sample_z computes exp(sigma / 2), i.e. `sigma` is used as a log-variance,
# not as a standard deviation.
mu      = Dense(latent_dim, name='latent_mu')(x)
sigma   = Dense(latent_dim, name='latent_sigma')(x)

In [None]:
# Get Conv2D shape for Conv2DTranspose operation in decoder.
# The decoder reads conv_shape[1], conv_shape[2] and conv_shape[3] to size its
# Dense and Reshape layers, so this must be captured while `cx` still refers to
# the encoder's last convolutional feature map.
conv_shape = K.int_shape(cx)

In [None]:
# Reparameterised sampling used by the latent Lambda layer
def sample_z(args):
  '''Return mu + exp(sigma / 2) * eps for random-normal noise eps.

  args is the pair (mu, sigma) of latent tensors; sigma enters through
  exp(sigma / 2), i.e. it is treated as a log-variance. The noise has the
  same (batch, dim) shape as mu.
  '''
  z_mean, z_log_var = args
  n_rows = K.shape(z_mean)[0]       # dynamic batch size
  n_dims = K.int_shape(z_mean)[1]   # static latent width
  noise = K.random_normal(shape=(n_rows, n_dims))
  return z_mean + K.exp(z_log_var / 2) * noise

In [None]:
# Use reparameterization trick to ensure correct gradient:
# z is produced by sample_z from the two encoder heads [mu, sigma] and has
# latent_dim components per sample.
z       = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([mu, sigma])

In [None]:
# Build the encoder model: it exposes both distribution heads and the sampled z
encoder = Model(inputs=i, outputs=[mu, sigma, z], name='encoder')
encoder.summary()

## Decoder

In [None]:
# Decoder graph: a dense layer sized to the encoder's last feature map
# (conv_shape[1] * conv_shape[2] * conv_shape[3]), reshaped back to that map,
# then two stride-2 transposed convolutions (16, then 8 filters) and a final
# sigmoid transposed convolution with num_channels filters.
# `x` and `cx` are rebound here, so after this cell they no longer refer to the
# encoder tensors.
# `num_channels` is not assigned in this block; it must already be defined when
# this cell runs.
# NOTE(review): the two stride-2 'same' transposed convolutions presumably
# upsample each spatial dimension of conv_shape by 4; confirm that the result
# matches the encoder input size (it would not for dimensions that are not
# multiples of 4).
d_i   = Input(shape=(latent_dim, ), name='decoder_input')
x     = Dense(conv_shape[1] * conv_shape[2] * conv_shape[3], activation='relu')(d_i)
x     = BatchNormalization()(x)
x     = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)
cx    = Conv2DTranspose(filters=16, kernel_size=3, strides=2, padding='same', activation='relu')(x)
cx    = BatchNormalization()(cx)
cx    = Conv2DTranspose(filters=8, kernel_size=3, strides=2, padding='same',  activation='relu')(cx)
cx    = BatchNormalization()(cx)
o     = Conv2DTranspose(filters=num_channels, kernel_size=3, activation='sigmoid', padding='same', name='decoder_output')(cx)

In [None]:
# Build the decoder model: latent vector in, reconstructed tensor out
decoder = Model(inputs=d_i, outputs=o, name='decoder')
decoder.summary()

## VAE

In [None]:
# Chain encoder and decoder: only the encoder's third output (the sampled z) is decoded
vae_outputs = decoder(encoder(i)[2])
vae = Model(inputs=i, outputs=vae_outputs, name='vae')
vae.summary()