In [2]:
import tensorflow as tf
import tensorflow.contrib.distributions as tfd

import edward as ed
from edward.models import Normal
from edward.models import MultivariateNormalFullCovariance
from edward.models import MultivariateNormalTriL

%matplotlib inline
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import csv
import IPython
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import requests
from IPython.display import display

plt.style.use('ggplot')

In [7]:
data = pd.read_csv('../../doc/tc_fda.csv')
data = data.drop_duplicates()
data['sales'] = data['cont_sales'] + data['test_sales_post_contact']
data = data.join(pd.get_dummies(data['row_type']))


In [6]:
x_pd_train = data.iloc[:, -2:]
y_pd_train = data.loc[:, ['sales']]
y_pd_train = np.log(y_pd_train + 1)

x_train = x_pd_train.as_matrix()
y_train = np.squeeze(y_pd_train.as_matrix())

<h1>fixed effect model - assume store is not from the sample design

In [9]:
sess = ed.get_session()

N, D = x_train.shape
fixed_effects = tf.placeholder(tf.float32, [N, D])

beta_fixed_effects = Normal(loc=tf.zeros(D), scale=tf.ones(D))
alpha = Normal(loc=tf.zeros(1), scale=tf.ones(1))

# simple fxied effect model
mu_y = alpha + ed.dot(fixed_effects, beta_fixed_effects)
y = Normal(loc=mu_y, scale=tf.ones(N))

# latent fixed effects
q_beta_fixed_effects = Normal(
    loc=tf.Variable(tf.random_normal([D])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([D])))
)
q_alpha = Normal(
    loc=tf.Variable(tf.random_normal([1])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([1])))
)

latent_vars = {
    beta_fixed_effects: q_beta_fixed_effects,
    alpha: q_alpha
}

sess.run(tf.global_variables_initializer())
inference = ed.KLqp(latent_vars, data={fixed_effects: x_train, y: y_train})
inference.run(n_samples=20, n_iter=500)

500/500 [100%] ██████████████████████████████ Elapsed: 10s | Loss: 3199.923


In [10]:
# fixed effect estimate
q_beta_approx = np.mean(q_beta_fixed_effects.sample(500).eval(), axis=0)
q_alpha_approx = np.mean(q_alpha.sample(500).eval(), axis=0)

df = [pd.DataFrame({x_pd_train.columns.values[i]:q_beta_approx[i]}, index=[0]) for i in range(2)]
display(pd.DataFrame(q_alpha_approx, columns=['alpha']).join(df))
display('lift: ' + str((1 - q_beta_approx[0]/q_beta_approx[1])))

Unnamed: 0,alpha,cont,test
0,0.60844,0.832406,0.863445


'lift: 0.03594785928726196'

In [11]:
def compute_mean_absolute_error(y_posterior, X_val_feed_dict, y_val):
    data = {y_posterior: y_val}
    data.update(X_val_feed_dict)
    mae = ed.evaluate('mean_absolute_error', data=data)
    return mae
    
def plot_residuals(y_posterior, X_val_feed_dict, title, y_val):
    y_posterior_preds = y_posterior.eval(feed_dict=X_val_feed_dict)
    plt.figure(figsize=(9, 6))
    plt.hist(y_posterior_preds - y_val, edgecolor='white', linewidth=1, bins=30, alpha=.7)
    plt.axvline(0, color='#A60628', linestyle='--')
    plt.xlabel('`y_posterior_preds - y_val`', fontsize=14)
    plt.ylabel('Count', fontsize=14)
    plt.title(title, fontsize=16)

X_feed_dict = {
  fixed_effects: x_train
}

y_posterior = ed.copy(y, latent_vars)

mae = compute_mean_absolute_error(y_posterior, X_feed_dict, y_train)
print('mae=', mae)

mae= 2.3183417


<h1>mixed effect - equal store covariance model

In [12]:
store_train = pd.Categorical(data.Store).codes + 1
n_store = len(set(store_train))

# random-effect placeholder
store_data = tf.placeholder(tf.int32, [N])

# random-effect parameter : assume equal covariance structure in store
sigma_store = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([])))) * tf.ones(n_store)
alpha_store = Normal(loc=tf.zeros(n_store), scale=sigma_store)
    
# random effect model
alpha_random_effects = tf.gather(alpha_store, store_data)
mu_y = alpha + alpha_random_effects + ed.dot(fixed_effects, beta_fixed_effects)
y = Normal(loc=mu_y, scale=tf.ones(N))

# approximate random-effect distribution
q_alpha_store = Normal(
    loc=tf.Variable(tf.random_normal([n_store])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_store])))
)

latent_vars = {
    beta_fixed_effects: q_beta_fixed_effects,
    alpha: q_alpha,
    alpha_store: q_alpha_store
}

sess.run(tf.global_variables_initializer())
print('init G=', sess.run(sigma_store)) # G
inference = ed.KLqp(latent_vars, data={fixed_effects: x_train, store_data: store_train, y: y_train})
inference.run(n_samples=20, n_iter=500)

init G= [0.8262286 0.8262286 0.8262286 0.8262286 0.8262286 0.8262286 0.8262286
 0.8262286 0.8262286 0.8262286 0.8262286 0.8262286 0.8262286 0.8262286
 0.8262286]
500/500 [100%] ██████████████████████████████ Elapsed: 13s | Loss: 3120.764


In [13]:
# fixed effect estimate
q_beta_approx = np.mean(q_beta_fixed_effects.sample(500).eval(), axis=0)
q_alpha_approx = np.mean(q_alpha.sample(500).eval(), axis=0)

df = [pd.DataFrame({x_pd_train.columns.values[i]:q_beta_approx[i]}, index=[0]) for i in range(2)]
display(pd.DataFrame(q_alpha_approx, columns=['alpha']).join(df))
display('lift: ' + str((1 - q_beta_approx[0]/q_beta_approx[1])))

Unnamed: 0,alpha,cont,test
0,0.4637,1.286143,1.34775


'lift: 0.04571110010147095'

In [18]:
# randon effect Z and G estimate
q_alpha_store_approx = np.mean(q_alpha_store.sample(500).eval(), axis=0)
G = sess.run(sigma_store)

data['store_code'] = pd.Categorical(data.Store).codes + 1
Gdata = data.loc[:, ['Store', 'store_code']].drop_duplicates().sort_values('store_code').reset_index(drop=True)
Z_df = pd.DataFrame(q_alpha_store_approx, columns=['Z'])
G_df = pd.DataFrame(G, columns=['G'])
Gdata.join(Z_df).join(G_df).iloc[:, 1:]

Unnamed: 0,store_code,Z,G
0,1,0.03068,0.866025
1,2,-0.26516,0.866025
2,3,0.180868,0.866025
3,4,-0.584251,0.866025
4,5,0.417737,0.866025
5,6,-1.246331,0.866025
6,7,-0.550649,0.866025
7,8,-1.085242,0.866025
8,9,-0.432471,0.866025
9,10,-0.621422,0.866025


In [19]:
X_feed_dict = {
    fixed_effects: x_train,
    store_data: store_train
}

y_posterior = ed.copy(y, latent_vars)

mae = compute_mean_absolute_error(y_posterior, X_feed_dict, y_train)
print('mae=', mae)

mae= 2.2102077


<h1> mixed effect - unequal store covariance model

In [20]:
# n_store x n_store diag cov matrix
sigma_store_cov = tf.diag(tf.sqrt(tf.exp(tf.Variable(tf.random_normal([n_store])))))
alpha_store = MultivariateNormalFullCovariance(loc=tf.zeros(n_store), covariance_matrix=sigma_store_cov)

# approximate random-effect distribution
alpha_random_effects = tf.gather(alpha_store, store_data)
mu_y = alpha + alpha_random_effects + ed.dot(fixed_effects, beta_fixed_effects)
y = Normal(loc=mu_y, scale=tf.ones(N))

# approximate random-effect distribution
q_alpha_store = Normal(
    loc=tf.Variable(tf.random_normal([n_store])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_store])))
)

latent_vars = {
    beta_fixed_effects: q_beta_fixed_effects,
    alpha: q_alpha,
    alpha_store: q_alpha_store
}

sess.run(tf.global_variables_initializer())
print('init G=', sess.run(tf.diag_part(sigma_store_cov))) # G
inference = ed.KLqp(latent_vars, data={fixed_effects: x_train, store_data: store_train, y: y_train})
inference.run(n_samples=20, n_iter=500)

init G= [0.8720654  1.5931737  0.82884234 0.5776716  1.0093937  0.8402702
 1.3447489  0.6262644  0.97905767 0.55013746 1.5606128  0.68117565
 0.8256269  1.2945778  0.5599462 ]
500/500 [100%] ██████████████████████████████ Elapsed: 42s | Loss: 3114.836


In [21]:
# fixed effect estimate
q_beta_approx = np.mean(q_beta_fixed_effects.sample(500).eval(), axis=0)
q_alpha_approx = np.mean(q_alpha.sample(500).eval(), axis=0)

df = [pd.DataFrame({x_pd_train.columns.values[i]:q_beta_approx[i]}, index=[0]) for i in range(2)]
display(pd.DataFrame(q_alpha_approx, columns=['alpha']).join(df))
display('lift: ' + str((1 - q_beta_approx[0]/q_beta_approx[1])))

Unnamed: 0,alpha,cont,test
0,0.698813,0.60942,0.671946


'lift: 0.09305208921432495'

In [23]:
# randon effect Z and G estimate
q_alpha_store_approx = np.mean(q_alpha_store.sample(500).eval(), axis=0)
G =  sess.run(tf.diag_part(sigma_store_cov))

data['store_code'] = pd.Categorical(data.Store).codes + 1
Gdata = data.loc[:, ['Store', 'store_code']].drop_duplicates().sort_values('store_code').reset_index(drop=True)
Z_df = pd.DataFrame(q_alpha_store_approx, columns=['Z'])
G_df = pd.DataFrame(G, columns=['G'])
Gdata.join(Z_df).join(G_df).iloc[:, 1:]

Unnamed: 0,store_code,Z,G
0,1,-0.029793,2.385794
1,2,0.081055,0.012081
2,3,0.570032,0.357328
3,4,-0.03685,0.004951
4,5,0.842657,0.744312
5,6,-0.805648,0.691579
6,7,-0.067491,0.021327
7,8,-0.67405,0.480821
8,9,-0.015424,0.001727
9,10,-0.03398,0.008834


In [24]:
X_feed_dict = {
    fixed_effects: x_train,
    store_data: store_train
}

y_posterior = ed.copy(y, latent_vars)

mae = compute_mean_absolute_error(y_posterior, X_feed_dict, y_train)
print('mae=', mae)

mae= 2.2140877
