Skip to content

Commit

Permalink
Merge pull request #258 from intuit/ab-simulation
Browse files Browse the repository at this point in the history
ab-simulation contribution
  • Loading branch information
dillard committed Jun 19, 2017
2 parents 48d6f97 + 8573744 commit 5c6ca56
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 0 deletions.
3 changes: 3 additions & 0 deletions contrib/ab-simulation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ab-simulation

A Python 2.7 library to simulate AB tests and analyze results.
204 changes: 204 additions & 0 deletions contrib/ab-simulation/simulation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# A Python 2.7 library to simulate AB tests and analyze results.

###############################################################################
# Copyright 2016 Intuit
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################


import math, time, numpy, pandas
import scipy.stats
import matplotlib.pyplot as plt
from pandas import DataFrame, Series


### estimate and confidence interval methods for binomial distributions

def WaldEstimate(successes, trials, significance=0.05):
z = scipy.stats.norm.ppf(1 - significance / 2.)
value = 1.*successes / trials
interval = z*math.sqrt(value * (1 - value) / trials)
return value, interval

# Agresti-Coull
def ACEstimate(successes, trials, significance=0.05):
z = scipy.stats.norm.ppf(1 - significance / 2.)
value = 1.*(successes + z**2 / 2.) / (trials + z**2)
interval = z*math.sqrt(value*(1 - value) / (trials + z**2))
return value, interval


### scaling significance to account for multiple test reads
# fuction used to fit simulated results
def modifiedSignificance(numImpressions, significance, firstLook, scale):
numIndependentReads = (numpy.log(1.*numImpressions / firstLook) /
numpy.log(scale)) + 1
modSig = 1 - (1 - significance)**numIndependentReads
return modSig


### helper methods

# if the null hypothesis is rejected
def rejectNull(estimate, rate):
value = estimate[0]
interval = estimate[1]
return (rate < value - interval) or (rate > value + interval)


### core simulation
# simulates a single-proportion Z-test
# calculates type I error for a single test read (at end)
# and cumulative error for continuous test reads (after every impression)
def simulate(rates, significances, impressions, numTrials, firstLook=None,
estimateFunction=WaldEstimate, seed=None):
""" simulate a single-proportion Z-test
Args:
rate (list): success rates
significances (list): significance values (1 - confidence)
impressions (int or list): maximum impressions or list of number of
impressions
numTrials (int): number of independent simulations to aggregate over
firstLook (int): first impression at which experiment is evaluated for
continuous evaluation
(defaults to 1)
estimateFunction (function): binomal approximation to use
(defaults to Wald)
seed (int, optional): seed for random number generation
(defaults to current time)
Returns:
avgRejects (DataFrame): simulate single test read at end
avgAnyRejects (DataFrame): simulate conintuous test read after every impression
Both DataFrames contain the estimate and uncertainty on the type I error
(incorrect rejection of null hypothesis) for each rate, significance, and
impression value. Results are aggregated across numTrials independent
experiments.
"""

trials = range(numTrials)
base = [rates, significances, trials]
mi = pandas.MultiIndex.from_product(base, names=['rate', 'significance',
'trial'])

if seed is None:
numpy.random.seed(int(time.time()))
else:
numpy.random.seed(seed)

if type(impressions) == int:
points = range(1, impressions + 1)
else:
points = impressions

avgRejects = None
avgAnyRejects = None

for n in points:
if n <= 0:
raise ValueError("All values in impressions must be positive.")
draws = DataFrame(numpy.random.random([n, len(rates) *
len(significances) *
len(trials)]),
columns=mi)
draws.index = range(1, n + 1)

successes = draws.copy()
rejects = draws.copy()

for rate in rates:
successes[rate] = draws[rate].applymap(lambda x: int(x < rate))
cumSuccesses = successes.apply(numpy.core.fromnumeric.cumsum, raw=True)
cumImpressions = successes.index.values
for rate in rates:
for sig in significances:
for trial in trials:
vals = Series(zip(cumSuccesses.loc[:, (rate, sig, trial)].values,
cumImpressions))
vals.index = cumImpressions
rejects.loc[:, (rate, sig, trial)] = vals.apply(lambda x: \
int(rejectNull(estimateFunction(x[0], x[1], sig), rate)))

if firstLook is not None:
anyRejects = rejects.ix[firstLook:].max()

# apply binomial approximation to estimate type I error rate
if avgRejects is None:
avgRejects = rejects[-1:]. \
groupby(axis=1, level=['rate', 'significance']). \
sum(). \
applymap(lambda x: estimateFunction(x, numTrials))
else:
avgRejects.ix[n] = rejects[-1:]. \
groupby(axis=1, level=['rate', 'significance']). \
sum(). \
applymap(lambda x: estimateFunction(x, numTrials)). \
values[0]

# apply binomial approximation to estimate cumulative type I error rate
if firstLook is not None:
if avgAnyRejects is None:
avgAnyRejects = DataFrame(anyRejects. \
groupby(level=['rate', 'significance']). \
sum(). \
map(lambda x: estimateFunction(x, numTrials))). \
transpose()
avgAnyRejects.index = avgRejects.index.copy()
else:
avgAnyRejects.ix[n] = anyRejects. \
groupby(level=['rate', 'significance']). \
sum(). \
map(lambda x: estimateFunction(x, numTrials)). \
values

return avgRejects, avgAnyRejects


### plotting

def plotRejects(avgRejects, avgAnyRejects):
impressions = avgRejects.index
rates = avgRejects.columns.levels[0]
sigs = avgRejects.columns.levels[1]

colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1), (1, 1, 0), (0, 1, 1), (1, 0, 1),
(1, 1, 1)]
levels = [(val + 1.)/len(sigs) for val in range(len(sigs))]
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)
rateIndex = 0
for (rate, baseColor) in zip(rates, colors[:len(rates)]):
for (sig, level) in zip(sigs, levels):
color = tuple(val*level for val in baseColor)
rejectsVals = avgRejects[rate][sig].apply(lambda x: x[0]).values
anyRejectsVals = avgAnyRejects[rate][sig]. \
apply(lambda x: x[0]).values

ax.plot(impressions, rejectsVals, color=color,
label="rate: %.3f; significance: %.3f" % (rate, sig))
ax.plot(impressions, anyRejectsVals, color=color, marker='x',
ls='--')
for sig in sigs:
ax.plot(impressions, [sig]*len(impressions), color='k', ls=':')

ax.set_xlim(0, max(impressions))
ax.set_ylim(0, 1)
ax.set_title('Average Reject Rate', fontsize=24)
ax.set_xlabel('# Impressions', fontsize=20)
ax.set_ylabel('% Rejects', fontsize=20)
ax.legend(loc=1, fontsize=18)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

0 comments on commit 5c6ca56

Please sign in to comment.