ab-simulation contribution #258

Merged 2 commits on Jun 19, 2017
3 changes: 3 additions & 0 deletions contrib/ab-simulation/README.md
@@ -0,0 +1,3 @@
# ab-simulation

A Python 2.7 library to simulate AB tests and analyze results.
204 changes: 204 additions & 0 deletions contrib/ab-simulation/simulation.py
@@ -0,0 +1,204 @@
# A Python 2.7 library to simulate AB tests and analyze results.

Review comment (Member):
@dillard Please add the copyright header to the Python script, as this is in the open-source repo.

###############################################################################
# Copyright 2016 Intuit
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################


import math
import time
import numpy
import pandas
import scipy.stats
import matplotlib.pyplot as plt
from pandas import DataFrame, Series


### estimate and confidence interval methods for binomial distributions

def WaldEstimate(successes, trials, significance=0.05):
    z = scipy.stats.norm.ppf(1 - significance / 2.)
    value = 1.*successes / trials
    interval = z*math.sqrt(value * (1 - value) / trials)
    return value, interval

# Agresti-Coull
def ACEstimate(successes, trials, significance=0.05):
    z = scipy.stats.norm.ppf(1 - significance / 2.)
    value = 1.*(successes + z**2 / 2.) / (trials + z**2)
    interval = z*math.sqrt(value*(1 - value) / (trials + z**2))
    return value, interval
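
# Illustrative usage (approximate values; not part of the original module):
# at the default 5% significance, 40 successes in 100 trials give roughly
#
#     WaldEstimate(40, 100)   # -> (0.400, 0.096)
#     ACEstimate(40, 100)     # -> (0.404, 0.094)
#
# Agresti-Coull shrinks the point estimate toward 0.5, which behaves better
# than Wald when counts are small or rates are near 0 or 1.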


### scaling significance to account for multiple test reads
# function used to fit simulated results
def modifiedSignificance(numImpressions, significance, firstLook, scale):
    numIndependentReads = (numpy.log(1.*numImpressions / firstLook) /
                           numpy.log(scale)) + 1
    modSig = 1 - (1 - significance)**numIndependentReads
    return modSig
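
# Illustrative worked example (approximate values): with significance=0.05,
# firstLook=100, and scale=2, reading the test at numImpressions=1600 gives
#
#     numIndependentReads = log(1600/100)/log(2) + 1 = 5
#     modSig = 1 - 0.95**5 ~ 0.226
#
# i.e. peeking at every doubling of traffic inflates a nominal 5% type I
# error to roughly 23%.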


### helper methods

# returns True if the null hypothesis is rejected
def rejectNull(estimate, rate):
    value = estimate[0]
    interval = estimate[1]
    return (rate < value - interval) or (rate > value + interval)
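
# Example: an estimate of (0.55, 0.08) spans the interval [0.47, 0.63], so
#
#     rejectNull((0.55, 0.08), 0.50)   # -> False (0.50 inside the interval)
#     rejectNull((0.55, 0.08), 0.45)   # -> True  (0.45 below the interval)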


### core simulation
# simulates a single-proportion Z-test
# calculates type I error for a single test read (at end)
# and cumulative error for continuous test reads (after every impression)
def simulate(rates, significances, impressions, numTrials, firstLook=None,
             estimateFunction=WaldEstimate, seed=None):
""" simulate a single-proportion Z-test

Args:
rate (list): success rates
significances (list): significance values (1 - confidence)
impressions (int or list): maximum impressions or list of number of
impressions
numTrials (int): number of independent simulations to aggregate over
firstLook (int): first impression at which experiment is evaluated for
continuous evaluation
(defaults to 1)
estimateFunction (function): binomal approximation to use
(defaults to Wald)
seed (int, optional): seed for random number generation
(defaults to current time)

Returns:
avgRejects (DataFrame): simulate single test read at end
avgAnyRejects (DataFrame): simulate conintuous test read after every impression
Both DataFrames contain the estimate and uncertainty on the type I error
(incorrect rejection of null hypothesis) for each rate, significance, and
impression value. Results are aggregated across numTrials independent
experiments.
"""

    trials = range(numTrials)
    base = [rates, significances, trials]
    mi = pandas.MultiIndex.from_product(base, names=['rate', 'significance',
                                                     'trial'])

    if seed is None:
        numpy.random.seed(int(time.time()))
    else:
        numpy.random.seed(seed)

    if type(impressions) == int:
        points = range(1, impressions + 1)
    else:
        points = impressions

    avgRejects = None
    avgAnyRejects = None

    for n in points:
        if n <= 0:
            raise ValueError("All values in impressions must be positive.")
        draws = DataFrame(numpy.random.random([n, len(rates) *
                                               len(significances) *
                                               len(trials)]),
                          columns=mi)
        draws.index = range(1, n + 1)

        successes = draws.copy()
        rejects = draws.copy()

        for rate in rates:
            successes[rate] = draws[rate].applymap(lambda x: int(x < rate))
        cumSuccesses = successes.apply(numpy.core.fromnumeric.cumsum, raw=True)
        cumImpressions = successes.index.values
        for rate in rates:
            for sig in significances:
                for trial in trials:
                    vals = Series(zip(cumSuccesses.loc[:, (rate, sig, trial)].values,
                                      cumImpressions))
                    vals.index = cumImpressions
                    rejects.loc[:, (rate, sig, trial)] = vals.apply(lambda x: \
                        int(rejectNull(estimateFunction(x[0], x[1], sig), rate)))

        if firstLook is not None:
            anyRejects = rejects.ix[firstLook:].max()

        # apply binomial approximation to estimate type I error rate
        if avgRejects is None:
            avgRejects = rejects[-1:]. \
                groupby(axis=1, level=['rate', 'significance']). \
                sum(). \
                applymap(lambda x: estimateFunction(x, numTrials))
        else:
            avgRejects.ix[n] = rejects[-1:]. \
                groupby(axis=1, level=['rate', 'significance']). \
                sum(). \
                applymap(lambda x: estimateFunction(x, numTrials)). \
                values[0]

        # apply binomial approximation to estimate cumulative type I error rate
        if firstLook is not None:
            if avgAnyRejects is None:
                avgAnyRejects = DataFrame(anyRejects. \
                    groupby(level=['rate', 'significance']). \
                    sum(). \
                    map(lambda x: estimateFunction(x, numTrials))). \
                    transpose()
                avgAnyRejects.index = avgRejects.index.copy()
            else:
                avgAnyRejects.ix[n] = anyRejects. \
                    groupby(level=['rate', 'significance']). \
                    sum(). \
                    map(lambda x: estimateFunction(x, numTrials)). \
                    values

    return avgRejects, avgAnyRejects
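
# Note on the return format: each cell of avgRejects/avgAnyRejects holds a
# (value, interval) tuple produced by estimateFunction, keyed by a
# (rate, significance) column MultiIndex and indexed by impression count;
# plotRejects below unpacks the point estimates with x[0].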


### plotting

def plotRejects(avgRejects, avgAnyRejects):
    impressions = avgRejects.index
    rates = avgRejects.columns.levels[0]
    sigs = avgRejects.columns.levels[1]

    colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1), (1, 1, 0), (0, 1, 1), (1, 0, 1),
              (1, 1, 1)]
    levels = [(val + 1.)/len(sigs) for val in range(len(sigs))]
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(1, 1, 1)
    for (rate, baseColor) in zip(rates, colors[:len(rates)]):
        for (sig, level) in zip(sigs, levels):
            color = tuple(val*level for val in baseColor)
            rejectsVals = avgRejects[rate][sig].apply(lambda x: x[0]).values
            anyRejectsVals = avgAnyRejects[rate][sig]. \
                apply(lambda x: x[0]).values

            ax.plot(impressions, rejectsVals, color=color,
                    label="rate: %.3f; significance: %.3f" % (rate, sig))
            ax.plot(impressions, anyRejectsVals, color=color, marker='x',
                    ls='--')
    for sig in sigs:
        ax.plot(impressions, [sig]*len(impressions), color='k', ls=':')

    ax.set_xlim(0, max(impressions))
    ax.set_ylim(0, 1)
    ax.set_title('Average Reject Rate', fontsize=24)
    ax.set_xlabel('# Impressions', fontsize=20)
    ax.set_ylabel('% Rejects', fontsize=20)
    ax.legend(loc=1, fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=16)

    plt.show()
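

# A minimal end-to-end sketch of how the pieces fit together. The parameter
# values below are arbitrary illustrations chosen to keep the run fast, not
# recommendations from the original contribution.
if __name__ == '__main__':
    avg, avgAny = simulate(rates=[0.1, 0.5],           # true success rates
                           significances=[0.05, 0.01],
                           impressions=range(10, 101, 10),
                           numTrials=200,              # independent experiments
                           firstLook=1,                # evaluate from the start
                           seed=42)
    plotRejects(avg, avgAny)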