In [134]:
import math
from browser_libs import get_collection_items, get_collection_names, get_collection_for_user
from memoize import memoize # pip install memoize2
from collections import Counter
import pandas as pd
import numpy as np
import scipy as sp
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [2]:
collection_names = get_collection_names()
for x in collection_names:
  if 'experiment_vars' in x and 'for_goal' not in x:
    print(x)
    break

960c17bec89c59404248b47d_synced:experiment_vars


In [3]:
@memoize
def get_users_with_choose_difficulty():
  collection_names = get_collection_names()
  output = []
  for x in collection_names:
    if x.endswith('_internal:choose_difficulty'):
      user = x.replace('_internal:choose_difficulty', '')
      output.append(user)
  return output


In [4]:
@memoize
def get_choose_difficulty_items_for_user(user):
  collection_items = get_collection_for_user(user, 'internal:choose_difficulty')
  output = []
  for x in collection_items:
    if 'is_new_session' not in x:
      continue
    if x['is_new_session'] != True:
      continue
    if ('developer_mode' in x) and (x['developer_mode'] == True):
      continue
    if ('is_preview_mode' in x) and (x['is_preview_mode'] == True):
      continue
    if ('is_suggestion_mode' in x) and (x['is_suggestion_mode'] == True):
      continue
    output.append(x)
  return output

In [5]:
def get_choose_difficulty_counts_for_user(user):
  collection_items = get_choose_difficulty_items_for_user(user)
  output = {}
  for x in collection_items:
    if 'type' not in x:
      continue
    if x['type'] != 'action':
      continue
    if 'difficulty' not in x:
      continue
    difficulty = x['difficulty']
    if difficulty not in output:
      output[difficulty] = 1
    else:
      output[difficulty] += 1
  return output

In [6]:
def get_choose_difficulty_counts_for_user_list(user_list):
  output = Counter()
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    for difficulty,count in difficulty_counts.items():
      output[difficulty] += count
  return output

In [74]:
def get_choose_difficulty_counts_for_user_list_user_normalized(user_list):
  output = Counter()
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    difficulty_counts = to_percent_dict(difficulty_counts)
    for difficulty,count in difficulty_counts.items():
      output[difficulty] += count
  return to_percent_dict(output)

In [7]:
def get_choose_difficulty_counts_for_all_users():
  user_list = get_users_with_choose_difficulty()
  return get_choose_difficulty_counts_for_user_list(user_list)

In [8]:
def get_total_counts_for_user(user):
  difficulty_counts = get_choose_difficulty_counts_for_user(user)
  return sum(difficulty_counts.values())

In [9]:
def num_types_tried(user):
  difficulty_counts = get_choose_difficulty_counts_for_user(user)
  return len(difficulty_counts.keys())

In [10]:
def did_user_try_multiple(user):
  return num_types_tried(user) > 1

In [11]:
def get_try_multiple_counts_for_all_users():
  output = {
    'none_total': 0,
    'one_total': 0,
    'one_tried': 0,
    'multiple_tried': 0,
  }
  for user in get_users_with_choose_difficulty():
    total_counts = get_total_counts_for_user(user)
    if total_counts == 0:
      output['none_total'] += 1
      continue
    if total_counts == 1:
      output['one_total'] += 1
      continue
    if did_user_try_multiple(user):
      output['multiple_tried'] += 1
    else:
      output['one_tried'] += 1
  return output

In [12]:
def get_breakdown_for_one_tried():
  output = Counter()
  for user in get_users_with_choose_difficulty():
    total_counts = get_total_counts_for_user(user)
    if total_counts <= 1:
      continue
    if did_user_try_multiple(user):
      continue
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    chosen_difficulty = list(difficulty_counts.keys())[0]
    output[chosen_difficulty] += difficulty_counts[chosen_difficulty]
  return output

In [13]:
print(len(get_users_with_choose_difficulty()))

645


In [14]:
print(get_choose_difficulty_counts_for_all_users())

Counter({'nothing': 22346, 'easy': 8228, 'medium': 4119, 'hard': 1765})


In [15]:
print(get_try_multiple_counts_for_all_users())

{'none_total': 33, 'one_total': 111, 'one_tried': 148, 'multiple_tried': 353}


In [16]:
print(get_breakdown_for_one_tried())

Counter({'nothing': 2291, 'easy': 651, 'medium': 447, 'hard': 101})


In [17]:
@memoize
def get_abtest_settings(user):
  output = {}
  collection_items = get_collection_for_user(user, 'synced:experiment_vars')
  for item in collection_items:
    if 'key' not in item:
      continue
    if 'val' not in item:
      continue
    key = item['key']
    val = item['val']
    output[key] = val
  return output


In [18]:
def get_abtest_options_for_group(user_list):
  conditions = {}
  for user in user_list:
    abtest_settings = get_abtest_settings(user)
    for k,v in abtest_settings.items():
      if k == 'intervention_firstimpression_notice_seenlist':
        continue
      if k not in conditions:
        conditions[k] = []
      if v not in conditions[k]:
        conditions[k].append(v)
  output = {}
  for abtest_name,options in conditions.items():
    if len(options) > 1:
      output[abtest_name] = options
  return output

In [19]:
def get_abtest_condition_to_user_list(abtest_name):
  # note this only applies to users in the get_users_with_choose_difficulty experiment currently
  output = {}
  for user in get_users_with_choose_difficulty():
    abtest_settings = get_abtest_settings(user)
    if abtest_name not in abtest_settings:
      continue
    abtest_option = abtest_settings[abtest_name]
    if abtest_option not in output:
      output[abtest_option] = []
    output[abtest_option].append(user)
  return output

In [20]:
difficulty_interface_to_users = get_abtest_condition_to_user_list('choose_difficulty_interface')

In [40]:
print(difficulty_interface_to_users)

{'time_afford': ['e0ea34c81d4b50cddc7bd752', '6e7271d2eeace3391528efea', '53f3f933b4ec99eb16a8580c', '5ed53fe1b396c2936200bfbf', '8617b058a51286a5ab7b9e1c', '3056a2d989be02294adab708', 'bc2a2a9aa64af2b5cd1b403a', '09d27c86d835df2af5f0be24', '61376097ff6748baf63e881b', 'b7223983c51cdb868ed9126d', '95ee5208b83ce0800cbf5152', 'e18af57bfabb4e0f6da914b5', '46b27427d80b5394dc9f09dc', 'cf8b97a7bb2605921d756284', '41f24a3dd6bceb8d0a8df5c8', 'b18e869555ee012d017456c5', '82029ae7e0cf58810a19534a', 'c1603b3c13098a82375f9963', 'd90890b5c7b8c4efcda28c42', 'f4ac0658a07d13fab3de2498', 'dbce3d338b755aaadf7b4264', '599efe06863304b00287b7ac', 'c68a708fd971bde157484962', '23eab855cbe7f8f50e7986dc', '9c478b2a501d89dc2a68c8b4', 'c4d719399c459d7a65236f29', '09c312bcf7705bbb5e790d7a', '1589a7c5610e7fb5e3788736', '35119e013ddaa441b9c3f28c', '5fa00d34242c8c6fc66f69a7', '4631e0cbc3f222bee5fc4664', 'df0b4e455f45c5ae692175bc', '1122098b21d2edd8438f5303', '54eed4fc0cf5850c7c589b60', '08dcb4c0626fa03a27f70274', '99

In [22]:
def get_choose_difficulty_level_mean_for_user_list(users):
  difficulty_to_counts = get_choose_difficulty_counts_for_user_list(users)
  difficulty_to_value = {
    'nothing': 0,
    'easy': 1,
    'medium': 2,
    'hard': 3,
  }
  values = []
  for difficulty,count in difficulty_to_counts.items():
    value = difficulty_to_value[difficulty]
    values.append(value)
  return np.mean(values)

In [23]:
def get_choose_difficulty_level_mean_by_abtest(abtest_name):
  condition_to_user_list = get_abtest_condition_to_user_list(abtest_name)
  for condition,user_list in condition_to_user_list.items():
    print(condition + ':' + str(get_choose_difficulty_level_mean_for_user_list(user_list)))

In [67]:
def get_key_to_ordering_mappings():
  key_orderings = [
    [
      'this_intervention',
      'time_afford',
      'settings_update',
    ],
    [
      'nothing',
      'easy',
      'medium',
      'hard',
    ],
  ]
  key_to_ordering = {}
  for ordering in key_orderings:
    key = ' '.join(sorted(ordering))
    key_to_ordering[key] = ordering
  return key_to_ordering

def order_list(keys):
  key = ' '.join(sorted(keys))
  key_to_ordering_mappings = get_key_to_ordering_mappings()
  return get_key_to_ordering_mappings()[key]

def printdict(d):
  keys = order_list(d.keys())
  for x in keys:
    print(x + ': ' + str(d[x]))

def to_percent_dict(d):
  output = {}
  total = sum(d.values())
  for k,v in d.items():
    output[k] = v / total
  return output

def printdict_percent(d):
  d = to_percent_dict(d)
  printdict(d)

In [160]:
def plotbar(values, labels=None, title=''):
  data = [go.Bar(
    x=labels,
    y=values,
  )]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

def plotbarh(values, labels=None, title=''):
  data = [go.Bar(
    y=labels,
    x=values,
    orientation='h',
  )]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

def plothist(values, title=''):
  data = [go.Histogram(x=values)]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)


In [114]:
def plotdict(d, title=''):
  keys = order_list(d.keys())
  values = [d[k] for k in keys]
  plotbarh(values, keys, title)

In [24]:
#for condition,user_list in condition_to_user_list.items():
#  print(condition + ':' + str(get_choose_difficulty_level_mean_for_user_list(user_list)))

In [42]:
get_choose_difficulty_level_mean_by_abtest('choose_difficulty_interface')

time_afford:1.5
this_intervention:1.5
settings_update:1.5


In [43]:
get_choose_difficulty_level_mean_by_abtest('frequency_of_choose_difficulty')

1.0:1.5
0.0:1.5
0.5:1.5
0.25:1.5


In [70]:
printdict_percent(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['time_afford']))

nothing: 0.4391691394658754
easy: 0.31008902077151335
medium: 0.13501483679525222
hard: 0.11572700296735905


In [71]:
printdict_percent(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['this_intervention']))

nothing: 0.42681159420289855
easy: 0.31521739130434784
medium: 0.24057971014492754
hard: 0.017391304347826087


In [72]:
printdict_percent(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['settings_update']))

nothing: 0.37713139418254765
easy: 0.09027081243731194
medium: 0.4002006018054162
hard: 0.13239719157472418


In [75]:
printdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['time_afford']))

nothing: 0.4177374521561519
easy: 0.2000868032423844
medium: 0.28510530964848574
hard: 0.09707043495297807


In [76]:
printdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['this_intervention']))

nothing: 0.556428494250055
easy: 0.20084278572230765
medium: 0.20849971329747136
hard: 0.034229006730165865


In [77]:
printdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['settings_update']))

nothing: 0.4583650291740032
easy: 0.19402268452680874
medium: 0.275818535876439
hard: 0.07179375042274905


In [124]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['time_afford']), 'Difficulty selections for "time you can afford this visit" interface')

In [125]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['this_intervention']), 'Difficulty selections for "intervention difficulty you want this visit" interface')

In [126]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['settings_update']), 'Difficulty selections for "update your difficulty settings" interface')

In [127]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(frequency_of_choose_difficulty_to_users['1.0']), 'Difficulty selections if selection interface shown with p=1.0')

In [128]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(frequency_of_choose_difficulty_to_users['0.5']), 'Difficulty selections if selection interface shown with p=0.5')

In [129]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(frequency_of_choose_difficulty_to_users['0.25']), 'Difficulty selections if selection interface shown with p=0.25')

In [163]:
def compute_entropy_for_difficulty_selections(difficulty_selection_dict):
  if len(difficulty_selection_dict.keys()) == 0:
    return None
  probs = to_percent_dict(difficulty_selection_dict)
  items_to_sum = []
  for k,prob in probs.items():
    items_to_sum.append(prob * math.log(prob)/math.log(2))
  return -sum(items_to_sum)

def compute_entropy_for_difficulty_selections_for_user(user):
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    return compute_entropy_for_difficulty_selections(difficulty_counts)

def get_entropies_for_user_list(user_list):
  entropies = []
  for user in user_list:
    entropy = compute_entropy_for_difficulty_selections_for_user(user)
    if entropy == None:
      continue
    entropies.append(entropy)
  return entropies

def get_entropies_for_all_users():
  user_list = get_users_with_choose_difficulty()
  return get_entropies_for_user_list()

#print(user_list[0])
#print(compute_entropy_for_difficulty_selections({'a': 0.25, 'b': 0.75}))


In [164]:
#print(np.mean(entropies))
#print(np.sum(entropies))
entropies = get_entropies_for_all_users()
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram)')

mean entropy per user 0.4167251069285489
median entropy per user 0.18791460213862887


In [166]:
#print(np.mean(entropies))
#print(np.sum(entropies))
entropies = get_entropies_for_user_list(difficulty_interface_to_users['time_afford'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with interface=time_afford')

mean entropy per user 0.5957766515109777
median entropy per user 0.33055564508284524


In [168]:
entropies = get_entropies_for_user_list(difficulty_interface_to_users['this_intervention'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with interface=this_intervention')

mean entropy per user 0.4560157491547438
median entropy per user 0.32499952858771153


In [169]:
entropies = get_entropies_for_user_list(difficulty_interface_to_users['settings_update'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with interface=settings_update')

mean entropy per user 0.44861068198392046
median entropy per user 0.18022046943747377


In [170]:
entropies = get_entropies_for_user_list(frequency_of_choose_difficulty_to_users['1.0'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with frequency=1.0')

mean entropy per user 0.4941722578468712
median entropy per user 0.39258712260714623


In [171]:
entropies = get_entropies_for_user_list(frequency_of_choose_difficulty_to_users['0.5'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with frequency=0.5')

mean entropy per user 0.43227977883610436
median entropy per user 0.22228483068568797


In [172]:
entropies = get_entropies_for_user_list(frequency_of_choose_difficulty_to_users['0.25'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with frequency=0.25')

mean entropy per user 0.4130896585595495
median entropy per user 0.18717625687320816


In [189]:
#import moment
#moment.unix(1544559305512.0)
#import arrow
#arrow.get(1544559305512.0 / 1000)

<Arrow [2018-12-11T20:15:05.512000+00:00]>

In [228]:
def get_daynum_to_difficulty_choices(user):
  difficulty_choices = get_choose_difficulty_items_for_user(user)
  first_timestamp = None
  output = {}
  for item in difficulty_choices:
    if 'type' not in item:
      continue
    if item['type'] != 'action':
      continue
    if 'difficulty' not in item:
      continue
    difficulty = item['difficulty']
    timestamp = item['timestamp_local']
    if first_timestamp == None or timestamp < first_timestamp:
      first_timestamp = timestamp
  for item in difficulty_choices:
    if 'type' not in item:
      continue
    if item['type'] != 'action':
      continue
    if 'difficulty' not in item:
      continue
    difficulty = item['difficulty']
    timestamp = item['timestamp_local']
    daynum = (timestamp - first_timestamp) / (1000 * 3600 * 24)
    daynum = int(math.floor(daynum))
    if daynum not in output:
      output[daynum] = {}
    if difficulty not in output[daynum]:
      output[daynum][difficulty] = 0
    output[daynum][difficulty] += 1
  return output

def get_user_to_daynum_to_difficulty_choices():
  output = {}
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    if len(difficulty_counts.keys()) == 0:
      continue
    daynum_to_difficulty_choices = get_daynum_to_difficulty_choices(user)
    if len(daynum_to_difficulty_choices.keys()) == 0:
      continue
    output[user] = daynum_to_difficulty_choices
  return output

def get_daynum_to_difficulty_choices_over_n_days(num_days):
  user_to_daynum_to_difficulty_choices = get_user_to_daynum_to_difficulty_choices()
  output = []
  for daynum in range(num_days):
    item = {}
    for x in 'nothing easy medium hard'.split(' '):
      item[x] = 0
    output.append(item)
  for user,daynum_to_difficulty_choices in user_to_daynum_to_difficulty_choices.items():
    has_data = True
    for daynum in range(num_days):
      if not daynum in daynum_to_difficulty_choices:
        has_data = False
        break
    if not has_data:
      continue
    for daynum in range(num_days):
      for difficulty,num_chosen in to_percent_dict(daynum_to_difficulty_choices[daynum]).items():
        output[daynum][difficulty] += num_chosen
      output[daynum] = to_percent_dict(output[daynum])
  return output

def list_of_dictionaries_to_dictionary_with_list_values(dlist):
  output = {}
  keys = dlist[0].keys()
  for k in keys:
    output[k] = []
  for d in dlist:
    for k,v in d.items():
      output[k].append(v)
  return output

def plotline(values, title=''):
  trace = go.Scatter(
    x = list(range(len(values))),
    y=values,
  )
  data = [trace]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

def plotlines(dict_to_values, title=''):
  data = []
  for label,values in dict_to_values.items():
    trace = go.Scatter(
      x = list(range(len(values))),
      y=values,
      name=label,
    )
    data.append(trace)
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

#plotline([3,5,2])
#plotlines({'a': [3,5,2], 'b': [7,7,7]})

In [229]:
plotlines(list_of_dictionaries_to_dictionary_with_list_values(get_daynum_to_difficulty_choices_over_n_days(10)), 'Difficulty chosen over first 10 days of install')

In [230]:
def compute_entropy_over_n_days(num_days):
  daynum_to_difficulty_choices = get_daynum_to_difficulty_choices_over_n_days(num_days)
  output = []
  for daynum in range(num_days):
    difficulty_choices = daynum_to_difficulty_choices[daynum]
    entropy_for_day = compute_entropy_for_difficulty_selections(difficulty_choices)
    output.append(entropy_for_day)
  return output

#print(compute_entropy_over_n_days(10))
plotline(compute_entropy_over_n_days(10), 'Entropy of difficulty choice selections over first 10 days of install')

In [231]:
plotline(compute_entropy_over_n_days(5), 'Entropy of difficulty choice selections over first 5 days of install')

In [203]:
user_to_daynum_to_difficulty_choices = get_user_to_daynum_to_difficulty_choices()
print(len(user_to_daynum_to_difficulty_choices.keys()))

612


In [69]:
print(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['time_afford']))

Counter({'nothing': 296, 'easy': 209, 'medium': 91, 'hard': 78})


In [28]:
print(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['this_intervention']))

Counter({'nothing': 589, 'easy': 435, 'medium': 332, 'hard': 24})


In [29]:
print(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['settings_update']))

Counter({'medium': 399, 'nothing': 376, 'hard': 132, 'easy': 90})


In [30]:
frequency_of_choose_difficulty_to_users = get_abtest_condition_to_user_list('frequency_of_choose_difficulty')

In [31]:
frequency_of_choose_difficulty_to_users.keys()

dict_keys(['1.0', '0.0', '0.5', '0.25'])

In [32]:
print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['1.0']))

Counter({'nothing': 14006, 'easy': 4294, 'medium': 1154, 'hard': 609})


In [33]:
print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['0.5']))

Counter({'nothing': 4306, 'easy': 2512, 'medium': 1433, 'hard': 823})


In [34]:
print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['0.25']))

Counter({'nothing': 3402, 'easy': 1279, 'medium': 987, 'hard': 310})


In [35]:
#print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['0.0']))

In [36]:
#print('foobar')

In [37]:
#def get_lifetime_and_whether_attritioned(user):
  

In [38]:
#choose_difficulty_set = set()
#for x in collection_names:
#  if 'difficulty' in x:
#    choose_difficulty_set.add(x)

In [39]:
#print(choose_difficulty_set)
#for x in choose_difficulty_set:
  