In [13]:
import math
from browser_libs import get_collection_items, get_collection_names, get_collection_for_user
from memoize import memoize # pip install memoize2
from collections import Counter
import pandas as pd
import numpy as np
import scipy as sp
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [12]:
collection_names = get_collection_names()
for x in collection_names:
  if 'experiment_vars' in x and 'for_goal' not in x:
    print(x)
    break

960c17bec89c59404248b47d_synced:experiment_vars


In [14]:
@memoize
def get_users_with_choose_difficulty():
  collection_names = get_collection_names()
  output = []
  for x in collection_names:
    if x.endswith('_internal:choose_difficulty'):
      user = x.replace('_internal:choose_difficulty', '')
      output.append(user)
  return output


In [16]:
@memoize
def get_choose_difficulty_items_for_user(user):
  collection_items = get_collection_for_user(user, 'internal:choose_difficulty')
  output = []
  for x in collection_items:
    if 'is_new_session' not in x:
      continue
    if x['is_new_session'] != True:
      continue
    if ('developer_mode' in x) and (x['developer_mode'] == True):
      continue
    if ('is_preview_mode' in x) and (x['is_preview_mode'] == True):
      continue
    if ('is_suggestion_mode' in x) and (x['is_suggestion_mode'] == True):
      continue
    output.append(x)
  return output

In [17]:
def get_choose_difficulty_counts_for_user(user):
  collection_items = get_choose_difficulty_items_for_user(user)
  output = {}
  for x in collection_items:
    if 'type' not in x:
      continue
    if x['type'] != 'action':
      continue
    if 'difficulty' not in x:
      continue
    difficulty = x['difficulty']
    if difficulty not in output:
      output[difficulty] = 1
    else:
      output[difficulty] += 1
  return output

In [18]:
def get_choose_difficulty_counts_for_user_list(user_list):
  output = Counter()
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    for difficulty,count in difficulty_counts.items():
      output[difficulty] += count
  return output

In [19]:
def get_choose_difficulty_counts_for_user_list_user_normalized(user_list):
  output = Counter()
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    difficulty_counts = to_percent_dict(difficulty_counts)
    for difficulty,count in difficulty_counts.items():
      output[difficulty] += count
  return to_percent_dict(output)

In [20]:
def get_random_counts():
  user_list = get_users_with_choose_difficulty()
  random_counts = Counter()
  for user in user_list:
    collection_items = get_choose_difficulty_items_for_user(user)
    for item in collection_items:
      if 'is_random' in item:
        is_random = item['is_random']
        random_counts[is_random] += 1
  return random_counts

In [21]:


print(get_random_counts())

def get_random_counts2():
  user_to_random_counts = {}
  for user in user_list:
    collection_items = get_choose_difficulty_items_for_user(user)
    for item in collection_items:
      if 'is_random' in item:
        is_random = item['is_random']
        random_counts[is_random] += 1

Counter({True: 13885, False: 8949})


In [22]:
def get_choose_difficulty_counts_for_all_users():
  user_list = get_users_with_choose_difficulty()
  return get_choose_difficulty_counts_for_user_list(user_list)

In [23]:
def get_total_counts_for_user(user):
  difficulty_counts = get_choose_difficulty_counts_for_user(user)
  return sum(difficulty_counts.values())

In [24]:
def num_types_tried(user):
  difficulty_counts = get_choose_difficulty_counts_for_user(user)
  return len(difficulty_counts.keys())

In [25]:
def did_user_try_multiple(user):
  return num_types_tried(user) > 1

In [26]:
def get_try_multiple_counts_for_all_users():
  output = {
    'none_total': 0,
    'one_total': 0,
    'one_tried': 0,
    'multiple_tried': 0,
  }
  for user in get_users_with_choose_difficulty():
    total_counts = get_total_counts_for_user(user)
    if total_counts == 0:
      output['none_total'] += 1
      continue
    if total_counts == 1:
      output['one_total'] += 1
      continue
    if did_user_try_multiple(user):
      output['multiple_tried'] += 1
    else:
      output['one_tried'] += 1
  return output

In [27]:
def get_breakdown_for_one_tried():
  output = Counter()
  for user in get_users_with_choose_difficulty():
    total_counts = get_total_counts_for_user(user)
    if total_counts <= 1:
      continue
    if did_user_try_multiple(user):
      continue
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    chosen_difficulty = list(difficulty_counts.keys())[0]
    output[chosen_difficulty] += difficulty_counts[chosen_difficulty]
  return output

In [28]:
print(len(get_users_with_choose_difficulty()))

1976


In [29]:
print(get_choose_difficulty_counts_for_all_users())

Counter({'nothing': 38243, 'easy': 19520, 'medium': 11301, 'hard': 6717})


In [30]:
print(get_try_multiple_counts_for_all_users())

{'none_total': 416, 'one_total': 219, 'one_tried': 172, 'multiple_tried': 1169}


In [31]:
print(get_breakdown_for_one_tried())

Counter({'nothing': 1768, 'medium': 288, 'easy': 259, 'hard': 28})


In [32]:
@memoize
def get_abtest_settings(user):
  output = {}
  collection_items = get_collection_for_user(user, 'synced:experiment_vars')
  for item in collection_items:
    if 'key' not in item:
      continue
    if 'val' not in item:
      continue
    key = item['key']
    val = item['val']
    output[key] = val
  return output


In [33]:
def get_abtest_options_for_group(user_list):
  conditions = {}
  for user in user_list:
    abtest_settings = get_abtest_settings(user)
    for k,v in abtest_settings.items():
      if k == 'intervention_firstimpression_notice_seenlist':
        continue
      if k not in conditions:
        conditions[k] = []
      if v not in conditions[k]:
        conditions[k].append(v)
  output = {}
  for abtest_name,options in conditions.items():
    if len(options) > 1:
      output[abtest_name] = options
  return output

In [34]:
def get_abtest_condition_to_user_list(abtest_name):
  # note this only applies to users in the get_users_with_choose_difficulty experiment currently
  output = {}
  for user in get_users_with_choose_difficulty():
    abtest_settings = get_abtest_settings(user)
    if abtest_name not in abtest_settings:
      continue
    abtest_option = abtest_settings[abtest_name]
    if abtest_option not in output:
      output[abtest_option] = []
    output[abtest_option].append(user)
  return output

In [35]:
difficulty_interface_to_users = get_abtest_condition_to_user_list('choose_difficulty_interface')

In [36]:
print(difficulty_interface_to_users)

{'this_intervention_toast': ['e0ea34c81d4b50cddc7bd752', 'f6e9666fa6fd68a8ff0e2781', 'cd23b0b59ec703eaf9cfcce8', 'e3b19f3c864f8dd7af248d02', 'c51926d259247f8b31c0d01b', 'a94ee2df5080dead6c550d8e', 'edaf0e9904e39285a2ae74fd', 'e05c2c37fc88d54684c7f2fb', '039a5ab92063f09a59340e92', 'cf0d62c2cd12a5900eeaefcc', '7b9202fc61594c45a1e96f82', 'ef64cb81ca663176daa85dec', '9e2210bf3fb69f3b241a3cbc', '9fde12966c687b2051a06786', '6ffae4b0e0a58695ae4ee174', '962979c1f36542194cbe64e8', 'cf2b84b289d13a0fe56a7f71', '9ba55c3765b5fcfdb1753a1b', '39987e39f3e03b1dfc68bd5c', '6ddc12a3c376c61b9b5cd40f', 'e507909d613ba0280509942d', 'c1603b3c13098a82375f9963', 'ad5638c428073e3f8464c860', '429bd37364e6e80caec5edac', 'cddefc078d3a25a18798e6b1', '0cd2af2a1e4819f5f4f4189f', '243ea8620fd06631645f326a', 'cd35f84b81596b0930f8d9b5', '26553fe0e77b38a210ba874f', '034dea3b2abb7b2c4644b0d3', 'd56263f0671bfe181ae03695', '5f5801d2a0074e8d05ab016b', '7be4f9cc8a0397f79583ac41', '10faebf95ab8ab9ba2f9582f', 'f62c0929108931d73b

In [37]:
frequency_of_choose_difficulty_to_users = get_abtest_condition_to_user_list('frequency_of_choose_difficulty')

In [38]:
def get_choose_difficulty_level_mean_for_user_list(users):
  difficulty_to_counts = get_choose_difficulty_counts_for_user_list(users)
  difficulty_to_value = {
    'nothing': 0,
    'easy': 1,
    'medium': 2,
    'hard': 3,
  }
  values = []
  for difficulty,count in difficulty_to_counts.items():
    value = difficulty_to_value[difficulty]
    values.append(value)
  return np.mean(values)

In [39]:
def get_choose_difficulty_level_mean_by_abtest(abtest_name):
  condition_to_user_list = get_abtest_condition_to_user_list(abtest_name)
  for condition,user_list in condition_to_user_list.items():
    print(condition + ':' + str(get_choose_difficulty_level_mean_for_user_list(user_list)))

In [40]:
def get_key_to_ordering_mappings():
  key_orderings = [
    [
      'this_intervention',
      'time_afford',
      'settings_update',
    ],
    [
      'nothing',
      'easy',
      'medium',
      'hard',
    ],
  ]
  key_to_ordering = {}
  for ordering in key_orderings:
    key = ' '.join(sorted(ordering))
    key_to_ordering[key] = ordering
  return key_to_ordering

def order_list(keys):
  key = ' '.join(sorted(keys))
  key_to_ordering_mappings = get_key_to_ordering_mappings()
  return get_key_to_ordering_mappings()[key]

def printdict(d):
  keys = order_list(d.keys())
  for x in keys:
    print(x + ': ' + str(d[x]))

def to_percent_dict(d):
  output = {}
  total = sum(d.values())
  for k,v in d.items():
    output[k] = v / total
  return output

def printdict_percent(d):
  d = to_percent_dict(d)
  printdict(d)

In [41]:
def plotbar(values, labels=None, title=''):
  data = [go.Bar(
    x=labels,
    y=values,
  )]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

def plotbarh(values, labels=None, title=''):
  data = [go.Bar(
    y=labels,
    x=values,
    orientation='h',
  )]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

def plothist(values, title=''):
  data = [go.Histogram(x=values)]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)


In [42]:
def plotdict(d, title=''):
  keys = order_list(d.keys())
  values = [d[k] for k in keys]
  plotbarh(values, keys, title)

In [43]:
#for condition,user_list in condition_to_user_list.items():
#  print(condition + ':' + str(get_choose_difficulty_level_mean_for_user_list(user_list)))

In [44]:
get_choose_difficulty_level_mean_by_abtest('choose_difficulty_interface')

this_intervention_toast:1.5
this_intervention:1.5
time_afford:1.5
settings_update:1.5


In [45]:
get_choose_difficulty_level_mean_by_abtest('frequency_of_choose_difficulty')

0.5:1.5
1.0:1.5
0.0:1.5
0.25:1.5


In [46]:
printdict_percent(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['time_afford']))

nothing: 0.38329312424607964
easy: 0.20174909529553678
medium: 0.30247285886610376
hard: 0.11248492159227985


In [47]:
printdict_percent(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['this_intervention']))

nothing: 0.3818063677712801
easy: 0.3082521117608837
medium: 0.18369070825211176
hard: 0.12625081221572448


In [48]:
printdict_percent(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['settings_update']))

nothing: 0.3536449638286032
easy: 0.25069560378408456
medium: 0.24791318864774625
hard: 0.14774624373956594


In [49]:
printdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['time_afford']))

nothing: 0.3902041249206794
easy: 0.20210886408191103
medium: 0.31703253611343557
hard: 0.09065447488397406


In [50]:
printdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['this_intervention']))

nothing: 0.3549591990905958
easy: 0.31827423278293615
medium: 0.2040028136776861
hard: 0.12276375444878194


In [51]:
printdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['settings_update']))

nothing: 0.473847629972027
easy: 0.23849230438005103
medium: 0.18283301845462585
hard: 0.10482704719329623


In [52]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['time_afford']), 'Difficulty selections for "time you can afford this visit" interface')

In [53]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['this_intervention']), 'Difficulty selections for "intervention difficulty you want this visit" interface')

In [54]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(difficulty_interface_to_users['settings_update']), 'Difficulty selections for "update your difficulty settings" interface')

In [55]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(frequency_of_choose_difficulty_to_users['1.0']), 'Difficulty selections if selection interface shown with p=1.0')

In [56]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(frequency_of_choose_difficulty_to_users['0.5']), 'Difficulty selections if selection interface shown with p=0.5')

In [57]:
plotdict(get_choose_difficulty_counts_for_user_list_user_normalized(frequency_of_choose_difficulty_to_users['0.25']), 'Difficulty selections if selection interface shown with p=0.25')

In [58]:
def get_daynum_to_difficulty_choices(user):
  difficulty_choices = get_choose_difficulty_items_for_user(user)
  first_timestamp = None
  output = {}
  for item in difficulty_choices:
    if 'type' not in item:
      continue
    if item['type'] != 'action':
      continue
    if 'difficulty' not in item:
      continue
    difficulty = item['difficulty']
    timestamp = item['timestamp_local']
    if first_timestamp == None or timestamp < first_timestamp:
      first_timestamp = timestamp
  for item in difficulty_choices:
    if 'type' not in item:
      continue
    if item['type'] != 'action':
      continue
    if 'difficulty' not in item:
      continue
    difficulty = item['difficulty']
    timestamp = item['timestamp_local']
    daynum = (timestamp - first_timestamp) / (1000 * 3600 * 24)
    daynum = int(math.floor(daynum))
    if daynum not in output:
      output[daynum] = {}
    if difficulty not in output[daynum]:
      output[daynum][difficulty] = 0
    output[daynum][difficulty] += 1
  return output

def get_user_to_daynum_to_difficulty_choices():
  output = {}
  user_list = get_users_with_choose_difficulty()
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    if len(difficulty_counts.keys()) == 0:
      continue
    daynum_to_difficulty_choices = get_daynum_to_difficulty_choices(user)
    if len(daynum_to_difficulty_choices.keys()) == 0:
      continue
    output[user] = daynum_to_difficulty_choices
  return output

def get_daynum_to_difficulty_choices_over_n_days(num_days):
  user_to_daynum_to_difficulty_choices = get_user_to_daynum_to_difficulty_choices()
  output = []
  for daynum in range(num_days):
    item = {}
    for x in 'nothing easy medium hard'.split(' '):
      item[x] = 0
    output.append(item)
  for user,daynum_to_difficulty_choices in user_to_daynum_to_difficulty_choices.items():
    has_data = True
    for daynum in range(num_days):
      if not daynum in daynum_to_difficulty_choices:
        has_data = False
        break
    if not has_data:
      continue
    for daynum in range(num_days):
      for difficulty,num_chosen in to_percent_dict(daynum_to_difficulty_choices[daynum]).items():
        output[daynum][difficulty] += num_chosen
      output[daynum] = to_percent_dict(output[daynum])
  return output

def list_of_dictionaries_to_dictionary_with_list_values(dlist):
  output = {}
  keys = dlist[0].keys()
  for k in keys:
    output[k] = []
  for d in dlist:
    for k,v in d.items():
      output[k].append(v)
  return output

def plotline(values, title=''):
  trace = go.Scatter(
    x = list(range(len(values))),
    y=values,
  )
  data = [trace]
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

def plotlines(dict_to_values, title=''):
  data = []
  for label,values in dict_to_values.items():
    trace = go.Scatter(
      x = list(range(len(values))),
      y=values,
      name=label,
    )
    data.append(trace)
  layout = go.Layout(title=title)
  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

#plotline([3,5,2])
#plotlines({'a': [3,5,2], 'b': [7,7,7]})

In [59]:
def compute_entropy_for_difficulty_selections(difficulty_selection_dict):
  if len(difficulty_selection_dict.keys()) == 0:
    return None
  probs = to_percent_dict(difficulty_selection_dict)
  items_to_sum = []
  for k,prob in probs.items():
    items_to_sum.append(prob * math.log(prob)/math.log(2))
  return -sum(items_to_sum)

def compute_entropy_for_difficulty_selections_for_user(user):
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    return compute_entropy_for_difficulty_selections(difficulty_counts)

def get_entropies_for_user_list(user_list):
  entropies = []
  for user in user_list:
    entropy = compute_entropy_for_difficulty_selections_for_user(user)
    if entropy == None:
      continue
    entropies.append(entropy)
  return entropies

def get_entropies_for_all_users():
  user_list = get_users_with_choose_difficulty()
  return get_entropies_for_user_list(user_list)

def get_entropies_for_all_users_more_than_5():
  user_list = get_users_with_choose_difficulty()
  return get_entropies_for_user_list_with_more_than_5(user_list)

def get_entropies_for_all_users_with_10_first_days():
  user_list = get_users_with_choose_difficulty()
  return get_entropies_for_user_list_with_10_first_days(user_list)

def get_entropies_for_user_list_with_more_than_5(user_list):
  entropies = []
  for user in user_list:
    difficulty_counts = get_choose_difficulty_counts_for_user(user)
    total = sum(difficulty_counts.values())
    if total < 5:
      continue
    entropy = compute_entropy_for_difficulty_selections_for_user(user)
    if entropy == None:
      continue
    entropies.append(entropy)
  return entropies


#print(user_list[0])
#print(compute_entropy_for_difficulty_selections({'a': 0.25, 'b': 0.75}))

def get_entropies_for_user_list_with_10_first_days(user_list):
  entropies = []
  user_to_daynum_to_difficulty_choices = get_user_to_daynum_to_difficulty_choices()
  for user in user_list:
    #difficulty_counts = get_choose_difficulty_counts_for_user(user)
    if user not in user_to_daynum_to_difficulty_choices:
      continue
    daynum_to_difficulty_choices = user_to_daynum_to_difficulty_choices[user]
    is_valid = True
    for i in range(10):
      if i not in daynum_to_difficulty_choices:
        is_valid = False
        break
    if not is_valid:
      continue
    #total = sum(difficulty_counts.values())
    #if total < 5:
    #  continue
    entropy = compute_entropy_for_difficulty_selections_for_user(user)
    if entropy == None:
      continue
    entropies.append(entropy)
  return entropies


In [60]:
#print(np.mean(entropies))
#print(np.sum(entropies))
entropies = get_entropies_for_all_users_with_10_first_days()
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user among users who used habitlab for 10+ days, in bits (histogram)')

mean entropy per user 0.8890993744583879
median entropy per user 0.765747309567398


In [61]:
get_choose_difficulty_counts_for_user('d45b8b82dec8ec3c591fdbd7')

{}

In [62]:
user_to_daynum_to_difficulty_choices = get_user_to_daynum_to_difficulty_choices()
print(user_to_daynum_to_difficulty_choices)
#print(user_to_daynum_to_difficulty_choices['d45b8b82dec8ec3c591fdbd7'])

{'e0ea34c81d4b50cddc7bd752': {0: {'medium': 1}}, 'f4d95006c663a799d9185576': {0: {'medium': 1, 'hard': 1}}, '8d2c9eb27dee2dc85bca705b': {0: {'medium': 33, 'hard': 1, 'nothing': 6}, 1: {'nothing': 19, 'medium': 11, 'hard': 12}, 2: {'medium': 28, 'hard': 1, 'nothing': 4}, 3: {'medium': 10, 'nothing': 22}, 4: {'nothing': 7, 'medium': 3}, 5: {'nothing': 21, 'medium': 4, 'easy': 1, 'hard': 2}, 6: {'hard': 4, 'nothing': 11, 'medium': 2}, 7: {'nothing': 28, 'medium': 2, 'hard': 1}, 8: {'nothing': 33, 'medium': 1}, 9: {'nothing': 50, 'easy': 1}, 10: {'nothing': 65, 'hard': 1, 'medium': 2}, 11: {'nothing': 31, 'hard': 10}, 12: {'nothing': 25}, 13: {'nothing': 8}, 14: {'nothing': 32, 'hard': 12}, 15: {'nothing': 22, 'hard': 6}, 16: {'nothing': 20, 'hard': 3}, 17: {'hard': 5, 'nothing': 28}, 18: {'nothing': 31, 'hard': 1}, 19: {'nothing': 4}, 20: {'nothing': 19}, 21: {'nothing': 17, 'hard': 2}, 22: {'nothing': 16, 'hard': 2}, 23: {'nothing': 34}, 24: {'nothing': 35}, 25: {'nothing': 6, 'hard': 1}

In [63]:
#print(np.mean(entropies))
#print(np.sum(entropies))
entropies = get_entropies_for_user_list(difficulty_interface_to_users['time_afford'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with interface=time_afford')

mean entropy per user 0.8849907437971919
median entropy per user 0.9959482086700628


In [64]:
entropies = get_entropies_for_user_list(difficulty_interface_to_users['this_intervention'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with interface=this_intervention')

mean entropy per user 1.0676469878484651
median entropy per user 1.2470394553949502


In [65]:
entropies = get_entropies_for_user_list(difficulty_interface_to_users['settings_update'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with interface=settings_update')

mean entropy per user 0.6335112094483013
median entropy per user 0.5916727785823274


In [66]:
entropies = get_entropies_for_user_list(frequency_of_choose_difficulty_to_users['1.0'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with frequency=1.0')

mean entropy per user 0.6755898500363616
median entropy per user 0.5916727785823274


In [67]:
entropies = get_entropies_for_user_list(frequency_of_choose_difficulty_to_users['0.5'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with frequency=0.5')

mean entropy per user 1.0275326230025583
median entropy per user 1.07120902160176


In [68]:
entropies = get_entropies_for_user_list(frequency_of_choose_difficulty_to_users['0.25'])
print('mean entropy per user', np.mean(entropies))
print('median entropy per user', np.median(entropies))
plothist(entropies, 'Entropies per user, in bits (histogram), for users with frequency=0.25')

mean entropy per user 0.612005044915718
median entropy per user 0.5301790624412356


In [69]:
#import moment
#moment.unix(1544559305512.0)
#import arrow
#arrow.get(1544559305512.0 / 1000)

In [70]:
plotlines(list_of_dictionaries_to_dictionary_with_list_values(get_daynum_to_difficulty_choices_over_n_days(10)), 'Difficulty chosen over first 10 days of install')

In [71]:
def compute_entropy_over_n_days(num_days):
  daynum_to_difficulty_choices = get_daynum_to_difficulty_choices_over_n_days(num_days)
  output = []
  for daynum in range(num_days):
    difficulty_choices = daynum_to_difficulty_choices[daynum]
    entropy_for_day = compute_entropy_for_difficulty_selections(difficulty_choices)
    output.append(entropy_for_day)
  return output

#print(compute_entropy_over_n_days(10))
plotline(compute_entropy_over_n_days(10), 'Entropy of difficulty choice selections over first 10 days of install')

In [72]:
plotline(compute_entropy_over_n_days(5), 'Entropy of difficulty choice selections over first 5 days of install')

In [73]:
user_to_daynum_to_difficulty_choices = get_user_to_daynum_to_difficulty_choices()
print(len(user_to_daynum_to_difficulty_choices.keys()))

1560


In [74]:
print(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['time_afford']))

Counter({'nothing': 1271, 'medium': 1003, 'easy': 669, 'hard': 373})


In [75]:
print(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['this_intervention']))

Counter({'nothing': 5876, 'easy': 4744, 'medium': 2827, 'hard': 1943})


In [76]:
print(get_choose_difficulty_counts_for_user_list(difficulty_interface_to_users['settings_update']))

Counter({'nothing': 1271, 'easy': 901, 'medium': 891, 'hard': 531})


In [77]:
frequency_of_choose_difficulty_to_users = get_abtest_condition_to_user_list('frequency_of_choose_difficulty')

In [78]:
frequency_of_choose_difficulty_to_users.keys()

dict_keys(['0.5', '1.0', '0.0', '0.25'])

In [79]:
print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['1.0']))

Counter({'nothing': 18392, 'easy': 7667, 'medium': 2436, 'hard': 1515})


In [80]:
print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['0.5']))

Counter({'nothing': 13715, 'easy': 9668, 'medium': 6354, 'hard': 4380})


In [81]:
print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['0.25']))

Counter({'nothing': 5076, 'easy': 1842, 'medium': 1397, 'hard': 672})


In [82]:
import pytorch

ModuleNotFoundError: No module named 'pytorch'

In [None]:
#print(get_choose_difficulty_counts_for_user_list(frequency_of_choose_difficulty_to_users['0.0']))

In [None]:
#print('foobar')

In [None]:
#def get_lifetime_and_whether_attritioned(user):
  

In [None]:
#choose_difficulty_set = set()
#for x in collection_names:
#  if 'difficulty' in x:
#    choose_difficulty_set.add(x)

In [None]:
#print(choose_difficulty_set)
#for x in choose_difficulty_set:
  