In [None]:
# Install the third-party packages this notebook imports. Each install is run
# as `<kernel interpreter> -m pip`, i.e. through sys.executable, so pip is the
# one belonging to the Python that executes this notebook.
import sys
!{sys.executable} -m pip install pymongo
!{sys.executable} -m pip install pyyaml
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install scipy
!{sys.executable} -m pip install pandas
import os
# Show the directory the kernel is running in.
print(os.getcwd())
# Get Mongo database.
# Connection string and database name are looked up through getsecret() so
# that no credentials are hard-coded in the notebook.
from yaml import load
from pymongo import MongoClient
from getsecret import getsecret

client = MongoClient(getsecret("MONGODB_URI"))
db = client[getsecret("DB_NAME")]

# Get all synced accounts and their respective users.
# The endpoint is served from localhost:5000, so that service must be running
# before this cell is executed. Later cells read account["android"] from each
# entry of `accounts`.
import urllib.request as req
import json
# Use the response as a context manager so the HTTP connection is closed even
# if reading or JSON decoding fails (the bare urlopen(...).read() leaked it).
with req.urlopen("http://localhost:5000/synced_emails") as response:
    accounts = json.loads(response.read().decode("utf-8"))

# Counter for figures: later cells pass it to pyplot.figure() as the figure
# number.
counter = 0

In [None]:
# Find number of users who didn't make it past onboarding.
# A user is counted as having quit when the database has no
# "<user>_sessions" collection for them.
num_users = 0
num_quit_users = 0
collections = db.list_collection_names()
# Membership is tested once per user, so look names up in a set instead of
# scanning the list each time. `collections` itself stays a list.
collection_names = set(collections)
for account in accounts:
    for user in account["android"]:
        num_users += 1
        if user + "_sessions" not in collection_names:
            num_quit_users += 1
print("Total Android users: " + str(num_users))
print("Android users who didn't make it past onboarding: " + str(num_quit_users))
if num_users:
    # The label says "%", so report a percentage (0-100) rather than the raw
    # 0-1 fraction that was printed before.
    print("% of quitters: " + str(100 * num_quit_users / num_users))
else:
    # Avoid ZeroDivisionError when there are no Android users at all.
    print("% of quitters: n/a (no Android users found)")

In [None]:
# Avg num of sessions:
# For every Android user, count sessions per calendar day, average those
# counts per user, then report the mean of the per-user averages and plot
# their distribution. Days without any session are not part of a user's
# average, and users without sessions are left out entirely.
import pandas
from statistics import mean
import matplotlib.pyplot as pyplot
total_sessions_per_day = 0
count_sessions_per_day = 0
avg_sessions_per_day_per_user = []
for account in accounts:
    for user in account["android"]:
        sessions_per_day = {}
        for session in db[user + "_sessions"].find():
            # session["timestamp"] is interpreted as milliseconds since epoch.
            ts = pandas.Timestamp(ts_input=session["timestamp"], unit="ms")
            # Key by the calendar date. Concatenating year/month/day without
            # separators is ambiguous (2019-1-11 and 2019-11-1 both became
            # "2019111") and silently merged distinct days.
            day = ts.date()
            sessions_per_day[day] = sessions_per_day.get(day, 0) + 1
        arr = list(sessions_per_day.values())
        if arr:
            avg = mean(arr)
            # One value per user: the histogram below is labelled as the
            # per-user average, so feed it averages, not raw daily counts.
            avg_sessions_per_day_per_user.append(avg)
            total_sessions_per_day += avg
            count_sessions_per_day += 1
if count_sessions_per_day:
    print("Average Number of sessions per day per user: " + str(total_sessions_per_day/count_sessions_per_day))
else:
    # No user has any logged session; avoid ZeroDivisionError.
    print("Average Number of sessions per day per user: n/a (no sessions logged)")
pyplot.figure(counter)
pyplot.title("Average Sessions Per Day Per User")
pyplot.ylabel("Frequency")
pyplot.xlabel("Average number of sessions per day per user")
pyplot.hist(avg_sessions_per_day_per_user, bins=50)

In [None]:
# See how many days we missed session logs when a user installed HabitLab
# For each user, walk their sessions in the order the database returns them
# and count the calendar days that fall strictly between two consecutive
# sessions. This assumes find() yields sessions in chronological order; a
# session that is older than the one before it never adds to the count.
import pandas
import datetime
# Naive epoch used to turn a timestamp into a whole-day index. Same value as
# datetime.utcfromtimestamp(0), which is deprecated since Python 3.12.
epoch = datetime.datetime(1970, 1, 1)
skipped_days = 0
# Not used in this cell; kept because it resets the notebook-level counter.
num_users = 0
skipped_days_arr = []
for account in accounts:
    for user in account["android"]:
        # None means "no session seen yet" (0 would be a valid day index).
        prev_day = None
        skipped_days_for_user = 0
        for session in db[user + "_sessions"].find():
            ts = pandas.Timestamp(ts_input=session["timestamp"], unit="ms")
            day = (ts - epoch).days
            if prev_day is not None and day - prev_day > 1:
                # The logging skipped a day!
                # Sessions on day d and day d+k leave k-1 days without logs,
                # so subtract one (adding the full difference over-counted
                # every gap by a day).
                skipped_days_for_user += day - prev_day - 1
            prev_day = day
        skipped_days_arr.append(skipped_days_for_user)
        skipped_days += skipped_days_for_user
print("Skipped Days: " + str(skipped_days))
counter += 1
pyplot.figure(counter)
pyplot.title("Skipped Days of Session Logs For Users")
pyplot.xlabel("Total number of skipped days")
pyplot.ylabel("Frequency")
pyplot.hist(skipped_days_arr)

In [None]:
# Now let's analyze which interventions are most effective.
# For every session with "enabled": True, add the session's duration to each
# intervention listed on that session, then plot the mean duration per
# intervention name.
interventions = {}
for account in accounts:
    for user in account["android"]:
        for session in db[user + "_sessions"].find({"enabled": True}):
            if "interventions" not in session:
                continue
            for intervention in session["interventions"]:
                intervention_name = intervention["intervention"]
                stats = interventions.setdefault(intervention_name, {"total": 0, "count": 0})
                stats["total"] += session["duration"]
                stats["count"] += 1
# Advance the counter BEFORE opening the figure, as the skipped-days cell
# does. Opening the figure first reused that cell's figure number, so on a
# non-inline backend the bars were drawn onto the existing histogram.
counter += 1
pyplot.figure(counter)
pyplot.title('Average Session Duration When Assigned Interventions')
pyplot.ylabel('Average Session Duration (s)')
pyplot.xticks(rotation=90)
intervention_names = list(interventions)
# "count" is at least 1 for every entry, so the division is always defined.
average_durations = [
    interventions[name]["total"] / interventions[name]["count"]
    for name in intervention_names
]
pyplot.bar(intervention_names, average_durations)