In [1]:
import os
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request
from glob import glob

import numpy as np
import pandas as pd
import requests
import simplejson

In [2]:
# The CSV is read without a header row, so its first column is labelled 0;
# give that column a readable name.
df = (
    pd.read_csv('./mHealth_iOS.csv', header=None)
    .rename(columns={0: 'appName'})
)
df.head()

Unnamed: 0,appName
0,Peak– Brain Training
1,Lumosity
2,Sleep Cycle alarm clock
3,Calm
4,Headspace


In [3]:
# Search endpoint; the placeholder receives the encoded query string.
baseURL = 'https://itunes.apple.com/search?{}'


def paramDict(x):
    """Return the query parameters for a search on the term ``x``.

    The search is restricted to ``entity=software`` and a single result
    (``limit=1``).
    """
    return {"term": x, "entity": "software", "limit": 1}


def urlEncoder(x):
    """Percent-encode the mapping ``x`` into a ``k=v&k=v`` query string.

    Every key and value is quoted on its own, so a literal ``&`` or ``=``
    inside a value (e.g. an app called "Stress & Anxiety") is escaped instead
    of being mistaken for a parameter separator. ``quote_via=quote`` keeps
    spaces encoded as ``%20`` rather than ``+``.
    """
    return urllib.parse.urlencode(x, quote_via=urllib.parse.quote)


def getURL(x):
    """Return the full search URL for the term ``x``."""
    return baseURL.format(urlEncoder(paramDict(x)))

In [140]:
def getAppInfo(name):
    """Search for ``name`` and return the single matching result.

    Fetches the URL built by ``getURL(name)``, parses the body as JSON and
    returns ``output['results'][0]``.

    Raises:
        Exception: with args ``('resultCount', n)`` when the response's
            ``resultCount`` is not exactly 1.
        urllib.error.HTTPError: propagated from ``urlopen`` on an HTTP error
            status.
    """
    # Use the response as a context manager so the connection is closed even
    # when JSON parsing fails.
    with urllib.request.urlopen(getURL(name)) as response:
        output = simplejson.load(response)
    if output['resultCount'] == 1:
        return output['results'][0]
    raise Exception('resultCount', output['resultCount'])

In [122]:
def saveAppInfo(name):
    """Fetch the search result for ``name`` and save it as JSON under ``ios/``.

    The original search string is stored in the record under the
    ``'searchString'`` key. The file is named after the record's ``bundleId``
    with dots replaced by double underscores.

    Returns:
        The path of the JSON file that was written.
    """
    res = getAppInfo(name)
    res['searchString'] = name
    # Create the output directory on first use so a fresh checkout does not
    # fail with FileNotFoundError.
    os.makedirs('ios', exist_ok=True)
    fname = os.path.join('ios', res['bundleId'].replace('.', '__') + '.json')
    with open(fname, 'w') as f:
        f.write(simplejson.dumps(res))
    return fname

In [9]:
# Spot-check the encoded URL for one app name taken from the list.
getURL(df.iloc[45]['appName'])

'https://itunes.apple.com/search?term=RR%3A%20Eating%20Disorder%20Management&entity=software&limit=1'

In [123]:
# Save the record for a single app; the displayed value is the path of the
# JSON file that saveAppInfo wrote.
saveAppInfo('Calm')

'ios/com__calm__calmapp.json'

In [126]:
# Read the saved record back and show the search string stored alongside it.
with open('ios/com__calm__calmapp.json', 'r') as f:
    d = simplejson.load(f)
print(d['searchString'])

Calm


In [11]:
# Fetch and save the record for every app in the list, throttling requests.
for i, row in df.iterrows():
    try:
        saveAppInfo(row.appName)
    except urllib.error.HTTPError as err:
        # Any HTTP error status is treated as a rate-limit signal and stops the
        # run; the status code is printed so other causes can be recognised.
        print('hit rate limit', i, row.appName, err.code)
        break
    except Exception as err:
        # Best effort: report the failure (with its cause) and carry on with
        # the next app.
        print('something wrong', i, row.appName, repr(err))
    # Pause after every attempt, including failed lookups, since those also
    # issued a request. The delay is uniform in [2.5, 7.5) seconds.
    time.sleep(5 + (np.random.rand()-0.5)*5)

something wrong 79 Naturespace - Relax Meditate Focus Sleep and Rest with 3D Sounds, sonic therapy for anxiety and stress relief
something wrong 106 iSleep Easy - Meditations for Restful Sleep (Paid)
something wrong 107 Unwind HD: Calm ambiance to breathe & stop anxiety
something wrong 110 Moodfit - Stress & Anxiety
something wrong 114 Daily Cardio Workout (Paid)
something wrong 119 iPrevail Anxiety, Stress, Depression Chat & Relief
something wrong 128 Vital-EQ Respiroguide Pro (Paid)
something wrong 136 Relax: Stress and Anxiety Relief (Paid)
something wrong 164 Sleep Science HQ: alarm clock (paid)
something wrong 172 Panic Relief (Paid)
something wrong 178 SARDAA Health Storylines
something wrong 202 Mindfulness by Digipill (Paid)
something wrong 210 Anxiety Island - work with your child to help overcome fear, phobia and anxiety
something wrong 211 Emergency First Aid - Acupressure Massage Points! (Paid)
something wrong 217 Get Rid Of Depression with Acupressure Massage! (Paid)
somet

**Reza**: wants to know engagement and uptake

**Anisha**: wants to know for apps that have more user engagement:
  * data collection: passive, active, both, or none
  * gamified or not gamified
  * social media or not social media
  * mostly educational or not
  * category: stress/anxiety, sleep, memory, etc
  * link to external sensors (e.g. fitbit) or not

### Analysis: December 1 2018

In [144]:
# Load the raw votes export; later cells use only the values of this mapping.
with open('mhealthapps-66ff0-votes-export.json', 'r') as f:
    data_ugly = simplejson.loads(f.read())

In [188]:
# The first CSV column has no header, so pandas labels it "Unnamed: 0";
# rename it to the app-name key used for merging.
df_users = (
    pd.read_csv('mHealth_iOS_descriptive.csv')
    .rename(columns={"Unnamed: 0": 'appName'})
)

In [177]:
# Only the vote records themselves are needed, not their keys.
fb_data = list(data_ugly.values())

# Load every saved app record. Sorting makes the row order reproducible,
# because glob returns files in arbitrary order.
jsons = sorted(glob('ios/*.json'))
metadata = []
for j in jsons:
    with open(j, 'r') as f:
        d = simplejson.load(f)
    # Flag records that were saved without the original search string.
    if "searchString" not in d:
        print(j)
    metadata.append(d)

# sampleId uses the same bundleId-with-double-underscores form as the file names.
for meta in metadata:
    meta['sampleId'] = meta['bundleId'].replace('.', '__')

ios/com__sandboxx__sandboxx.json


In [181]:
# Build one row per loaded app record (assumes each record is a dict, as the
# key lookups on them in the loading cell imply).
df_meta = pd.DataFrame(metadata)

In [183]:
# Attach the collected vote responses to each app.
# NOTE: getFBResponses must already be defined in the kernel; in this notebook
# its definition appears in a later cell.
df_meta['tags'] = df_meta['sampleId'].map(getFBResponses)

data about active users

In [164]:
def getFBResponses(sampleId, votes=None):
    """Collect all responses recorded for ``sampleId``.

    Args:
        sampleId: identifier compared against each vote's ``'sample'`` value.
        votes: iterable of vote dicts to search; defaults to the module-level
            ``fb_data`` when omitted.

    Returns:
        ``None`` if ``sampleId`` is ``None``. Otherwise a list made by
        concatenating the ``'response'`` value of every matching vote, in
        order; the list is empty when nothing matches.
    """
    if sampleId is None:
        return None
    if votes is None:
        votes = fb_data

    responses = []
    for fb in votes:
        # Use .get() so votes missing a 'sample' or 'response' key are skipped
        # instead of raising KeyError.
        if fb.get('sample') == sampleId and 'response' in fb:
            responses += fb['response']
    return responses

In [192]:
# Join usage figures to store metadata on the app name used for the search.
# This is an inner join (the default), so apps without a match on both sides
# are dropped.
df_merged = df_users.merge(df_meta, left_on='appName', right_on='searchString')
print(df_merged.columns)
df_merged.head()

Index(['appName', 'Total Downloads Since First Tracked (USA)',
       'Total Revenue Since First Tracked (USA)',
       'Total Downloads Since First Tracked (Global)',
       'Total Revenue Since First Tracked (Global)',
       '"USA Daily Active Users (Past 30 days)"',
       '"USA Monthly Active Users (Past 30 days)"',
       '"Global Daily Active Users (Past 30 days)"',
       '"Global Monthly Active Users (Past 30 days)"', 'advisories',
       'appletvScreenshotUrls', 'artistId', 'artistName', 'artistViewUrl',
       'artworkUrl100', 'artworkUrl512', 'artworkUrl60', 'averageUserRating',
       'averageUserRatingForCurrentVersion', 'bundleId',
       'contentAdvisoryRating', 'currency', 'currentVersionReleaseDate',
       'description', 'features', 'fileSizeBytes', 'formattedPrice',
       'genreIds', 'genres', 'ipadScreenshotUrls', 'isGameCenterEnabled',
       'isVppDeviceBasedLicensingEnabled', 'kind', 'languageCodesISO2A',
       'minimumOsVersion', 'price', 'primaryGenreId', 'p

Unnamed: 0,appName,Total Downloads Since First Tracked (USA),Total Revenue Since First Tracked (USA),Total Downloads Since First Tracked (Global),Total Revenue Since First Tracked (Global),"""USA Daily Active Users (Past 30 days)""","""USA Monthly Active Users (Past 30 days)""","""Global Daily Active Users (Past 30 days)""","""Global Monthly Active Users (Past 30 days)""",advisories,...,trackCensoredName,trackContentRating,trackId,trackName,trackViewUrl,userRatingCount,userRatingCountForCurrentVersion,version,wrapperType,tags
0,Peak– Brain Training,9100000,"$4,900,000.00",29100000,"$13,900,000",1900000,7200000,4300000,20100000,[],...,Peak - Brain Training,4+,806223188,Peak - Brain Training,https://itunes.apple.com/us/app/peak-brain-tra...,106911.0,1026.0,4.28.4,software,[]
1,Lumosity,5900000,"$10,900,000.00",11700000,"$16,200,000",1500000,4700000,2900000,9200000,[],...,Lumosity: Daily Brain Games,4+,577232024,Lumosity: Daily Brain Games,https://itunes.apple.com/us/app/lumosity-daily...,151288.0,1675.0,9.48.1,software,"[gamified, active data entry, Subscriptions]"
2,Sleep Cycle alarm clock,3500000,"$5,100,000.00",13700000,"$14,400,000",931600,2800000,3300000,10800000,[],...,Sleep Time zZz — Sleep Cycle Alarm Clock with ...,4+,963536705,Sleep Time zZz — Sleep Cycle Alarm Clock with ...,https://itunes.apple.com/us/app/sleep-time-zzz...,21.0,12.0,2.0.1,software,[]
3,Calm,7800000,"$21,600,000.00",15700000,"$33,000,000",115700,633200,236800,1400000,[],...,Calm,4+,571800810,Calm,https://itunes.apple.com/us/app/calm/id5718008...,216377.0,4514.0,4.3,software,"[Meditation , sleep, Audio, Subscriptions ]"
4,Headspace,6400000,"$23,100,000.00",14400000,"$40,900,000",93100,563300,212800,1300000,[],...,Headspace: Meditation,4+,493145008,Headspace: Meditation,https://itunes.apple.com/us/app/headspace-medi...,463844.0,7199.0,3.39.0,software,"[Meditation, Subscriptions, sensors, Audio]"


In [199]:
# Flatten every app's tag list into one list, normalising case and
# surrounding whitespace.
allTags = [
    tag.lower().strip()
    for tags in df_merged['tags'].values
    for tag in tags
]

In [206]:
# Text histogram: one bar character per occurrence of each distinct tag.
names, counts = np.unique(allTags, return_counts=True)
for n, count in zip(names, counts):
    print('|' * count, n)

| act
||||||||||||||||||||||||||||||||||||||||||||| active data entry
| alcohol
|||||||||||||||||| anxiety
| attention
||||||||||| audio
|| bipolar
| brain training
||||||| breathing
|||||||||||||||||| cbt
|| cognitive
| cognitive training
| cpt
|||||||||||| depression
| eating disorder
||||||||||||||||||||||||||| educational
||||||||||||| gamified
| guided meditation
| heart rate
|||||| hypnosis
| in app purchase
| insomnia
|||||||||||||||||| journal
||| massage
||||||||||||||||||| meditation
|| memory
||| mindfulness
|||||||||| music
|||| no data collection
|||||| no data entry
|| not english
|| not mental health
||||||||||||||||||||||| not relevant
|| ocd
| pain
||||||| passive data entry
|| phobia
| productivity
|||||| ptsd
| resilience
| schizophrenia
|||||||||||| sensors
||||||||||||||| sleep
|||| smoking
||||||||||||| social media
|||||||||||| sounds
|||||| stress
|||| subscription
||||||||||||||||| subscriptions
| suicide
| workout


**AK TODO: add app screenshots**

1. Relevant/Not Relevant: Is this designed to help with a mental health problem?
  - err on the side of inclusion (ambiguous ones get a yes)
  - does sleep count? (yes)
  - does meditation count? yes, under anxiety (breathing also counts)
  - menstrual cycle tracker no
  - pizza game, no
  - not english language, no
  
2. Data collection or content-only
  - e.g. apps that only provide sounds/reading materials are content-only
  - sensors, journaling = data collection
  
3. Interoperable with sensors/wearables?
  - judge from the description/screenshots; if it's not (TODO: this rule was left unfinished)
  
4. Social connection (yes/no)
  - highlight "social media", "social", "friends"
  
5. Gamification (yes/no)
  - leaderboard / badges
  - secondary step / alternate world / metaphor
  - highlight "points, badges, game, play" etc

6. What disorder is it targeting? (multiselect)
  - substance use
  - general behavioral health
  - depression
  - PTSD
  - anxiety & stress (include meditation)
  - sleep
  - schizophrenia
  - bipolar
  - cognition
  - eating disorder
  - attention
  - phobias
  - physical exercise 
  - other (fill in)
  
7. primary treatment strategy
  - CBT / ACT / any subdomains
  - mindfulness or meditation
  - journal
  - hypnosis
  - other (fill in)
 

In [None]:
# Coarse groupings of the normalised tag names.
# This must be an assignment: the original `groups: {...}` form is only a
# variable annotation and never binds the name `groups`.
groups = {
    'payment': ['subscription', 'subscriptions', 'in app purchase'],
    'disease-specific': ['suicide','smoking','schizophrenia',
                         'ocd', 'pain', 'depression', 'eating disorder',
                         'bipolar', 'attention', 'anxiety'
                        ],
}