In [1]:
# Load the rpy2 IPython extension, which provides the %%R cell magic used by
# the R cells in this notebook. Not necessary if it is already loaded.
%load_ext rpy2.ipython

In [2]:
import pandas as pd
import json

In [3]:
# Toy frame: ten observations, one per coffee count from 0 to 9.
# NOTE(review): the next cell rebinds `df` to the CSV contents, so this frame
# does not survive a top-to-bottom run.
df = pd.DataFrame(
    {
        'cups_of_coffee': list(range(10)),
        'productivity': [2, 5, 6, 8, 9, 8, 0, 1, 0, -1],
    }
)

In [4]:
# Read the session records from the working directory; this rebinds `df`.
SESSIONS_CSV = 'sessions_for_intervention.csv'
df = pd.read_csv(SESSIONS_CSV)

In [5]:
# Load the list of intervention descriptions. The `with` block closes the file
# handle as soon as parsing finishes (the bare `open()` call left it open).
with open('mobile_intervention_info_list.json') as intervention_info_file:
  all_intervention_info_list = json.load(intervention_info_file)

# Index the descriptions by their 'shortname' field for direct lookup.
intervention_name_to_info = {
  intervention_info['shortname']: intervention_info
  for intervention_info in all_intervention_info_list
}

def is_target_app(intervention_name):
  """Tell whether the named intervention has 'app' as its target.

  The name is looked up in the module-level `intervention_name_to_info`
  mapping and the record's 'target' field is compared with 'app'.

  Raises:
    KeyError: if the name is not in the mapping, or its record has no
      'target' field.
  """
  target = intervention_name_to_info[intervention_name]['target']
  return target == 'app'

def get_intervention_names(intervention_json):
  """Return the names of the app-targeting interventions in an encoded list.

  Args:
    intervention_json: string holding a list of dicts, each with an
      'intervention' key. Single quotes are swapped for double quotes before
      the string is parsed as JSON.

  Returns:
    The 'intervention' values, in their original order, for which
    `is_target_app` is true.
  """
  parsed_entries = json.loads(intervention_json.replace("'", '"'))
  # Collect every name first, then filter, so a malformed entry fails before
  # any lookup is attempted.
  every_name = []
  for entry in parsed_entries:
    every_name.append(entry['intervention'])
  return list(filter(is_target_app, every_name))

def get_intervention_name(intervention_json):
  """Return the single app-targeting intervention name, or '' if none.

  Args:
    intervention_json: encoded intervention list, passed unchanged to
      `get_intervention_names`.

  Raises:
    ValueError: if more than one app-targeting intervention is present.
  """
  names = get_intervention_names(intervention_json)
  if len(names) > 1:
    raise ValueError('have multiple target app interventions: ' + intervention_json)
  return names[0] if names else ''


In [6]:
def is_valid_row(intervention_json):
  """Check that every intervention named in the encoded list is known.

  Args:
    intervention_json: string holding a list of dicts, each with an
      'intervention' key. Single quotes are swapped for double quotes before
      the string is parsed as JSON.

  Returns:
    True when every name is a key of the module-level
    `intervention_name_to_info` mapping (including when the list is empty),
    False otherwise.
  """
  records = json.loads(intervention_json.replace("'", '"'))
  names = [record['intervention'] for record in records]
  return all(name in intervention_name_to_info for name in names)


# Flag each session whose interventions are all present in the catalogue...
df['is_valid_row'] = df['interventions'].map(is_valid_row)
# ...then keep only the flagged sessions, as an independent copy.
df = df.loc[df['is_valid_row'].eq(True)].copy()

In [7]:
#print(df)

In [8]:
# Name of the single app-targeting intervention shown in each session
# ('' when there was none).
df['first_intervention'] = df['interventions'].apply(get_intervention_name)

In [9]:
# Count every intervention listed for the session, whatever its target.
df['num_interventions'] = df['interventions'].map(
  lambda raw: len(json.loads(raw.replace("'", '"')))
)

In [10]:
# Give the log-duration column a plain name for use in the R model formulas;
# the original header reaches R as `duration..log.s..`.
df['duration'] = df.loc[:, 'duration (log(s))']
# `ndata` is a second name for the same frame (not a copy); the model cells
# import it into R.
ndata = df

In [11]:
%%R -i ndata -w 5 -h 5 --units in -r 200
# Pass the pandas frame `ndata` into the R session and attach lme4, which
# supplies glmer() for the mixed-model cells that follow.

# One-time setup, left disabled: uncomment to install the packages.
#install.packages('ez')
#install.packages('lme4')

library(lme4)
# No visible cell in this notebook calls into ez, so it stays disabled.
#library(ez)

  res = PandasDataFrame.from_items(items)



In [33]:
%%R -i ndata -w 5 -h 5 --units in -r 200

# SIGNIFICANT: does the frequency have an effect on duration, controlling for user and goal?
# (Pr(>Chisq) = 0.0107 in the output recorded below.)

# Full model: `frequent` as a fixed effect, with random intercepts for user
# and goal, fitted with a Gamma family.
results <- glmer(duration ~ frequent + (1|user) + (1|goal), data = ndata, family = Gamma)

# Null model: same random intercepts, without the `frequent` term.
resultsnull <- glmer(duration ~ (1|user) + (1|goal), data = ndata, family = Gamma)

# Print the full fit, then compare the two nested models with anova().
show(results)
show(anova(resultsnull, results))


Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: Gamma  ( inverse )
Formula: duration ~ frequent + (1 | user) + (1 | goal)
   Data: ndata
      AIC       BIC    logLik  deviance  df.resid 
125191.30 125234.40 -62590.65 125181.30     40887 
Random effects:
 Groups   Name        Std.Dev.
 goal     (Intercept) 0.02839 
 user     (Intercept) 0.01827 
 Residual             0.30869 
Number of obs: 40892, groups:  goal, 285; user, 234
Fixed Effects:
 (Intercept)  frequentTRUE  
    0.261194      0.002406  
Data: ndata
Models:
resultsnull: duration ~ (1 | user) + (1 | goal)
results: duration ~ frequent + (1 | user) + (1 | goal)
            Df    AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)  
resultsnull  4 125196 125230 -62594   125188                          
results      5 125191 125234 -62591   125181 6.514      1     0.0107 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [32]:
%%R -i ndata -w 5 -h 5 --units in -r 200

# SIGNIFICANT: does the intervention have an effect on duration, controlling for user and goal and frequency?

# Full model: `first_intervention` as a fixed effect, with random intercepts
# for frequent, user and goal, fitted with a Gamma family.
# NOTE(review): `frequent` has only two groups and its estimated standard
# deviation in the recorded output is about 0.0006; consider entering it as a
# fixed effect, as the frequency model does -- confirm with the analyst.
results <- glmer(duration ~ first_intervention + (1|frequent) + (1|user) + (1|goal), data = ndata, family= Gamma)

# Null model: same random intercepts, without the intervention term.
resultsnull <- glmer(duration ~ (1|frequent) + (1|user) + (1|goal), data = ndata, family=Gamma)


# Print the full fit, then compare the two nested models with anova().
show(results)
show(anova(resultsnull, results))


Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: Gamma  ( inverse )
Formula: duration ~ first_intervention + (1 | frequent) + (1 | user) +  
    (1 | goal)
   Data: ndata
      AIC       BIC    logLik  deviance  df.resid 
125016.31 125180.07 -62489.16 124978.31     40873 
Random effects:
 Groups   Name        Std.Dev. 
 goal     (Intercept) 0.0283876
 user     (Intercept) 0.0182829
 frequent (Intercept) 0.0006328
 Residual             0.3081150
Number of obs: 40892, groups:  goal, 285; user, 234; frequent, 2
Fixed Effects:
                                   (Intercept)  
                                     0.2619959  
          first_interventionAPPLICATION_SLIDER  
                                     0.0099253  
     first_interventionCOUNTDOWN_TIMER_OVERLAY  
                                     0.0188338  
       first_interventionCOUNTUP_TIMER_OVERLAY  
                                     0.0075315  
              first_inter

In [14]:
# Summary statistics of the per-session intervention count.
print(df.loc[:, 'num_interventions'].describe())

count    40892.000000
mean         0.606867
std          0.541542
min          0.000000
25%          0.000000
50%          1.000000
75%          1.000000
max          4.000000
Name: num_interventions, dtype: float64


In [15]:
# Non-null count per column for sessions with no interventions.
df.loc[df['num_interventions'].eq(0)].count()

frequent              17156
duration (log(s))     17156
interventions         17156
user                  17156
goal                  17156
is_valid_row          17156
first_intervention    17156
num_interventions     17156
duration              17156
dtype: int64

In [16]:
# Non-null count per column for sessions with exactly one intervention.
df.loc[df['num_interventions'].eq(1)].count()

frequent              22691
duration (log(s))     22691
interventions         22691
user                  22691
goal                  22691
is_valid_row          22691
first_intervention    22691
num_interventions     22691
duration              22691
dtype: int64

In [17]:
# Non-null count per column for sessions with exactly two interventions.
df.loc[df['num_interventions'].eq(2)].count()

frequent              1013
duration (log(s))     1013
interventions         1013
user                  1013
goal                  1013
is_valid_row          1013
first_intervention    1013
num_interventions     1013
duration              1013
dtype: int64

In [18]:
# Non-null count per column for sessions with exactly three interventions.
df.loc[df['num_interventions'].eq(3)].count()

frequent              29
duration (log(s))     29
interventions         29
user                  29
goal                  29
is_valid_row          29
first_intervention    29
num_interventions     29
duration              29
dtype: int64

In [19]:
# Non-null count per column for sessions with exactly four interventions.
df.loc[df['num_interventions'].eq(4)].count()

frequent              3
duration (log(s))     3
interventions         3
user                  3
goal                  3
is_valid_row          3
first_intervention    3
num_interventions     3
duration              3
dtype: int64

In [20]:
%%R -i df -w 5 -h 5 --units in -r 200
# Sanity check: pass the pandas frame `df` into R and print a per-column
# summary, showing how each column arrives on the R side.

summary(df)

  res = PandasDataFrame.from_items(items)


  frequent       duration..log.s.. interventions          user          
 Mode :logical   Min.   : 1.792    Length:40892       Length:40892      
 FALSE:21092     1st Qu.: 2.708    Class :character   Class :character  
 TRUE :19800     Median : 3.611    Mode  :character   Mode  :character  
                 Mean   : 3.737                                         
                 3rd Qu.: 4.595                                         
                 Max.   :11.299                                         
     goal           is_valid_row   first_intervention num_interventions
 Length:40892       Mode:logical   Length:40892       Min.   :0.0000   
 Class :character   TRUE:40892     Class :character   1st Qu.:0.0000   
 Mode  :character                  Mode  :character   Median :1.0000   
                                                      Mean   :0.6069   
                                                      3rd Qu.:1.0000   
                                                      Max

In [21]:
%%R -i df -w 5 -h 5 --units in -r 200
# import df from global environment
# make default figure size 5 by 5 inches with 200 dpi resolution
# NOTE(review): by this point `df` has been rebound to the sessions CSV, so
# the cups_of_coffee / productivity columns plotted below belong to the toy
# frame from the start of the notebook and are presumably absent -- confirm
# which frame this cell should plot.

# One-time setup, left disabled: the recorded run failed because ggplot2 was
# not installed, so uncomment this line on a fresh environment.
#install.packages("ggplot2", repos='http://cran.us.r-project.org', quiet=TRUE)
library(ggplot2)
# Line chart of productivity against cups of coffee.
ggplot(df, aes(x=cups_of_coffee, y=productivity)) + geom_line()


Error in library(ggplot2) : there is no package called ‘ggplot2’



