# mBrain: depression

In [1]:
from mobiledna.core.appevents import Appevents
from mobiledna.core.sessions import Sessions
from mobiledna.core.notifications import Notifications

import pandas as pd

In [2]:
ae = Appevents.load_data("./appevents.parquet")
se = Sessions.load_data("./sessions.parquet")
no = Notifications.load_data("./notifications.parquet")

df = ae.get_data()
df_s = se.get_data()
df_n = no.get_data()

2021-10-07 10:46:44 - Recognized file type as <parquet>.
2021-10-07 10:46:44 - 'load' took 0.019 seconds to complete.
2021-10-07 10:46:44 - Recognized file type as <parquet>.
2021-10-07 10:46:44 - 'load' took 0.002 seconds to complete.
2021-10-07 10:46:44 - Recognized file type as <parquet>.
2021-10-07 10:46:44 - 'load' took 0.031 seconds to complete.


In [3]:
ae.add_category(scrape=False).add_time_of_day()
no.add_category(scrape=False)

Adding category: 100%|██████████| 15065/15065 [00:00<00:00, 869951.53it/s]
Adding tod <startTime>: 100%|██████████| 15065/15065 [00:00<00:00, 1682703.25it/s]
Adding category: 100%|██████████| 451218/451218 [00:00<00:00, 1563346.83it/s]


# Variables
### General screen time

In [4]:
general_screen_time = (ae.get_daily_duration() / 60)
general_screen_time

id
0f17152a-ec9e-4105-b58d-25b2ae692388    213.315132
a9ac43fd-cf95-40d5-9c9a-3582b88993fe    186.937611
Name: daily_durations, dtype: float64

### Smartphone use frequency


In [5]:
smartphone_use_frequency = ae.get_daily_events()
smartphone_use_frequency

id
0f17152a-ec9e-4105-b58d-25b2ae692388    233.974359
a9ac43fd-cf95-40d5-9c9a-3582b88993fe    160.540541
Name: daily_events, dtype: float64

### Screen unlocks

In [6]:
screen_unlocks = se.get_daily_sessions()
screen_unlocks

id
0f17152a-ec9e-4105-b58d-25b2ae692388    127.384615
a9ac43fd-cf95-40d5-9c9a-3582b88993fe     60.297297
Name: daily_sessions, dtype: float64

### Time between sessions on notification

In [7]:
def calc_time_between_notification_sessions(df: pd.DataFrame):
    """
    """
    session_firsts = df.groupby(["id", "session"]).head(1)
    session_firsts_notif = session_firsts[session_firsts["notification"] == True]
    session_firsts_notif = session_firsts_notif.assign(start_shift= session_firsts_notif.groupby(["id", "startDate"])[["startTime"]].shift(-1))
    session_firsts_notif = session_firsts_notif.assign(duration_shift=(session_firsts_notif["start_shift"] - session_firsts_notif["endTime"]).dt.total_seconds())

    mean_shift_pd = session_firsts_notif.groupby(["id", "startDate"])["duration_shift"].mean()
    mean_shift = mean_shift_pd.groupby("id").mean() / 60

    return mean_shift.rename("mins_between_notif_sessions")

In [8]:
time_between_notif_sessions = calc_time_between_notification_sessions(df)
time_between_notif_sessions

id
0f17152a-ec9e-4105-b58d-25b2ae692388    176.389771
a9ac43fd-cf95-40d5-9c9a-3582b88993fe    139.453716
Name: mins_between_notif_sessions, dtype: float64

### Variability smartphone use  during week

In [9]:
def calc_weekly_use_variability(df: pd.DataFrame, duration: None):
    """
    """

    if duration:
        name = "duration"
        variability = df.groupby(["id", pd.Grouper(key="startDate", freq="W")])["duration"].sum().groupby("id").std()
    else:
        name = "appevents"
        variability = df.groupby(["id", pd.Grouper(key="startDate", freq="W")])["application"].count().groupby("id").std()

    return variability.rename(f"weekly_variability_{name}")

In [10]:
calc_weekly_use_variability(df=df, duration=None)

id
0f17152a-ec9e-4105-b58d-25b2ae692388    550.593286
a9ac43fd-cf95-40d5-9c9a-3582b88993fe    395.043289
Name: weekly_variability_appevents, dtype: float64

In [11]:
calc_weekly_use_variability(df=df, duration=True)


id
0f17152a-ec9e-4105-b58d-25b2ae692388    23933.893716
a9ac43fd-cf95-40d5-9c9a-3582b88993fe    24602.261844
Name: weekly_variability_duration, dtype: float64

### Social smartphone use
- Duration social smartphone use	Average daily use of social related smartphone use (social networks + MIM + calls)
- Frequency social smartphone use	Average daily social related app events (social networks + MIM + calls)
- Notifications social smartphone use	Average daily social related notifications (social networks + MIM + calls)

In [12]:
def calc_category_use(df: pd.DataFrame, df_n: pd.DataFrame, categories: list, category_name: str, reverse= None):
    """
    Calculates category usage variables:
    - avg daily category duration
    - avg daily category appevents
    - avg daily category notifications
    :param df: appevents DataFrame
    :param df_n: notifications DataFrame
    :param categories: list of category strings to filter on
    :param category_name: name of the category as string
    :param reverse: bool to check whether to include `categories` (None) or exclude (True) them
    :return: DataFrame with calculated category variables
    """
    logdays_pp = df.groupby("id")["startDate"].nunique()

    if reverse:
        mask = ~df["category"].isin(categories)
        n_mask = ~df_n["category"].isin(categories)
    else:
        mask = df["category"].isin(categories)
        n_mask = df_n["category"].isin(categories)

    duration = (df[mask].groupby(["id"])["duration"].sum() / logdays_pp / 60).rename(f"{category_name}_duration_min")
    frequency = (df[mask].groupby(["id"])["application"].count() / logdays_pp).rename(f"{category_name}_frequency")
    notifications = (df_n[n_mask].groupby("id")["application"].count() / logdays_pp).rename(f"{category_name}_notifications")

    res = pd.concat([duration, frequency, notifications], axis=1)

    return res

In [13]:
social_use = calc_category_use(df=df, df_n=df_n, categories=["chat", "social", "calling"], category_name="social")
social_use

Unnamed: 0_level_0,social_duration_min,social_frequency,social_notifications
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,145.920305,137.512821,1262.282051
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,90.800503,79.756757,2089.162162


### Non-social smartphone use
- Duration non social smartphone use 	Average daily use of non-social (process) related apps (news, entertainment, relaxation)
- Frequency non social smartphone use	Average daily app events of non-social (process) related apps (news, entertainment, relaxation)
- Notifications non social smartphone use	Average daily notifications of non-social (process) related apps (news, entertainment, relaxation)



In [14]:
non_social_use = calc_category_use(df=df, df_n=df_n, categories=["chat", "social", "calling"], category_name="non_social", reverse=True)
non_social_use


Unnamed: 0_level_0,non_social_duration_min,non_social_frequency,non_social_notifications
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,67.394826,96.461538,2051.564103
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,96.137108,80.783784,6612.945946


### Browser

In [15]:
browser_use = calc_category_use(df=df, df_n=df_n, categories=["browser"], category_name="browser")
browser_use

Unnamed: 0_level_0,browser_duration_min,browser_frequency,browser_notifications
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,11.58907,6.897436,101.666667
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,6.07703,5.648649,


### News

In [16]:
news_use = calc_category_use(df=df, df_n=df_n, categories=["news&magazines"], category_name="news")
news_use

Unnamed: 0_level_0,news_duration_min,news_frequency,news_notifications
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,1.898018,4.589744,102.717949
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,,,


### Instagram use

In [17]:
def calc_app_use(df: pd.DataFrame, df_n: pd.DataFrame, apps: list, app_name: str, reverse= None):
    """
    Calculates app usage variables:
    - avg daily apps duration
    - avg daily apps appevents
    - avg daily apps notifications

    :param df: appevents DataFrame
    :param df_n: notifications DataFrame
    :param apps: list of application package names strings to filter on
    :param app_name: name of the app as string
    :param reverse: bool to check whether to include `apps` (None) or exclude (True) them

    :return: DataFrame with calculated apps variables
    """
    logdays_pp = df.groupby("id")["startDate"].nunique()

    if reverse:
        mask = ~df["application"].isin(apps)
        n_mask = ~df_n["application"].isin(apps)
    else:
        mask = df["application"].isin(apps)
        n_mask = df_n["application"].isin(apps)

    duration = (df[mask].groupby(["id"])["duration"].sum() / logdays_pp / 60).rename(f"{app_name}_duration_min")
    frequency = (df[mask].groupby(["id"])["application"].count() / logdays_pp).rename(f"{app_name}_frequency")
    notifications = (df_n[n_mask].groupby("id")["application"].count() / logdays_pp).rename(f"{app_name}_notifications")

    evening_duration = (df[(mask)&(df["startTOD"].isin(["eve"]))].groupby(["id"])["duration"].sum() / logdays_pp / 60).rename(f"{app_name}_eve_duration_min")

    res = pd.concat([duration, frequency, notifications, evening_duration], axis=1)

    return res

In [18]:
instagram_use = calc_app_use(df=df, df_n=df_n, apps=["com.instagram.android"], app_name="instagram")
instagram_use

Unnamed: 0_level_0,instagram_duration_min,instagram_frequency,instagram_notifications,instagram_eve_duration_min
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,53.549826,43.333333,12.128205,15.765994
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,18.48859,19.243243,4.0,5.881429


### Time of day
- Frequency smartphone evening
- Duration smartphone evening
- Frequency smartphone night
- Duration smartphone night

In [19]:
def tod_use(df: pd.DataFrame):
    """
    Calculates duration per time of day bin:
    early_morning, morning, noon, eve, night, late_night.

    :param df: appevents DataFrame
    :return: DataFrame with duration per time of day
    """
    logdays_pp = df.groupby("id")["startDate"].nunique()

    tod_use = (df.groupby(["id", "startTOD"])["duration"].sum() / logdays_pp / 60)
    tod_use = tod_use.reset_index(name="duration_min").pivot_table(columns="startTOD", index="id", values="duration_min")

    return tod_use[["early_morning", "morning", "noon", "eve", "night", "late_night"]]

In [20]:
time_of_day = tod_use(df)
time_of_day

startTOD,early_morning,morning,noon,eve,night,late_night
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,13.764464,52.656165,42.158774,55.720026,40.240107,8.775595
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,17.9647,52.384687,38.613657,40.773291,36.791221,0.410055
