# mBrain: activity

In [6]:
from mobiledna.core.appevents import Appevents
from mobiledna.core.sessions import Sessions
from mobiledna.core.notifications import Notifications

import pandas as pd

In [3]:
# Load the app-event, session and notification logs from local parquet files.
ae, se, no = (
    Appevents.load_data("./appevents.parquet"),
    Sessions.load_data("./sessions.parquet"),
    Notifications.load_data("./notifications.parquet"),
)

# Pull the data out of each log object for the per-participant calculations below.
df, df_s, df_n = ae.get_data(), se.get_data(), no.get_data()


2021-06-23 15:48:38 - Recognized file type as <parquet>.
2021-06-23 15:48:38 - 'load' took 0.029 seconds to complete.
2021-06-23 15:48:38 - Recognized file type as <parquet>.
2021-06-23 15:48:38 - 'load' took 0.005 seconds to complete.
2021-06-23 15:48:38 - Recognized file type as <parquet>.
2021-06-23 15:48:38 - 'load' took 0.077 seconds to complete.


In [10]:
# Annotate the app events with a category and a time-of-day label, and the
# notifications with a category (see the progress bars in the cell output).
# NOTE(review): scrape=False presumably limits the category lookup to already
# known apps instead of fetching missing ones online - confirm in mobiledna.
# NOTE(review): df and df_n were obtained via get_data() before this cell ran;
# whether they contain the new columns depends on get_data() returning a
# reference rather than a copy - verify before relying on those columns.
ae.add_category(scrape=False).add_time_of_day()
no.add_category(scrape=False)

Adding category: 100%|██████████| 15065/15065 [00:00<00:00, 886178.56it/s]
Adding tod <startTime>: 100%|██████████| 15065/15065 [00:00<00:00, 1159227.82it/s]
Adding category: 100%|██████████| 451218/451218 [00:00<00:00, 1027778.66it/s]


## Variables

### Daily (unique) apps

In [11]:
# Count the distinct applications per participant-day, then average those
# daily counts per participant.
daily_unique_apps = (
    df.groupby(["id", "startDate"])["application"]
    .nunique()
    .groupby("id")
    .mean()
    .rename("daily_unique_apps")
)
daily_unique_apps

### Daily app frequency
Calculated in `depression.ipynb`

### Daily app duration
Calculated in `depression.ipynb`

### Battery status




In [12]:
def calc_battery_status(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates four battery status variables per participant:
    - battery_avg: mean over days of the daily mean battery level
    - battery_std: std dev over days of the daily mean battery level
    - battery_daily_charge: mean over days of the summed battery increases
    - battery_daily_discharge: mean over days of the summed battery decreases
      (reported as a positive number)

    A battery change is the difference between the battery level of an app
    event and that of the chronologically next app event of the same
    participant on the same day. Days on which no increase (resp. decrease)
    was observed do not contribute to the charge (resp. discharge) average.

    :param df: the appevents DataFrame; needs the columns `id`, `startDate`,
        `startTime` and `battery`. It is not modified.
    :return: results DataFrame indexed by `id` with 4 variables per participant
    """
    day_keys = ["id", "startDate"]

    # Sort BEFORE shifting: the shift has to be computed on the sorted frame,
    # otherwise "next event" means "next row in the caller's row order" and
    # the changes are wrong for input that is not already chronological.
    events = df.sort_values(["id", "startTime"])
    next_level = events.groupby(day_keys)["battery"].shift(-1)
    # The last event of each day has no successor, so its change is NaN and
    # is ignored by both filters below.
    events = events.assign(battery_change=next_level - events["battery"])

    daily_level = events.groupby(day_keys)["battery"].mean()
    battery_avg = daily_level.groupby("id").mean().rename("battery_avg")
    battery_std = daily_level.groupby("id").std().rename("battery_std")

    drops = events[events["battery_change"] < 0]
    gains = events[events["battery_change"] > 0]

    battery_discharge = (
        drops.groupby(day_keys)["battery_change"]
        .sum()
        .abs()
        .groupby("id")
        .mean()
        .rename("battery_daily_discharge")
    )
    battery_charge = (
        gains.groupby(day_keys)["battery_change"]
        .sum()
        .groupby("id")
        .mean()
        .rename("battery_daily_charge")
    )

    res = pd.concat([
        battery_avg,
        battery_std,
        battery_charge,
        battery_discharge
    ], axis=1)

    return res

In [13]:
# Battery level and charge/discharge summary, one row per participant.
battery_status = calc_battery_status(df)
battery_status

Unnamed: 0_level_0,battery_avg,battery_std,battery_daily_charge,battery_daily_discharge
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0f17152a-ec9e-4105-b58d-25b2ae692388,49.432072,11.561746,90.714286,110.333333
a9ac43fd-cf95-40d5-9c9a-3582b88993fe,51.377526,16.10309,77.4,53.702703


### Social media frequency
Calculated in `depression.ipynb`

### Average daily time between consecutive sessions

In [14]:
def calc_time_consecutive_session(df_s: pd.DataFrame) -> pd.Series:
    """
    Calculates the average time between two consecutive sessions (or pickups) per participant.

    The gap is measured in minutes from the end of a session to the start of
    the chronologically next session of the same participant on the same day.
    Gaps are first averaged per day and those daily averages are then
    averaged per participant; days with a single session have no gap and are
    skipped.

    :param df_s: the sessions DataFrame; needs the columns `id`, `startDate`,
        `startTime` and `endTime`. It is not modified.
    :return: Series (named `consecutive_session_time_min`, indexed by `id`)
        with average duration per participant
    """
    day_keys = ["id", "startDate"]

    # Order sessions chronologically per participant so that shift(-1) really
    # yields the next session rather than the next row of the caller's frame.
    sessions = df_s.sort_values(["id", "startTime"])
    next_start = sessions.groupby(day_keys)["startTime"].shift(-1)
    gap_minutes = (next_start - sessions["endTime"]).dt.total_seconds() / 60

    time_between = (
        sessions.assign(shift_duration=gap_minutes)
        .groupby(day_keys)["shift_duration"]
        .mean()
        .groupby("id")
        .mean()
        .rename("consecutive_session_time_min")
    )

    return time_between

In [15]:
# Mean same-day gap between consecutive sessions, in minutes, per participant.
consecutive_session_time = calc_time_consecutive_session(df_s=df_s)
consecutive_session_time

id
0f17152a-ec9e-4105-b58d-25b2ae692388     7.794656
a9ac43fd-cf95-40d5-9c9a-3582b88993fe    15.399400
Name: consecutive_session_time_min, dtype: float64