# Loading data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Import
data = pd.read_csv('dataset_mood_smartphone.csv')

#Convert time column to date time format
data['time']= pd.to_datetime(data['time']) 
data.head()

Unnamed: 0.1,Unnamed: 0,id,time,variable,value
0,1,AS14.01,2014-02-26 13:00:00,mood,6.0
1,2,AS14.01,2014-02-26 15:00:00,mood,6.0
2,3,AS14.01,2014-02-26 18:00:00,mood,6.0
3,4,AS14.01,2014-02-26 21:00:00,mood,7.0
4,5,AS14.01,2014-02-27 09:00:00,mood,6.0


# Changing to multi index with time and ID

In [2]:
data = data.set_index(['id', 'time'])
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,variable,value
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AS14.01,2014-02-26 13:00:00,1,mood,6.0
AS14.01,2014-02-26 15:00:00,2,mood,6.0
AS14.01,2014-02-26 18:00:00,3,mood,6.0
AS14.01,2014-02-26 21:00:00,4,mood,7.0
AS14.01,2014-02-27 09:00:00,5,mood,6.0


# Aggregating the data into multiple time window observations for each patient

We want to reformat the data such that we have a number of observations for each patient, with each observation consisting of some time period e.g one week. Each observation will have a measurement for each attribute (e.g., Avg time spent on weather app over period) with the dependent variable as the average mood over the period. 

In [4]:
#reshaping the dataframe so that each column is a feature, indexed first by patient then time
data2 = pd.pivot_table(data, index=['id', 'time'], columns='variable', values='value')
data2.head()

Unnamed: 0_level_0,variable,activity,appCat.builtin,appCat.communication,appCat.entertainment,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.utilities,appCat.weather,call,circumplex.arousal,circumplex.valence,mood,screen,sms
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AS14.01,2014-02-17 12:04:42.394,,,,,,,,,,,,,,1.0,,,,,
AS14.01,2014-02-17 18:28:25.520,,,,,,,,,,,,,,1.0,,,,,
AS14.01,2014-02-18 09:29:51.257,,,,,,,,,,,,,,1.0,,,,,
AS14.01,2014-02-19 14:43:30.575,,,,,,,,,,,,,,1.0,,,,,
AS14.01,2014-02-19 17:29:10.378,,,,,,,,,,,,,,1.0,,,,,


In [5]:
#Replace missing values with 0's - since we intend to take averages 
data2 = data2.fillna(value=0)
data2.head()

Unnamed: 0_level_0,variable,activity,appCat.builtin,appCat.communication,appCat.entertainment,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.utilities,appCat.weather,call,circumplex.arousal,circumplex.valence,mood,screen,sms
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AS14.01,2014-02-17 12:04:42.394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
AS14.01,2014-02-17 18:28:25.520,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
AS14.01,2014-02-18 09:29:51.257,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
AS14.01,2014-02-19 14:43:30.575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
AS14.01,2014-02-19 17:29:10.378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


# Want to obtain an average of all the values for each day - not working as desired

In [8]:
level_values = data2.index.get_level_values
result = (data2.groupby([level_values(i) for i in [0,1]]
                      +[pd.Grouper(freq='1D', level=-1)]).mean())


In [11]:
result

Unnamed: 0_level_0,Unnamed: 1_level_0,variable,activity,appCat.builtin,appCat.communication,appCat.entertainment,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.utilities,appCat.weather,call,circumplex.arousal,circumplex.valence,mood,screen,sms
id,time,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AS14.01,2014-02-17 12:04:42.394,2014-02-17,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-17 18:28:25.520,2014-02-17,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-18 09:29:51.257,2014-02-18,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-19 14:43:30.575,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-19 17:29:10.378,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-19 17:42:16.499,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-19 17:42:34.292,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,1.0
AS14.01,2014-02-19 17:43:18.061,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-19 17:44:01.594,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
AS14.01,2014-02-19 18:07:50.055,2014-02-19,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.000,0.0,1.0,0.0,0.0,0.0,0.000,0.0
