In [7]:
import pandas as pd
import numpy as np

In [52]:
# Read the per-day feature table, parse the date strings into datetimes,
# and discard the weekday column right after loading.
data_df = (
    pd.read_csv('datasets/mood_preprocessed_aggr_per_day_features.csv')
    .assign(date=lambda daily: pd.to_datetime(daily['date']))
    .drop(columns=['weekday'])
)
data_df

Unnamed: 0,date,id,screen,call,sms,appCat.builtin,appCat.communication,appCat.entertainment,appCat.finance,appCat.game,...,appCat.weather,circumplex.arousal,circumplex.valence,activity,mood,is_weekend,is_summer,is_winter,is_spring,is_autumn
0,2014-02-17,AS14.01,0.000000,2.0,0.0,0.000,0.000,0.0,0.0,0.0,...,0.0,-1.000000,0.0,0.071429,6.0,0,0,1,0,0
1,2014-02-17,AS14.02,0.000000,4.0,2.0,0.000,0.000,0.0,0.0,0.0,...,0.0,0.000000,1.0,0.142857,7.0,0,0,1,0,0
2,2014-02-17,AS14.03,0.000000,0.0,1.0,0.000,0.000,0.0,0.0,0.0,...,0.0,1.000000,2.0,0.290000,9.0,0,0,1,0,0
3,2014-02-17,AS14.06,0.000000,5.0,0.0,0.000,0.000,0.0,0.0,0.0,...,0.0,-1.000000,0.0,0.120000,7.0,0,0,1,0,0
4,2014-02-17,AS14.08,0.000000,0.0,4.0,0.000,0.000,0.0,0.0,0.0,...,0.0,0.000000,1.0,0.000000,7.0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1968,2014-06-05,AS14.24,1620.056999,11.0,0.0,801.994,795.073,0.0,0.0,0.0,...,0.0,0.223077,1.0,0.253627,7.0,0,1,0,0,0
1969,2014-06-06,AS14.24,2169.781000,16.0,0.0,838.897,1205.799,0.0,0.0,0.0,...,0.0,0.702128,1.0,0.287322,7.0,0,1,0,0,0
1970,2014-06-07,AS14.24,1121.516999,0.0,2.0,171.285,411.638,0.0,0.0,0.0,...,0.0,0.333333,1.0,0.317301,8.0,1,1,0,0,0
1971,2014-06-08,AS14.24,111.459000,0.0,0.0,88.239,3.042,0.0,0.0,0.0,...,0.0,-1.409091,1.0,0.102054,7.0,1,1,0,0,0


In [ ]:
# weekday column is already removed in the load cell above

In [53]:
# Columns reduced with 'sum' when rows are collapsed into periods.
sum_cols = [
    'screen',
    'call',
    'sms',
    # one column per app category
    'appCat.builtin',
    'appCat.communication',
    'appCat.entertainment',
    'appCat.finance',
    'appCat.game',
    'appCat.office',
    'appCat.other',
    'appCat.social',
    'appCat.travel',
    'appCat.unknown',
    'appCat.utilities',
    'appCat.weather',
]
# Columns reduced with 'mean'.
mean_cols = [
    'circumplex.arousal',
    'circumplex.valence',
    'activity',
    'mood',
]
# 0/1 flag columns reduced with 'max' (the flag is set if any row in the period has it set).
max_cols = [
    'is_weekend',
    'is_winter',
    'is_spring',
    'is_summer',
    'is_autumn',
]

In [54]:
# Number of consecutive rows (per participant) that form one period.
n_days = 5

# Order rows chronologically within each participant, then number each
# participant's rows 0, 1, 2, ... and bucket every n_days of them together.
# NOTE: periods are formed from row positions, so a gap between dates does
# not start a new period.
data_df = data_df.sort_values(by=['id', 'date'])
data_df['period'] = data_df.groupby('id').cumcount().floordiv(n_days)

In [55]:
# Inspect which period number each participant row received.
data_df.loc[:, ['id', 'period']]

Unnamed: 0,id,period
0,AS14.01,0
23,AS14.01,0
44,AS14.01,0
64,AS14.01,0
82,AS14.01,0
...,...,...
1952,AS14.33,19
1955,AS14.33,19
1958,AS14.33,19
1961,AS14.33,19


In [56]:
# Reduction applied to each feature inside one (id, period) group:
# 'sum' for sum_cols, 'mean' for mean_cols, 'max' for max_cols.
aggregations = dict.fromkeys(sum_cols, 'sum')
aggregations.update(dict.fromkeys(mean_cols, 'mean'))
aggregations.update(dict.fromkeys(max_cols, 'max'))

# One output row per participant and period; id/period become ordinary columns again.
aggregated_df = (
    data_df
    .groupby(['id', 'period'])
    .agg(aggregations)
    .reset_index()
)

# The period's mean mood is kept under its own name, which leaves the
# 'mood' column name free to be reassigned below.
aggregated_df = aggregated_df.rename(columns={'mood': 'average_mood'})

def get_next_available_day_mood(row, original_df):
    """Return the mood recorded on the first available day after a period ends.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'id'`` and ``'period'``.
    original_df : pandas.DataFrame
        Daily rows with ``'id'``, ``'period'``, ``'date'`` and ``'mood'`` columns.
        The rows do not need to be sorted.

    Returns
    -------
    The ``'mood'`` value of the same id's earliest row dated strictly after the
    last date of the given period, or ``np.nan`` when no such row exists.
    """
    # Restrict to this participant once; both lookups below reuse the subset.
    same_id = original_df[original_df['id'] == row['id']]
    last_date_of_period = same_id.loc[same_id['period'] == row['period'], 'date'].max()
    later_days = same_id[same_id['date'] > last_date_of_period]
    if later_days.empty:
        return np.nan  # nothing recorded after the last date of the period
    # Select the earliest later date explicitly rather than relying on the
    # frame's row order; for date-sorted input this is the same first row.
    earliest_date = later_days['date'].min()
    return later_days.loc[later_days['date'] == earliest_date, 'mood'].iloc[0]

# For every period row, look up the mood of the next day recorded after that
# period and store it in the 'mood' column.
aggregated_df['mood'] = aggregated_df.apply(
    lambda period_row: get_next_available_day_mood(period_row, data_df),
    axis=1,
)

print(aggregated_df)

          id  period       screen  call  sms  appCat.builtin  \
0    AS14.01       0     0.000000  12.0  6.0           0.000   
1    AS14.01       1     0.000000  10.0  3.0           0.000   
2    AS14.01       2     0.000000   8.0  1.0           0.000   
3    AS14.01       3     0.000000  18.0  2.0           0.000   
4    AS14.01       4     0.000000   2.0  7.0           0.000   
..       ...     ...          ...   ...  ...             ...   
400  AS14.33      16  6941.189998   9.0  2.0        2614.790   
401  AS14.33      17  5778.654001  13.0  1.0        1619.139   
402  AS14.33      18  7786.173998   9.0  2.0        2007.933   
403  AS14.33      19  9933.320998  30.0  7.0        3109.109   
404  AS14.33      20     0.000000   0.0  0.0           0.000   

     appCat.communication  appCat.entertainment  appCat.finance  appCat.game  \
0                   0.000                 0.000             0.0          0.0   
1                   0.000                 0.000             0.0        

In [57]:
# Keep only the periods whose 'mood' value is present; show the shape before and after.
print(aggregated_df.shape)
aggregated_df = aggregated_df.loc[aggregated_df['mood'].notna()].copy()
aggregated_df.shape

(405, 27)


(378, 27)

In [58]:
# List any rows that still contain a missing value in some column.
aggregated_df.loc[aggregated_df.isna().any(axis='columns')]

Unnamed: 0,id,period,screen,call,sms,appCat.builtin,appCat.communication,appCat.entertainment,appCat.finance,appCat.game,...,circumplex.arousal,circumplex.valence,activity,average_mood,is_weekend,is_winter,is_spring,is_summer,is_autumn,mood


In [59]:
# Write the per-period table to disk without the index column.
aggregated_df.to_csv(
    path_or_buf='datasets/mood_preprocessed_aggr_per_5_days_svm.csv',
    index=False,
)

In [60]:
# Display the per-period table (same frame that was written to CSV above).
aggregated_df

Unnamed: 0,id,period,screen,call,sms,appCat.builtin,appCat.communication,appCat.entertainment,appCat.finance,appCat.game,...,circumplex.arousal,circumplex.valence,activity,average_mood,is_weekend,is_winter,is_spring,is_summer,is_autumn,mood
0,AS14.01,0,0.000000,12.0,6.0,0.000,0.000,0.000,0.0,0.0,...,-1.000000,0.000000,0.071429,6.0,0,1,0,0,0,6.0
1,AS14.01,1,0.000000,10.0,3.0,0.000,0.000,0.000,0.0,0.0,...,-0.228571,0.438095,0.071429,6.2,1,1,0,0,0,7.0
2,AS14.01,2,0.000000,8.0,1.0,0.000,0.000,0.000,0.0,0.0,...,1.000000,1.000000,0.071429,7.0,1,0,1,0,0,7.0
3,AS14.01,3,0.000000,18.0,2.0,0.000,0.000,0.000,0.0,0.0,...,1.000000,1.000000,0.071429,7.0,0,0,1,0,0,7.0
4,AS14.01,4,0.000000,2.0,7.0,0.000,0.000,0.000,0.0,0.0,...,1.000000,1.000000,0.071429,7.0,1,0,1,0,0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399,AS14.33,15,8257.180001,21.0,4.0,3027.711,4408.295,446.972,0.0,0.0,...,-0.561333,0.344067,0.119155,6.0,1,0,1,0,0,7.0
400,AS14.33,16,6941.189998,9.0,2.0,2614.790,3707.217,395.409,0.0,0.0,...,-0.189519,0.383177,0.105005,6.2,1,0,1,0,0,7.0
401,AS14.33,17,5778.654001,13.0,1.0,1619.139,1581.079,304.165,0.0,0.0,...,-0.268950,0.344988,0.075447,6.2,1,0,1,0,0,7.0
402,AS14.33,18,7786.173998,9.0,2.0,2007.933,4231.449,105.806,0.0,0.0,...,-0.615803,-0.032416,0.080745,6.0,1,0,1,0,0,5.0
