In [1]:
import pandas as pd

In [2]:
# Logic : All files under app_running follows similar structure.
# Strategy : Reading a sample file and using it to generalize the rest 

df_00 = pd.read_csv('dataset/dinning/u01.txt',names=['datetime','restaurent','meal'],usecols=['datetime','meal'],parse_dates=['datetime'])
df_00.head()

Unnamed: 0,datetime,meal
0,2013-01-06 17:42:49,Supper
1,2013-01-07 09:32:57,Breakfast
2,2013-01-07 14:16:07,Lunch
3,2013-01-08 12:51:22,Lunch
4,2013-01-09 13:46:44,Lunch


In [3]:
df_00 = df_00.sort_values(by='datetime')

In [4]:
df_00['date'] = df_00.datetime.dt.date
df_00

Unnamed: 0,datetime,meal,date
0,2013-01-06 17:42:49,Supper,2013-01-06
1,2013-01-07 09:32:57,Breakfast,2013-01-07
2,2013-01-07 14:16:07,Lunch,2013-01-07
3,2013-01-08 12:51:22,Lunch,2013-01-08
4,2013-01-09 13:46:44,Lunch,2013-01-09
...,...,...,...
229,2013-05-29 00:26:54,Snack,2013-05-29
230,2013-05-29 12:31:55,Lunch,2013-05-29
231,2013-05-30 14:35:05,Lunch,2013-05-30
232,2013-05-30 19:25:47,Supper,2013-05-30


In [5]:
final_df = df_00.groupby(by='date').agg(
    skipped_meal = ('meal', lambda x: not({'Breakfast', 'Lunch', 'Supper'}  <=  set(x.unique()))),
    meals_history = ('meal','sum'),
    had_snack = ('meal',lambda x: 'Snack' in x.unique()),
    total_meals = ('meal','count')
)

In [6]:
final_df['skipped_meal'] = final_df['skipped_meal'].astype('int')
final_df['had_snack'] = final_df['had_snack'].astype('int')

In [7]:
final_df.describe()

Unnamed: 0,skipped_meal,had_snack,total_meals
count,119.0,119.0,119.0
mean,0.991597,0.352941,1.966387
std,0.09167,0.479905,0.891892
min,0.0,0.0,1.0
25%,1.0,0.0,1.0
50%,1.0,0.0,2.0
75%,1.0,1.0,2.0
max,1.0,1.0,5.0


In [12]:
def get_dinning_df(filename):
    df_00 = pd.read_csv(
        filename,
        names=['datetime','restaurent','meal'],
        usecols=['datetime','meal'],
        parse_dates=['datetime']
    )
    
    df_00['date'] = df_00.datetime.dt.date
    
    final_df = df_00.groupby(by='date').agg(
        skipped_meal = ('meal', lambda x: not({'Breakfast', 'Lunch', 'Supper'}  <=  set(x.unique()))),
        total_meals = ('meal','count')
    )
    final_df['skipped_meal'] = final_df['skipped_meal'].astype('int')
    final_df['uid'] = filename.split('/')[-1].replace('.txt','')
    
    
    return final_df

In [14]:
# Merging all student datas
import os
root_dir = '../dataset/dinning'
# os.path.join('dataset','app_usage')
user_files = os.listdir(path=root_dir)
combined_dfs = []
for file in user_files:
    print('Ingesting : ',file)
    file_path = os.path.join(root_dir,file)
    df= get_dinning_df(file_path)
    combined_dfs.append(df)
dinning_df = pd.concat(combined_dfs)

Ingesting :  u22.txt
Ingesting :  u02.txt
Ingesting :  u10.txt
Ingesting :  u36.txt
Ingesting :  u49.txt
Ingesting :  u42.txt
Ingesting :  u04.txt
Ingesting :  u20.txt
Ingesting :  u09.txt
Ingesting :  u19.txt
Ingesting :  u30.txt
Ingesting :  u05.txt
Ingesting :  u18.txt
Ingesting :  u12.txt
Ingesting :  u54.txt
Ingesting :  u25.txt
Ingesting :  u01.txt
Ingesting :  u46.txt
Ingesting :  u15.txt
Ingesting :  u57.txt
Ingesting :  u14.txt
Ingesting :  u47.txt
Ingesting :  u33.txt
Ingesting :  u16.txt
Ingesting :  u43.txt
Ingesting :  u08.txt
Ingesting :  u27.txt
Ingesting :  u59.txt
Ingesting :  u32.txt
Ingesting :  u24.txt
Ingesting :  u07.txt


In [15]:
dinning_df

Unnamed: 0_level_0,skipped_meal,total_meals,uid
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-06,1,3,u22
2013-01-07,1,2,u22
2013-01-08,1,2,u22
2013-01-09,1,2,u22
2013-01-10,0,3,u22
...,...,...,...
2013-05-26,1,1,u07
2013-05-27,1,2,u07
2013-05-28,1,2,u07
2013-05-29,1,1,u07


In [16]:
dinning_df.isna().sum()

skipped_meal    0
total_meals     0
uid             0
dtype: int64