In [36]:
import pandas as pd
import numpy as np

In [37]:
# Read the food-log CSV from the working directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer='dataNEW.csv')
# Preview the first five rows to sanity-check the columns that were parsed.
data.head(n=5)

Unnamed: 0,subject,date,day,time,logged_food,amount,unit,calorie,total_carb,sugar,protein,time_of_day,gender,datetime,hour,class,simplified_food
0,1.0,2020-02-13,1,18:00,Berry Smoothie,20.0,fluid ounce,456.0,85.0,83.0,16.0,Night,Female,2020-02-13 18:00:00,18,Beverage,smoothie
1,1.0,2020-02-14,2,07:10,Natrel Lactose Free 2 Percent,8.0,fluid ounce,120.0,9.0,8.0,12.0,Early Morning,Female,2020-02-14 07:10:00,7,Beverage,milk
2,1.0,2020-02-14,2,07:10,Standard Breakfast,0.75,cup,110.0,26.0,10.0,1.0,Early Morning,Female,2020-02-14 07:10:00,7,Meal,standard breakfast
3,1.0,2020-02-14,2,09:38,Breakfast Trail Mix,0.5,cup,280.0,30.0,22.0,4.0,Morning,Female,2020-02-14 09:38:00,9,Snack,standard breakfast
4,1.0,2020-02-14,2,12:38,Spinach Salad W/ Strawberries And Cheese,200.0,grams,286.0,14.0,8.5,7.6,Noon,Female,2020-02-14 12:38:00,12,Meal,salad


In [38]:
# Per-column flag: True when the column holds at least one missing value.
data.isna().any(axis=0)

subject            False
date               False
day                False
time               False
logged_food        False
amount              True
unit                True
calorie            False
total_carb         False
sugar              False
protein             True
time_of_day        False
gender             False
datetime           False
hour               False
class              False
simplified_food    False
dtype: bool

In [39]:
from collections import Counter

def mode(x):
    """Return the most frequent non-missing value in ``x``.

    Parameters
    ----------
    x : iterable of hashable
        Values to tally, e.g. the per-group Series handed over by
        ``groupby(...).agg``.

    Returns
    -------
    object or None
        The value with the highest count. When several values share the
        highest count, the one encountered first wins
        (``Counter.most_common`` keeps first-seen order for equal counts).
        ``None`` is returned when ``x`` is empty or contains only missing
        values.
    """
    # Drop None/NaN/NA before counting so a run of missing entries can never
    # be reported as the "most popular" value. Non-scalar items (e.g. tuples)
    # are kept untouched because pd.isna would return an array for them.
    counts = Counter(
        value
        for value in x
        if not (pd.api.types.is_scalar(value) and pd.isna(value))
    )
    most_common = counts.most_common(1)
    return most_common[0][0] if most_common else None

# For each (day, hour, class) slot, keep the single most frequently logged
# simplified food, as chosen by `mode`.
pop_foods = (
    data.groupby(['day', 'hour', 'class'])['simplified_food']
    .agg(mode)
    .reset_index()  # turn the group keys back into ordinary columns
)
pop_foods.head()

Unnamed: 0,day,hour,class,simplified_food
0,1,6,Beverage,coffee
1,1,6,Meal,seafood
2,1,8,Beverage,milk
3,1,8,Meal,cereal
4,1,8,Snack,snack bar


In [40]:
# Mean carbohydrate, sugar and calorie values per simplified food.
# Named aggregation produces the final column labels directly; the labels
# contain spaces, so they are passed through a dict rather than as keywords.
nutrition_stats = data.groupby('simplified_food', as_index=False).agg(
    **{
        'avg carb': ('total_carb', 'mean'),
        'avg sugar': ('sugar', 'mean'),
        'avg calorie': ('calorie', 'mean'),
    }
)

In [43]:
# Attach the per-food nutrition averages to each popular-food row.
# A left join keeps every row of pop_foods even if a food has no stats.
avg_foods = pd.merge(
    pop_foods,
    nutrition_stats,
    on='simplified_food',
    how='left',
)
avg_foods

Unnamed: 0,day,hour,class,simplified_food,avg carb,avg sugar,avg calorie
0,1,6,Beverage,coffee,5.415267,2.257252,42.806107
1,1,6,Meal,seafood,11.554545,2.763636,258.472727
2,1,8,Beverage,milk,8.387778,7.595556,95.266667
3,1,8,Meal,cereal,44.131915,14.597872,192.176596
4,1,8,Snack,snack bar,37.414286,18.350000,230.071429
...,...,...,...,...,...,...,...
383,10,17,Snack,nuts,6.277419,1.467742,147.945161
384,10,19,Meal,desserts,46.802273,23.536364,332.002273
385,10,19,Snack,chips,56.095238,2.209524,430.714286
386,10,20,Beverage,soda,20.493651,20.230159,76.309524


In [44]:
# Persist the merged table next to the notebook.
# NOTE(review): the default index is written too, so the file gains an
# unnamed first column; pass index=False if no consumer relies on it.
avg_foods.to_csv(path_or_buf='avg_foods.csv')