# Handling Missing Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# A small string Series with one missing entry (np.nan) at position 2.
string_data = pd.Series(
    ["aardvark", "artichoke", np.nan, "avocado"]
)

In [3]:
string_data

0     aardvark
1    artichoke
2          NaN
3      avocado
dtype: object

check NaN values

In [4]:
string_data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

dropping na values

In [5]:
string_data.dropna()

0     aardvark
1    artichoke
3      avocado
dtype: object

In [6]:
# Short alias for the missing-value marker. Bound from the already-imported
# numpy module so that no import statement is needed in the middle of the notebook.
NA = np.nan

# 4x3 frame mixing complete rows, partially missing rows and one all-NaN row.
data = pd.DataFrame([[1., 6.5, 3.],
                     [1., NA, NA],
                     [NA, NA, NA],
                     [NA, 6.5, 3.]])

In [7]:
data

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [8]:
data.dropna()

Unnamed: 0,0,1,2
0,1.0,6.5,3.0


you can set any all options
<br> all only drops if all the values are NAN
<br> any drops even if one data point is NA

In [9]:
data.dropna(how="all")

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
3,,6.5,3.0


## Filling In Missing Data

In [10]:
data.fillna(0)

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,6.5,3.0


you can assign fill values differently for each column

In [11]:
data.fillna({1: 0.5, 2: 0})

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,0.5,0.0
2,,0.5,0.0
3,,6.5,3.0


# Removing Duplicates

In [12]:
 # Seven rows; the final row ('two', 4) is an exact repeat of the row before it.
 data = pd.DataFrame({'k1': ['one', 'two'] * 3 + ['two'],
                      'k2': [1, 1, 2, 3, 3, 4, 4]})


In [13]:
data

Unnamed: 0,k1,k2
0,one,1
1,two,1
2,one,2
3,two,3
4,one,3
5,two,4
6,two,4


you can check the duplicated rows

In [14]:
data.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
6     True
dtype: bool

you can change the column to be checked

In [15]:
data.duplicated(subset="k1")

0    False
1    False
2     True
3     True
4     True
5     True
6     True
dtype: bool

to drop duplicates you can use drop_duplicates()

In [16]:
data.drop_duplicates()

Unnamed: 0,k1,k2
0,one,1
1,two,1
2,one,2
3,two,3
4,one,3
5,two,4


In [17]:
data.drop_duplicates(subset="k1")

Unnamed: 0,k1,k2
0,one,1
1,two,1


with keep option you can select which rows to be kept

In [18]:
data.drop_duplicates(subset="k1",keep="last")

Unnamed: 0,k1,k2
4,one,3
6,two,4


# GroupBy Mechanics

split-apply-combine for describing group operation

<img src="attachment:Screen%20Shot%202018-11-19%20at%2013.05.09.png" width="600" height="400">

In [19]:
# Local, seeded generator: the two random columns come out identical on every
# Restart & Run All. The numbers printed in the saved outputs came from an
# unseeded run, so re-execute the notebook to refresh them.
rng = np.random.RandomState(42)

df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': rng.randn(5),
    'data2': rng.randn(5),
})

In [20]:
df

Unnamed: 0,data1,data2,key1,key2
0,0.094272,1.68044,a,one
1,-0.55002,-0.100884,a,two
2,0.15416,-0.237727,b,one
3,1.182051,0.687564,b,two
4,0.444474,-1.53655,a,one


Suppose you wanted to compute the mean of the data1 column using the labels from key1.

There are a number of ways to do this. One is to access data1 and call groupby with the column (a Series) at key1:

In [21]:
 # Select data1 first, then group it by the labels in the key1 column (passed as a Series).
 grouped = df["data1"].groupby(by=df["key1"])

In [22]:
grouped.groups

{'a': Int64Index([0, 1, 4], dtype='int64'),
 'b': Int64Index([2, 3], dtype='int64')}

In [23]:
grouped.mean()

key1
a   -0.003758
b    0.668105
Name: data1, dtype: float64

The result index has the name 'key1' because the DataFrame column df['key1'] did.

alternatively:

In [24]:
# numeric_only=True keeps the string column key2 out of the average; without it
# pandas >= 2.0 raises TypeError instead of silently dropping that column.
df.groupby('key1').mean(numeric_only=True)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.003758,0.014336
b,0.668105,0.224919


that there is no key2 column in the result. Because df['key2'] is not numeric data

if you use more than one column for grouping, resulting dataframe have multiindex format

In [25]:
df

Unnamed: 0,data1,data2,key1,key2
0,0.094272,1.68044,a,one
1,-0.55002,-0.100884,a,two
2,0.15416,-0.237727,b,one
3,1.182051,0.687564,b,two
4,0.444474,-1.53655,a,one


In [26]:
df.groupby(['key1',"key2"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,0.269373,0.071945
a,two,-0.55002,-0.100884
b,one,0.15416,-0.237727
b,two,1.182051,0.687564


# Iterating Over Groups

The GroupBy object supports iteration, generating a sequence of 2-tuples containing the group name along with the chunk of data.

In [27]:
# Each iteration yields the key1 label together with the rows that carry it.
for name, group in df.groupby("key1"):
    print(name, group, sep="\n")

a
      data1     data2 key1 key2
0  0.094272  1.680440    a  one
1 -0.550020 -0.100884    a  two
4  0.444474 -1.536550    a  one
b
      data1     data2 key1 key2
2  0.154160 -0.237727    b  one
3  1.182051  0.687564    b  two


In the case of multiple keys, the first element in the tuple will be a tuple of key values:

In [28]:
 # With two grouping keys the group label arrives as a (key1, key2) tuple.
 for (k1, k2), group in df.groupby(["key1", "key2"]):
     print((k1, k2), group, sep="\n")

('a', 'one')
      data1    data2 key1 key2
0  0.094272  1.68044    a  one
4  0.444474 -1.53655    a  one
('a', 'two')
     data1     data2 key1 key2
1 -0.55002 -0.100884    a  two
('b', 'one')
     data1     data2 key1 key2
2  0.15416 -0.237727    b  one
('b', 'two')
      data1     data2 key1 key2
3  1.182051  0.687564    b  two


## Selecting a Column or Subset of Columns

you can select a column or a subset of columns

In [29]:
df.groupby('key1')['data1']

<pandas.core.groupby.SeriesGroupBy object at 0x112fba7b8>

In [30]:
df.groupby('key1')[["data1"]]

<pandas.core.groupby.DataFrameGroupBy object at 0x112fe70f0>

depending on the selection it can return dataframe or series, even though we selected one series!!

In [31]:
df.groupby('key1')[["data1"]].mean()

Unnamed: 0_level_0,data1
key1,Unnamed: 1_level_1
a,-0.003758
b,0.668105


In [32]:
df.groupby('key1')['data1'].mean()

key1
a   -0.003758
b    0.668105
Name: data1, dtype: float64

# Data Aggregation

Aggregations refer to any data transformation that produces scalar values from arrays

Most commonly used operations optimzed in pandas

<img src="attachment:Screen%20Shot%202018-11-19%20at%2016.32.17.png" width="600" height="400" >

To use your own aggregation functions, pass any function that aggregates an array to the aggregate or agg method:

In [33]:
def peak_to_peak(arr):
    """Return the spread of ``arr``: its largest value minus its smallest.

    ``arr`` only needs ``max()`` and ``min()`` methods, as a NumPy array or a
    pandas Series has.
    """
    highest = arr.max()
    lowest = arr.min()
    return highest - lowest

In [34]:
# Aggregate only the numeric columns: peak_to_peak subtracts values, which fails
# for the strings in key2, and pandas >= 2.0 raises rather than dropping that column.
df.groupby('key1')[['data1', 'data2']].agg(peak_to_peak)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.994494,3.216991
b,1.027891,0.925291


# Example: TIPS data

In [39]:
# Load the tipping data set; the CSV is expected in the notebook's working directory.
tips = pd.read_csv("tips.csv")

In [41]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [42]:
# Tip expressed as a fraction of the total bill (0.15 means a 15% tip).
tips["tip_pct"] = tips["tip"].div(tips["total_bill"])

In [43]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.5,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.13978
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808


In [44]:
# numeric_only=True averages just the numeric columns; the remaining string
# columns (sex, time) make pandas >= 2.0 raise TypeError otherwise.
tips.groupby(["day", "smoker"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,size,tip_pct
day,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Fri,No,18.42,2.8125,2.25,0.15165
Fri,Yes,16.813333,2.714,2.066667,0.174783
Sat,No,19.661778,3.102889,2.555556,0.158048
Sat,Yes,21.276667,2.875476,2.47619,0.147906
Sun,No,20.506667,3.167895,2.929825,0.160113
Sun,Yes,24.12,3.516842,2.578947,0.18725
Thur,No,17.113111,2.673778,2.488889,0.160298
Thur,Yes,19.190588,3.03,2.352941,0.163863


## Applying more than one function 

In [45]:
# Pick the numeric columns explicitly (string columns cannot be averaged and
# pandas >= 2.0 raises on them) and name the aggregations as strings, which is
# the supported spelling; passing np.mean / np.std to agg is deprecated.
tips.groupby(["day", "smoker"])[["total_bill", "tip", "size", "tip_pct"]].agg(["mean", "std"])

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,tip,tip,size,size,tip_pct,tip_pct
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
day,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Fri,No,18.42,5.059282,2.8125,0.898494,2.25,0.5,0.15165,0.028123
Fri,Yes,16.813333,9.086388,2.714,1.077668,2.066667,0.593617,0.174783,0.051293
Sat,No,19.661778,8.939181,3.102889,1.642088,2.555556,0.78496,0.158048,0.039767
Sat,Yes,21.276667,10.069138,2.875476,1.63058,2.47619,0.862161,0.147906,0.061375
Sun,No,20.506667,8.130189,3.167895,1.224785,2.929825,1.032674,0.160113,0.042347
Sun,Yes,24.12,10.442511,3.516842,1.261151,2.578947,0.901591,0.18725,0.154134
Thur,No,17.113111,7.721728,2.673778,1.282964,2.488889,1.179796,0.160298,0.038774
Thur,Yes,19.190588,8.355149,3.03,1.113491,2.352941,0.701888,0.163863,0.039389


In [46]:
# Built-in aggregations are given by name and the custom one as a callable.
# Numeric columns are selected first because peak_to_peak subtracts values and
# pandas >= 2.0 no longer drops columns on which an aggregation fails.
tips.groupby(["day", "smoker"])[["total_bill", "tip", "size", "tip_pct"]].agg(
    ["mean", "std", peak_to_peak]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,peak_to_peak,mean,std,peak_to_peak,mean,std,peak_to_peak,mean,std,peak_to_peak
day,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Fri,No,18.42,5.059282,10.29,2.8125,0.898494,2.0,2.25,0.5,1,0.15165,0.028123,0.067349
Fri,Yes,16.813333,9.086388,34.42,2.714,1.077668,3.73,2.066667,0.593617,3,0.174783,0.051293,0.159925
Sat,No,19.661778,8.939181,41.08,3.102889,1.642088,8.0,2.555556,0.78496,3,0.158048,0.039767,0.235193
Sat,Yes,21.276667,10.069138,47.74,2.875476,1.63058,9.0,2.47619,0.862161,4,0.147906,0.061375,0.290095
Sun,No,20.506667,8.130189,39.4,3.167895,1.224785,4.99,2.929825,1.032674,4,0.160113,0.042347,0.193226
Sun,Yes,24.12,10.442511,38.1,3.516842,1.261151,5.0,2.578947,0.901591,3,0.18725,0.154134,0.644685
Thur,No,17.113111,7.721728,33.68,2.673778,1.282964,5.45,2.488889,1.179796,5,0.160298,0.038774,0.19335
Thur,Yes,19.190588,8.355149,32.77,3.03,1.113491,3.0,2.352941,0.701888,2,0.163863,0.039389,0.15124


## applying diferent functions to different columns

In [47]:
# Map each column to its own aggregation. The string name 'max' replaces np.max:
# recent pandas versions warn that NumPy callables passed to agg are deprecated.
tips.groupby(["day", "smoker"]).agg({'tip': 'max', 'size': 'sum'})

Unnamed: 0_level_0,Unnamed: 1_level_0,tip,size
day,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Fri,No,3.5,9
Fri,Yes,4.73,31
Sat,No,9.0,115
Sat,Yes,10.0,104
Sun,No,6.0,167
Sun,Yes,6.5,49
Thur,No,6.7,112
Thur,Yes,5.0,40


In [48]:
# Per-column aggregation spec: several statistics for tip_pct, a plain total for size.
tips.groupby(["day", "smoker"]).agg({
    'tip_pct': ['min', 'max', 'mean', 'std'],
    'size': 'sum',
})

Unnamed: 0_level_0,Unnamed: 1_level_0,tip_pct,tip_pct,tip_pct,tip_pct,size
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean,std,sum
day,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Fri,No,0.120385,0.187735,0.15165,0.028123,9
Fri,Yes,0.103555,0.26348,0.174783,0.051293,31
Sat,No,0.056797,0.29199,0.158048,0.039767,115
Sat,Yes,0.035638,0.325733,0.147906,0.061375,104
Sun,No,0.059447,0.252672,0.160113,0.042347,167
Sun,Yes,0.06566,0.710345,0.18725,0.154134,49
Thur,No,0.072961,0.266312,0.160298,0.038774,112
Thur,Yes,0.090014,0.241255,0.163863,0.039389,40


# Returning Aggregated Data Without Row Indexes

In [49]:
# Default behaviour: the group keys become a two-level row index.
# numeric_only=True is required on pandas >= 2.0, where the string columns
# (sex, time) would otherwise raise TypeError.
tips.groupby(["day", "smoker"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,size,tip_pct
day,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Fri,No,18.42,2.8125,2.25,0.15165
Fri,Yes,16.813333,2.714,2.066667,0.174783
Sat,No,19.661778,3.102889,2.555556,0.158048
Sat,Yes,21.276667,2.875476,2.47619,0.147906
Sun,No,20.506667,3.167895,2.929825,0.160113
Sun,Yes,24.12,3.516842,2.578947,0.18725
Thur,No,17.113111,2.673778,2.488889,0.160298
Thur,Yes,19.190588,3.03,2.352941,0.163863


In [50]:
# as_index=False returns the group keys as ordinary columns instead of an index.
# numeric_only=True is required on pandas >= 2.0, where the string columns
# (sex, time) would otherwise raise TypeError.
tips.groupby(["day", "smoker"], as_index=False).mean(numeric_only=True)

Unnamed: 0,day,smoker,total_bill,tip,size,tip_pct
0,Fri,No,18.42,2.8125,2.25,0.15165
1,Fri,Yes,16.813333,2.714,2.066667,0.174783
2,Sat,No,19.661778,3.102889,2.555556,0.158048
3,Sat,Yes,21.276667,2.875476,2.47619,0.147906
4,Sun,No,20.506667,3.167895,2.929825,0.160113
5,Sun,Yes,24.12,3.516842,2.578947,0.18725
6,Thur,No,17.113111,2.673778,2.488889,0.160298
7,Thur,Yes,19.190588,3.03,2.352941,0.163863


Of course, it’s always possible to obtain the result in this format by calling reset_index on the result. 
<br> Using the as_index=False method avoids some unneces‐ sary computations.

# using apply function with groupby 

In [53]:
def top(df, n=5, column='tip_pct'):
    """Return the ``n`` rows of ``df`` with the largest values in ``column``.

    The frame is sorted in ascending order by ``column`` and the last ``n``
    rows are kept, so the largest value sits in the final row of the result.

    Parameters
    ----------
    df : DataFrame to select rows from.
    n : number of rows to keep (default 5); ``n=0`` gives an empty frame.
    column : label of the column to rank by (default ``'tip_pct'``).

    Returns
    -------
    DataFrame holding the selected rows with their original index labels.
    """
    # tail(n) instead of [-n:]: slicing with -0 is the same as [0:], so the
    # original returned every row when n == 0.
    return df.sort_values(by=column).tail(n)

In [52]:
top(tips, n=6)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525
183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535
232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199
67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733
178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667
172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345


In [54]:
tips.groupby('smoker').apply(top)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,sex,smoker,day,time,size,tip_pct
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
No,88,24.71,5.85,Male,No,Thur,Lunch,2,0.236746
No,185,20.69,5.0,Male,No,Sun,Dinner,5,0.241663
No,51,10.29,2.6,Female,No,Sun,Dinner,2,0.252672
No,149,7.51,2.0,Male,No,Thur,Lunch,2,0.266312
No,232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199
Yes,109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525
Yes,183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535
Yes,67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733
Yes,178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667
Yes,172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345


The top function is called on each row group from the DataFrame, and then the results are glued together using pandas.concat, labeling the pieces with the group names.

If you pass a function to apply that takes other arguments or keywords, you can pass these after the function:

In [55]:
tips.groupby(['smoker', 'day']).apply(top, n=1, column='total_bill')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_bill,tip,sex,smoker,day,time,size,tip_pct
smoker,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
No,Fri,94,22.75,3.25,Female,No,Fri,Dinner,2,0.142857
No,Sat,212,48.33,9.0,Male,No,Sat,Dinner,4,0.18622
No,Sun,156,48.17,5.0,Male,No,Sun,Dinner,6,0.103799
No,Thur,142,41.19,5.0,Male,No,Thur,Lunch,5,0.121389
Yes,Fri,95,40.17,4.73,Male,Yes,Fri,Dinner,4,0.11775
Yes,Sat,170,50.81,10.0,Male,Yes,Sat,Dinner,3,0.196812
Yes,Sun,182,45.35,3.5,Male,Yes,Sun,Dinner,3,0.077178
Yes,Thur,197,43.11,5.0,Female,Yes,Thur,Lunch,4,0.115982


## Suppressing the Group Keys

In [56]:
tips.groupby('smoker', group_keys=False).apply(top)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
88,24.71,5.85,Male,No,Thur,Lunch,2,0.236746
185,20.69,5.0,Male,No,Sun,Dinner,5,0.241663
51,10.29,2.6,Female,No,Sun,Dinner,2,0.252672
149,7.51,2.0,Male,No,Thur,Lunch,2,0.266312
232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199
109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525
183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535
67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733
178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667
172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345


# Pivot Tables 

A pivot table is a data summarization tool frequently found in spreadsheet programs and other data analysis software. It aggregates a table of data by one or more keys, arranging the data in a rectangle with some of the group keys along the rows and some along the columns. Pivot tables in Python with pandas are made possible through the groupby facility described in this chapter combined with reshape operations utilizing hierarchical indexing.

In [58]:
 # Group means (the default aggfunc) for every numeric column. The values are
 # listed explicitly because pandas >= 2.0 raises on the string columns
 # (sex, time) rather than dropping them from the table.
 tips.pivot_table(values=['size', 'tip', 'tip_pct', 'total_bill'],
                  index=['day', 'smoker'])

Unnamed: 0_level_0,Unnamed: 1_level_0,size,tip,tip_pct,total_bill
day,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Fri,No,2.25,2.8125,0.15165,18.42
Fri,Yes,2.066667,2.714,0.174783,16.813333
Sat,No,2.555556,3.102889,0.158048,19.661778
Sat,Yes,2.47619,2.875476,0.147906,21.276667
Sun,No,2.929825,3.167895,0.160113,20.506667
Sun,Yes,2.578947,3.516842,0.18725,24.12
Thur,No,2.488889,2.673778,0.160298,17.113111
Thur,Yes,2.352941,3.03,0.163863,19.190588


In [59]:
# Mean tip_pct and party size: time/day down the rows, smoker across the columns.
tips.pivot_table(
    values=['tip_pct', 'size'],
    index=['time', 'day'],
    columns='smoker',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,size,size,tip_pct,tip_pct
Unnamed: 0_level_1,smoker,No,Yes,No,Yes
time,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Dinner,Fri,2.0,2.222222,0.139622,0.165347
Dinner,Sat,2.555556,2.47619,0.158048,0.147906
Dinner,Sun,2.929825,2.578947,0.160113,0.18725
Dinner,Thur,2.0,,0.159744,
Lunch,Fri,3.0,1.833333,0.187735,0.188937
Lunch,Thur,2.5,2.352941,0.160311,0.163863


In [60]:
# Same table with margins=True, which adds the "All" row and columns of overall means.
tips.pivot_table(
    values=['tip_pct', 'size'],
    index=['time', 'day'],
    columns='smoker',
    margins=True,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,smoker,No,Yes,All,No,Yes,All
time,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Dinner,Fri,2.0,2.222222,2.166667,0.139622,0.165347,0.158916
Dinner,Sat,2.555556,2.47619,2.517241,0.158048,0.147906,0.153152
Dinner,Sun,2.929825,2.578947,2.842105,0.160113,0.18725,0.166897
Dinner,Thur,2.0,,2.0,0.159744,,0.159744
Lunch,Fri,3.0,1.833333,2.0,0.187735,0.188937,0.188765
Lunch,Thur,2.5,2.352941,2.459016,0.160311,0.163863,0.161301
All,,2.668874,2.408602,2.569672,0.159328,0.163196,0.160803


In [61]:
# Mean tip_pct per (time, size, smoker) row and day column; empty combinations show 0.
tips.pivot_table(
    values='tip_pct',
    index=['time', 'size', 'smoker'],
    columns='day',
    aggfunc='mean',
    fill_value=0,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,day,Fri,Sat,Sun,Thur
time,size,smoker,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dinner,1,No,0.0,0.137931,0.0,0.0
Dinner,1,Yes,0.0,0.325733,0.0,0.0
Dinner,2,No,0.139622,0.162705,0.168859,0.159744
Dinner,2,Yes,0.171297,0.148668,0.207893,0.0
Dinner,3,No,0.0,0.154661,0.152663,0.0
Dinner,3,Yes,0.0,0.144995,0.15266,0.0
Dinner,4,No,0.0,0.150096,0.148143,0.0
Dinner,4,Yes,0.11775,0.124515,0.19337,0.0
Dinner,5,No,0.0,0.0,0.206928,0.0
Dinner,5,Yes,0.0,0.106572,0.06566,0.0
