Comandos equivalentes de SQL en Pandas y R dplyr
===

* *60 min* | Última modificación: Julio 04, 2019

Adaptado de: https://pandas.pydata.org/pandas-docs/stable/getting_started/comparison/comparison_with_sql.html

## Preparación

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext rpy2.ipython

## Carga de datos

In [2]:
df = pd.read_csv(
    "https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv",
    sep = ',',         
    thousands = None,  
    decimal = '.')

In [3]:
%%R
library(dplyr)

R[write to console]: 
Attaching package: ‘dplyr’


R[write to console]: The following objects are masked from ‘package:stats’:

    filter, lag


R[write to console]: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Select

In [4]:
##
##  SELECT 
##      total_bill, 
##      tip, 
##      smoker, 
##      time
##  FROM 
##      df
##  LIMIT 5;
##
df[['total_bill', 'tip', 'smoker', 'time']].head(5)

Unnamed: 0,total_bill,tip,smoker,time
0,16.99,1.01,No,Dinner
1,10.34,1.66,No,Dinner
2,21.01,3.5,No,Dinner
3,23.68,3.31,No,Dinner
4,24.59,3.61,No,Dinner


In [5]:
%%R -i df
df %>% select(total_bill, tip , smoker, time) %>% head(5)

  total_bill  tip smoker   time
0      16.99 1.01     No Dinner
1      10.34 1.66     No Dinner
2      21.01 3.50     No Dinner
3      23.68 3.31     No Dinner
4      24.59 3.61     No Dinner


## Where

In [6]:
##
##  SELECT 
##      *
##  FROM 
##      df
##  WHERE 
##      time = 'Dinner'
##  LIMIT 5;
##
df[df['time'] == 'Dinner'].head(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [7]:
%%R
df %>% filter(time == 'Dinner') %>% head(5)

  total_bill  tip    sex smoker day   time size
1      16.99 1.01 Female     No Sun Dinner    2
2      10.34 1.66   Male     No Sun Dinner    3
3      21.01 3.50   Male     No Sun Dinner    3
4      23.68 3.31   Male     No Sun Dinner    2
5      24.59 3.61 Female     No Sun Dinner    4


In [8]:
is_dinner = df['time'] == 'Dinner'
is_dinner.value_counts()

True     176
False     68
Name: time, dtype: int64

In [9]:
df[is_dinner].head(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [10]:
##
##  SELECT 
##      *
##  FROM 
##      df
##  WHERE 
##      time = 'Dinner' AND tip > 5.00;
##
df[(df['time'] == 'Dinner') & (df['tip'] > 5.00)]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
23,39.42,7.58,Male,No,Sat,Dinner,4
44,30.4,5.6,Male,No,Sun,Dinner,4
47,32.4,6.0,Male,No,Sun,Dinner,4
52,34.81,5.2,Female,No,Sun,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
116,29.93,5.07,Male,No,Sun,Dinner,4
155,29.85,5.14,Female,No,Sun,Dinner,5
170,50.81,10.0,Male,Yes,Sat,Dinner,3
172,7.25,5.15,Male,Yes,Sun,Dinner,2
181,23.33,5.65,Male,Yes,Sun,Dinner,2


In [11]:
%%R 
df %>% filter(time == 'Dinner', tip > 5.00 )

   total_bill   tip    sex smoker day   time size
1       39.42  7.58   Male     No Sat Dinner    4
2       30.40  5.60   Male     No Sun Dinner    4
3       32.40  6.00   Male     No Sun Dinner    4
4       34.81  5.20 Female     No Sun Dinner    4
5       48.27  6.73   Male     No Sat Dinner    4
6       29.93  5.07   Male     No Sun Dinner    4
7       29.85  5.14 Female     No Sun Dinner    5
8       50.81 10.00   Male    Yes Sat Dinner    3
9        7.25  5.15   Male    Yes Sun Dinner    2
10      23.33  5.65   Male    Yes Sun Dinner    2
11      23.17  6.50   Male    Yes Sun Dinner    4
12      25.89  5.16   Male    Yes Sat Dinner    4
13      48.33  9.00   Male     No Sat Dinner    4
14      28.17  6.50 Female    Yes Sat Dinner    3
15      29.03  5.92   Male     No Sat Dinner    3


In [12]:
%%R 
df %>% filter(time == 'Dinner' & tip > 5.00 )

   total_bill   tip    sex smoker day   time size
1       39.42  7.58   Male     No Sat Dinner    4
2       30.40  5.60   Male     No Sun Dinner    4
3       32.40  6.00   Male     No Sun Dinner    4
4       34.81  5.20 Female     No Sun Dinner    4
5       48.27  6.73   Male     No Sat Dinner    4
6       29.93  5.07   Male     No Sun Dinner    4
7       29.85  5.14 Female     No Sun Dinner    5
8       50.81 10.00   Male    Yes Sat Dinner    3
9        7.25  5.15   Male    Yes Sun Dinner    2
10      23.33  5.65   Male    Yes Sun Dinner    2
11      23.17  6.50   Male    Yes Sun Dinner    4
12      25.89  5.16   Male    Yes Sat Dinner    4
13      48.33  9.00   Male     No Sat Dinner    4
14      28.17  6.50 Female    Yes Sat Dinner    3
15      29.03  5.92   Male     No Sat Dinner    3


In [13]:
##
##  SELECT 
##      *
##  FROM
##      df
##  WHERE
##      size >= 5 OR total_bill > 45;
##
df[(df['size'] >= 5) | (df['total_bill'] > 45)]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
59,48.27,6.73,Male,No,Sat,Dinner,4
125,29.8,4.2,Female,No,Thur,Lunch,6
141,34.3,6.7,Male,No,Thur,Lunch,6
142,41.19,5.0,Male,No,Thur,Lunch,5
143,27.05,5.0,Female,No,Thur,Lunch,6
155,29.85,5.14,Female,No,Sun,Dinner,5
156,48.17,5.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3
182,45.35,3.5,Male,Yes,Sun,Dinner,3
185,20.69,5.0,Male,No,Sun,Dinner,5


In [14]:
%%R
df %>% filter(size >= 5 | total_bill > 45)

   total_bill   tip    sex smoker  day   time size
1       48.27  6.73   Male     No  Sat Dinner    4
2       29.80  4.20 Female     No Thur  Lunch    6
3       34.30  6.70   Male     No Thur  Lunch    6
4       41.19  5.00   Male     No Thur  Lunch    5
5       27.05  5.00 Female     No Thur  Lunch    6
6       29.85  5.14 Female     No  Sun Dinner    5
7       48.17  5.00   Male     No  Sun Dinner    6
8       50.81 10.00   Male    Yes  Sat Dinner    3
9       45.35  3.50   Male    Yes  Sun Dinner    3
10      20.69  5.00   Male     No  Sun Dinner    5
11      30.46  2.00   Male    Yes  Sun Dinner    5
12      48.33  9.00   Male     No  Sat Dinner    4
13      28.15  3.00   Male    Yes  Sat Dinner    5


## Group by

In [15]:
##
##  SELECT 
##      sex, 
##      count(*)
##  FROM 
##      df
##  GROUP BY 
##      sex;
##
df.groupby('sex').size()

sex
Female     87
Male      157
dtype: int64

In [16]:
%%R
df %>% group_by(sex) %>% summarize(n())

[90m# A tibble: 2 x 2[39m
  sex    `n()`
  [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m
[90m1[39m Female    87
[90m2[39m Male     157


In [17]:
df.groupby('sex').count()

Unnamed: 0_level_0,total_bill,tip,smoker,day,time,size
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,87,87,87,87,87,87
Male,157,157,157,157,157,157


In [18]:
df.groupby('sex')['total_bill'].count()

sex
Female     87
Male      157
Name: total_bill, dtype: int64

In [19]:
##
##  SELECT 
##      day, 
##      AVG(tip), 
##      COUNT(*)
##  FROM 
##      df
##  GROUP BY 
##      day;
##
df.groupby('day').agg({'tip': np.mean, 'day': np.size})

Unnamed: 0_level_0,tip,day
day,Unnamed: 1_level_1,Unnamed: 2_level_1
Fri,2.734737,19
Sat,2.993103,87
Sun,3.255132,76
Thur,2.771452,62


In [20]:
%%R
df %>% group_by(day) %>% summarize(mean_tip=mean(tip), n_day=n())

[90m# A tibble: 4 x 3[39m
  day   mean_tip n_day
  [3m[90m<chr>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<int>[39m[23m
[90m1[39m Fri       2.73    19
[90m2[39m Sat       2.99    87
[90m3[39m Sun       3.26    76
[90m4[39m Thur      2.77    62


In [21]:
##
##  SELECT 
##      smoker, 
##      day, 
##      COUNT(*), 
##      AVG(tip)
##  FROM 
##      df
##  GROUP BY 
##      smoker, 
##      day;
##
df.groupby(['smoker', 'day']).agg({'tip': [np.size, np.mean]})

Unnamed: 0_level_0,Unnamed: 1_level_0,tip,tip
Unnamed: 0_level_1,Unnamed: 1_level_1,size,mean
smoker,day,Unnamed: 2_level_2,Unnamed: 3_level_2
No,Fri,4.0,2.8125
No,Sat,45.0,3.102889
No,Sun,57.0,3.167895
No,Thur,45.0,2.673778
Yes,Fri,15.0,2.714
Yes,Sat,42.0,2.875476
Yes,Sun,19.0,3.516842
Yes,Thur,17.0,3.03


In [22]:
%%R
df %>% group_by(smoker, day) %>% summarize(mean(tip), n())

[90m# A tibble: 8 x 4[39m
[90m# Groups:   smoker [2][39m
  smoker day   `mean(tip)` `n()`
  [3m[90m<chr>[39m[23m  [3m[90m<chr>[39m[23m       [3m[90m<dbl>[39m[23m [3m[90m<int>[39m[23m
[90m1[39m No     Fri          2.81     4
[90m2[39m No     Sat          3.10    45
[90m3[39m No     Sun          3.17    57
[90m4[39m No     Thur         2.67    45
[90m5[39m Yes    Fri          2.71    15
[90m6[39m Yes    Sat          2.88    42
[90m7[39m Yes    Sun          3.52    19
[90m8[39m Yes    Thur         3.03    17


In [23]:
##
## Funciones que pueden ser aplicadas a un DataFrame:
##
##    abs        all       any       clip    clip_lower  clip_upper
##    corr       corrwith  count     cov     cummax      cummin
##    cumprod    cumsum    describe  diff    eval        kurt
##    mad        max       mean      median  min         mode
##    ct_change  prod      quantile  rank    round       sem
##    skew       sum       std       var
##

## Join

In [24]:
df1 = pd.DataFrame({
    'key': ['A', 'B', 'C', 'D'],
    'value': np.random.randn(4)})
 
df2 = pd.DataFrame({
    'key': ['B', 'D', 'D', 'E'],
    'value': np.random.randn(4)})

### Inner join

In [25]:
## 
##  SELECT 
##      *
##  FROM 
##      df1
##  INNER JOIN 
##      df2
##    ON df1.key = df2.key;
##
pd.merge(df1, df2, on='key')

Unnamed: 0,key,value_x,value_y
0,B,1.350272,0.347365
1,D,0.26338,0.814754
2,D,0.26338,-0.460513


### Left outer join

In [26]:
##
##  SELECT 
##      *
##  FROM 
##      df1
##  LEFT OUTER JOIN 
##       df2
##    ON 
##       df1.key = df2.key;
##
pd.merge(df1, df2, on='key', how='left')

Unnamed: 0,key,value_x,value_y
0,A,1.160352,
1,B,1.350272,0.347365
2,C,-0.088324,
3,D,0.26338,0.814754
4,D,0.26338,-0.460513


### Right join

In [27]:
##
##  SELECT 
##      *
##  FROM 
##      df1
##  RIGHT OUTER JOIN 
##      df2
##  ON 
##      df1.key = df2.key;
##
pd.merge(df1, df2, on='key', how='right')

Unnamed: 0,key,value_x,value_y
0,B,1.350272,0.347365
1,D,0.26338,0.814754
2,D,0.26338,-0.460513
3,E,,-0.175408


### Full join

In [28]:
##
##  SELECT 
##      *
##  FROM 
##      df1
##  FULL OUTER JOIN 
##       df2
##    ON 
##       df1.key = df2.key;
##
pd.merge(df1, df2, on='key', how='outer')

Unnamed: 0,key,value_x,value_y
0,A,1.160352,
1,B,1.350272,0.347365
2,C,-0.088324,
3,D,0.26338,0.814754
4,D,0.26338,-0.460513
5,E,,-0.175408


## Union

In [29]:
df1 = pd.DataFrame({
    'city': ['Chicago', 'San Francisco', 'New York City'],
    'rank': range(1, 4)})

df2 = pd.DataFrame({
    'city': ['Chicago', 'Boston', 'Los Angeles'],
    'rank': [1, 4, 5]})

In [30]:
##
##  SELECT city, rank
##  FROM df1
##  UNION ALL
##  SELECT city, rank
##  FROM df2;
##
pd.concat([df1, df2])

Unnamed: 0,city,rank
0,Chicago,1
1,San Francisco,2
2,New York City,3
0,Chicago,1
1,Boston,4
2,Los Angeles,5


In [31]:
##
##  SELECT 
##      city, 
##     rank
##  FROM 
##      df1
##  UNION
##      SELECT 
##          city, 
##          rank
##      FROM 
##          df2
##
pd.concat([df1, df2]).drop_duplicates()

Unnamed: 0,city,rank
0,Chicago,1
1,San Francisco,2
2,New York City,3
1,Boston,4
2,Los Angeles,5


## Funciones analíticas y de agregación

### Top N rows with offset

In [32]:
##
##  SELECT 
##      * 
##  FROM 
##      df
##  ORDER BY 
##      tip DESC
##  LIMIT 10 OFFSET 5;
##
df.nlargest(10 + 5, columns='tip').tail(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
183,23.17,6.5,Male,Yes,Sun,Dinner,4
214,28.17,6.5,Female,Yes,Sat,Dinner,3
47,32.4,6.0,Male,No,Sun,Dinner,4
239,29.03,5.92,Male,No,Sat,Dinner,3
88,24.71,5.85,Male,No,Thur,Lunch,2
181,23.33,5.65,Male,Yes,Sun,Dinner,2
44,30.4,5.6,Male,No,Sun,Dinner,4
52,34.81,5.2,Female,No,Sun,Dinner,4
85,34.83,5.17,Female,No,Thur,Lunch,4
211,25.89,5.16,Male,Yes,Sat,Dinner,4


In [33]:
%%R
df %>% arrange(desc(tip)) %>% head(15) %>% tail(10)

   total_bill  tip    sex smoker  day   time size
6       23.17 6.50   Male    Yes  Sun Dinner    4
7       28.17 6.50 Female    Yes  Sat Dinner    3
8       32.40 6.00   Male     No  Sun Dinner    4
9       29.03 5.92   Male     No  Sat Dinner    3
10      24.71 5.85   Male     No Thur  Lunch    2
11      23.33 5.65   Male    Yes  Sun Dinner    2
12      30.40 5.60   Male     No  Sun Dinner    4
13      34.81 5.20 Female     No  Sun Dinner    4
14      34.83 5.17 Female     No Thur  Lunch    4
15      25.89 5.16   Male    Yes  Sat Dinner    4


### Top N rows per group

In [34]:
##
##  SELECT * FROM (
##    SELECT
##      t.*,
##      ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn
##    FROM df t
##  )
##  WHERE rn < 3
##  ORDER BY day, rn;
##  
(df.assign(rn=df.sort_values(['total_bill'], ascending=False)
             .groupby(['day'])
             .cumcount() + 1)
 .query('rn < 3')
 .sort_values(['day', 'rn']))

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,rn
95,40.17,4.73,Male,Yes,Fri,Dinner,4,1
90,28.97,3.0,Male,Yes,Fri,Dinner,2,2
170,50.81,10.0,Male,Yes,Sat,Dinner,3,1
212,48.33,9.0,Male,No,Sat,Dinner,4,2
156,48.17,5.0,Male,No,Sun,Dinner,6,1
182,45.35,3.5,Male,Yes,Sun,Dinner,3,2
197,43.11,5.0,Female,Yes,Thur,Lunch,4,1
142,41.19,5.0,Male,No,Thur,Lunch,5,2


In [35]:
%%R
df  %>% group_by(day) %>% arrange(desc(total_bill)) %>% mutate(rn=1:n()) %>% filter(rn < 3) %>% arrange(day, rn)

[90m# A tibble: 8 x 8[39m
[90m# Groups:   day [4][39m
  total_bill   tip sex    smoker day   time    size    rn
       [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m  [3m[90m<chr>[39m[23m  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m
[90m1[39m       40.2  4.73 Male   Yes    Fri   Dinner     4     1
[90m2[39m       29.0  3    Male   Yes    Fri   Dinner     2     2
[90m3[39m       50.8 10    Male   Yes    Sat   Dinner     3     1
[90m4[39m       48.3  9    Male   No     Sat   Dinner     4     2
[90m5[39m       48.2  5    Male   No     Sun   Dinner     6     1
[90m6[39m       45.4  3.5  Male   Yes    Sun   Dinner     3     2
[90m7[39m       43.1  5    Female Yes    Thur  Lunch      4     1
[90m8[39m       41.2  5    Male   No     Thur  Lunch      5     2


In [36]:
(df.assign(rnk=df.groupby(['day'])['total_bill']
             .rank(method='first', ascending=False))
 .query('rnk < 3')
 .sort_values(['day', 'rnk']))

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,rnk
95,40.17,4.73,Male,Yes,Fri,Dinner,4,1.0
90,28.97,3.0,Male,Yes,Fri,Dinner,2,2.0
170,50.81,10.0,Male,Yes,Sat,Dinner,3,1.0
212,48.33,9.0,Male,No,Sat,Dinner,4,2.0
156,48.17,5.0,Male,No,Sun,Dinner,6,1.0
182,45.35,3.5,Male,Yes,Sun,Dinner,3,2.0
197,43.11,5.0,Female,Yes,Thur,Lunch,4,1.0
142,41.19,5.0,Male,No,Thur,Lunch,5,2.0


In [37]:
## 
##  SELECT * FROM (
##    SELECT
##      t.*,
##      RANK() OVER(PARTITION BY sex ORDER BY tip) AS rnk
##    FROM df t
##    WHERE tip < 2
##  )
##  WHERE rnk < 3
##  ORDER BY sex, rnk;
##
(df[df['tip'] < 2]
 .assign(rnk_min=df.groupby(['sex'])['tip']
         .rank(method='min'))
 .query('rnk_min < 3')
 .sort_values(['sex', 'rnk_min']))

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,rnk_min
67,3.07,1.0,Female,Yes,Sat,Dinner,1,1.0
92,5.75,1.0,Female,Yes,Fri,Dinner,2,1.0
111,7.25,1.0,Female,No,Sat,Dinner,1,1.0
236,12.6,1.0,Male,Yes,Sat,Dinner,2,1.0
237,32.83,1.17,Male,Yes,Sat,Dinner,2,2.0


In [38]:
%%R 
df %>% filter(tip < 2) %>% group_by(sex) %>% arrange(tip) %>% mutate(rnk_min=1:n()) %>% filter(rnk_min < 3) %>% arrange(sex, rnk_min)

[90m# A tibble: 4 x 8[39m
[90m# Groups:   sex [2][39m
  total_bill   tip sex    smoker day   time    size rnk_min
       [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m  [3m[90m<chr>[39m[23m  [3m[90m<chr>[39m[23m [3m[90m<chr>[39m[23m  [3m[90m<int>[39m[23m   [3m[90m<int>[39m[23m
[90m1[39m       3.07  1    Female Yes    Sat   Dinner     1       1
[90m2[39m       5.75  1    Female Yes    Fri   Dinner     2       2
[90m3[39m      12.6   1    Male   Yes    Sat   Dinner     2       1
[90m4[39m      32.8   1.17 Male   Yes    Sat   Dinner     2       2


## Update

In [39]:
##
##  UPDATE 
##      df
##  SET 
##      tip = tip*2
##  WHERE 
##      tip < 2;
##
df.loc[df['tip'] < 2, 'tip'] *= 2
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,2.02,Female,No,Sun,Dinner,2
1,10.34,3.32,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [40]:
%%R
df %>% filter(tip < 2) %>% mutate(tip = 2*tip) %>% head(5)

  total_bill  tip    sex smoker day   time size
1      16.99 2.02 Female     No Sun Dinner    2
2      10.34 3.32   Male     No Sun Dinner    3
3      15.04 3.92   Male     No Sun Dinner    2
4      10.27 3.42   Male     No Sun Dinner    2
5      15.42 3.14   Male     No Sun Dinner    2


## Delete

In [41]:
## 
##  DELETE FROM 
##      tips
##  WHERE 
##      tip > 9;
##
df.loc[df['tip'] <= 9].head(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,2.02,Female,No,Sun,Dinner,2
1,10.34,3.32,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,3.92,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [42]:
%%R
df %>% filter(!(tip > 9)) %>% head(10)

   total_bill  tip    sex smoker day   time size
1       16.99 1.01 Female     No Sun Dinner    2
2       10.34 1.66   Male     No Sun Dinner    3
3       21.01 3.50   Male     No Sun Dinner    3
4       23.68 3.31   Male     No Sun Dinner    2
5       24.59 3.61 Female     No Sun Dinner    4
6       25.29 4.71   Male     No Sun Dinner    4
7        8.77 2.00   Male     No Sun Dinner    2
8       26.88 3.12   Male     No Sun Dinner    4
9       15.04 1.96   Male     No Sun Dinner    2
10      14.78 3.23   Male     No Sun Dinner    2
