# Grouping

In [77]:
import pandas as pd

In [78]:
drinks = pd.read_csv('./data/drinks.csv')
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
3,Andorra,245,138,312,12.4,EU
4,Angola,217,57,45,5.9,AF


In [79]:
# Inspecting data to see if there are any missing values.  (Yes, there are)
drinks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   country                       193 non-null    object 
 1   beer_servings                 193 non-null    int64  
 2   spirit_servings               193 non-null    int64  
 3   wine_servings                 193 non-null    int64  
 4   total_litres_of_pure_alcohol  193 non-null    float64
 5   continent                     170 non-null    object 
dtypes: float64(1), int64(3), object(2)
memory usage: 9.2+ KB


In [80]:
# Inspect the missing rows
filt = drinks.isna().any(axis=1)
drinks[filt]

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
5,Antigua & Barbuda,102,128,45,4.9,
11,Bahamas,122,176,51,6.3,
14,Barbados,143,173,36,6.3,
17,Belize,263,114,8,6.8,
32,Canada,240,122,100,8.2,
41,Costa Rica,149,87,11,4.4,
43,Cuba,93,137,5,4.2,
50,Dominica,52,286,26,6.6,
51,Dominican Republic,193,147,9,6.2,
54,El Salvador,52,69,2,2.2,


In [81]:
# Fill the missing values with the default string
drinks['continent'] = drinks['continent'].fillna('UNKNOWN')
drinks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   country                       193 non-null    object 
 1   beer_servings                 193 non-null    int64  
 2   spirit_servings               193 non-null    int64  
 3   wine_servings                 193 non-null    int64  
 4   total_litres_of_pure_alcohol  193 non-null    float64
 5   continent                     193 non-null    object 
dtypes: float64(1), int64(3), object(2)
memory usage: 9.2+ KB


In [82]:
drinks_grouped = drinks.groupby('continent')

### Step 4. Which continent drinks more beer on average?

In [83]:
drinks_grouped["beer_servings"].mean()

continent
AF          61.471698
AS          37.045455
EU         193.777778
OC          89.687500
SA         175.083333
UNKNOWN    145.434783
Name: beer_servings, dtype: float64

### Step 5. For each continent print the statistics for wine consumption.

In [84]:
drinks_grouped["wine_servings"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AF,53.0,16.264151,38.846419,0.0,1.0,2.0,13.0,233.0
AS,44.0,9.068182,21.667034,0.0,0.0,1.0,8.0,123.0
EU,45.0,142.222222,97.421738,0.0,59.0,128.0,195.0,370.0
OC,16.0,35.625,64.55579,0.0,1.0,8.5,23.25,212.0
SA,12.0,62.416667,88.620189,1.0,3.0,12.0,98.5,221.0
UNKNOWN,23.0,24.521739,28.266378,1.0,5.0,11.0,34.0,100.0


In [85]:
drinks_grouped[['beer_servings', 'wine_servings']].mean()

Unnamed: 0_level_0,beer_servings,wine_servings
continent,Unnamed: 1_level_1,Unnamed: 2_level_1
AF,61.471698,16.264151
AS,37.045455,9.068182
EU,193.777778,142.222222
OC,89.6875,35.625
SA,175.083333,62.416667
UNKNOWN,145.434783,24.521739


### Step 6. Print the mean alcohol consumption per continent for every column

In [75]:
drinks_grouped[['beer_servings', 'wine_servings']].agg(['mean', 'min', 'max', 'median'])

Unnamed: 0_level_0,beer_servings,beer_servings,beer_servings,beer_servings,wine_servings,wine_servings,wine_servings,wine_servings
Unnamed: 0_level_1,mean,min,max,median,mean,min,max,median
continent,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
AF,61.471698,0,376,32.0,16.264151,0,233,2.0
AS,37.045455,0,247,17.5,9.068182,0,123,1.0
EU,193.777778,0,361,219.0,142.222222,0,370,128.0
OC,89.6875,0,306,52.5,35.625,0,212,8.5
SA,175.083333,93,333,162.5,62.416667,1,221,12.0
UNKNOWN,145.434783,1,285,143.0,24.521739,1,100,11.0
