# Grouping DataFrames

## Setup

In [1]:
import numpy as np
import pandas as pd

## Creation

Creation of an example DataFrame (starting from a dictionary of dictionaries):

In [2]:
# Country facts as a dictionary of dictionaries: each outer key ("Capital",
# "Population", "Monarch", "Area") maps country names to values.
# "Monarch" only lists three of the eight countries.
# NOTE(review): `data` is rebound by the next cell before anything reads it,
# so this dictionary is never turned into a DataFrame in this notebook.
data = {
    "Capital": {
        "Spain": "Madrid",
        "Belgium": "Brussels",
        "France": "Paris",
        "Italy": "Roma",
        "Germany": "Berlin",
        "Portugal": "Lisbon",
        "Norway": "Oslo",
        "Greece": "Athens",
    },
    "Population": {
        "Spain": 46733038,
        "Belgium": 11449656,
        "France": 67076000,
        "Italy": 60390560,
        "Germany": 83122889,
        "Portugal": 10295909,
        "Norway": 5391369,
        "Greece": 10718565,
    },
    "Monarch": {
        "Spain": "Felipe VI",
        "Belgium": "Philippe",
        "Norway": "Harald V",
    },
    "Area": {
        "Spain": 505990,
        "Belgium": 30688,
        "France": 640679,
        "Italy": 301340,
        "Germany": 357022,
        "Portugal": 92212,
        "Norway": 385207,
        "Greece": 131957,
    },
}

In [3]:
# Zoo inventory as a dictionary of equal-length lists (one list per column,
# one position per row). "Origin" is missing (None) for the two dolphin rows.
data = {
    "City": [
        "Barcelona", "Barcelona", "Barcelona",
        "Torino", "Torino",
        "Basel", "Basel", "Basel", "Basel",
    ],
    "Animal": [
        "Elephant", "Dolphin", "Monkey",
        "Elephant", "Monkey",
        "Elephant", "Dolphin", "Lion", "Giraffe",
    ],
    "Origin": [
        "Asia", None, "Africa",
        "Africa", "Asia",
        "Africa", None, "Africa", "Africa",
    ],
    "Quantity": [4, 5, 8, 3, 12, 2, 6, 7, 1],
    "Adults": [4, 2, 2, 3, 8, 1, 4, 2, 0],
    "Kids": [0, 3, 6, 0, 4, 1, 2, 5, 1],
}

In [4]:
# For now, let's forget about these steps:
# build the frame from `data` and set the three non-numeric dtypes in one call.
df = pd.DataFrame(data).astype(
    {"City": "category", "Animal": "string", "Origin": "category"}
)

In [5]:
# Inspect the dtype of every column of `df`.
df.dtypes

City        category
Animal        string
Origin      category
Quantity       int64
Adults         int64
Kids           int64
dtype: object

In [6]:
# Row-wise sanity check: is each quantity the sum of its adults and kids?
df["Quantity"].eq(df["Adults"] + df["Kids"])

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
dtype: bool

Apple stock data, taken from the [`matplotlib` sample datasets](https://github.com/matplotlib/sample_data/blob/master/aapl.csv)

In [7]:
# For now, let's forget about these steps:
# read the CSV, convert "Date" to datetime64[ns], index by it and sort the index.
apple = (
    pd.read_csv("AAPL.csv")
    .astype({"Date": "datetime64[ns]"})
    .set_index("Date")
    .sort_index()
)

In [8]:
# Load the character table (semicolon-separated, indexed by "Id") and keep only
# the rows whose "House" is one of the four houses listed below.
characters = pd.read_csv("characters.csv", sep=";", index_col="Id")
characters = characters[
    characters["House"].isin(["Gryffindor", "Hufflepuff", "Ravenclaw", "Slytherin"])
].copy()  # work on an independent copy of the filtered rows

# Set every column dtype in a single pass instead of one assignment per column.
characters = characters.astype(
    {
        "Name": "string",
        "Gender": "category",
        "Job": "category",
        "House": "category",
        "Wand": "string",
        "Patronus": "string",
        "Species": "category",
        "Blood status": "category",
        "Hair colour": "category",
        "Eye colour": "category",
        "Loyalty": "string",
        "Skills": "string",
    }
)

# Turn "Death" into a boolean "Dead" flag: True where an entry is present,
# False where it is missing. Using notna() yields a real bool column rather
# than an object column holding Python booleans.
characters = characters.rename(columns={"Death": "Dead"})
characters["Dead"] = characters["Dead"].notna()

## Demo 1: Grouping

In [9]:
# Show the whole example frame (9 rows) before grouping it.
df

Unnamed: 0,City,Animal,Origin,Quantity,Adults,Kids
0,Barcelona,Elephant,Asia,4,4,0
1,Barcelona,Dolphin,,5,2,3
2,Barcelona,Monkey,Africa,8,2,6
3,Torino,Elephant,Africa,3,3,0
4,Torino,Monkey,Asia,12,8,4
5,Basel,Elephant,Africa,2,1,1
6,Basel,Dolphin,,6,4,2
7,Basel,Lion,Africa,7,2,5
8,Basel,Giraffe,Africa,1,0,1


In [10]:
# Column dtypes: "City" and "Origin" are categorical, "Animal" is string.
df.dtypes

City        category
Animal        string
Origin      category
Quantity       int64
Adults         int64
Kids           int64
dtype: object

In [11]:
# Element-wise check that Quantity equals Adults + Kids in every row.
df["Quantity"].eq(df["Adults"] + df["Kids"])

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
dtype: bool

In [12]:
# The inverse check: True marks rows where the counts do NOT add up.
df["Quantity"].ne(df["Adults"] + df["Kids"])

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
dtype: bool

In [13]:
# Summing the boolean mask counts the inconsistent rows.
df["Quantity"].ne(df["Adults"] + df["Kids"]).sum()

0

Group rows by cities:

In [14]:
# groupby() alone only builds a DataFrameGroupBy object; nothing is computed
# until we iterate over it or call an aggregation on it.
df.groupby("City")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001A8D61FC340>

Loop over the groups:

In [15]:
# Iterating over a GroupBy yields (group key, sub-DataFrame) pairs.
for city, city_rows in df.groupby("City"):
    print(city)
    display(city_rows)
    print("")

Barcelona


Unnamed: 0,City,Animal,Origin,Quantity,Adults,Kids
0,Barcelona,Elephant,Asia,4,4,0
1,Barcelona,Dolphin,,5,2,3
2,Barcelona,Monkey,Africa,8,2,6



Basel


Unnamed: 0,City,Animal,Origin,Quantity,Adults,Kids
5,Basel,Elephant,Africa,2,1,1
6,Basel,Dolphin,,6,4,2
7,Basel,Lion,Africa,7,2,5
8,Basel,Giraffe,Africa,1,0,1



Torino


Unnamed: 0,City,Animal,Origin,Quantity,Adults,Kids
3,Torino,Elephant,Africa,3,3,0
4,Torino,Monkey,Asia,12,8,4





## Exercise 1

In [16]:
# Preview the first rows of the characters table.
characters.head()

Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,31 July 1980,False
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,1 March 1980,False
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",False
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,True
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human (Half-giant),Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,False


Group the characters by their "House", and loop over the 4 groups:

In [17]:
# Builds the DataFrameGroupBy object only; no result is computed yet.
characters.groupby(by="House")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001A8D61FC6A0>

In [18]:
# Walk through the groups: each iteration gives the house and its members.
for house, members in characters.groupby(by="House"):
    print(house)
    display(members)
    print("")

Gryffindor


Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,31 July 1980,False
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,1 March 1980,False
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",False
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,True
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human (Half-giant),Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,False
6,Neville Longbottom,Male,Student,Gryffindor,"13"" Cherry unicorn hair",Non-corporeal,Human,Pure-blood,Blond,,Dumbledore's Army | Order of the Phoenix | Hog...,Herbology,"30 July, 1980",False
7,Fred Weasley,Male,Student,Gryffindor,Unknown,Unknown,Human,Pure-blood,Red,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Beater,"1 April, 1978",True
8,George Weasley,Male,Student,Gryffindor,Unknown,Unknown,Human,Pure-blood,Red,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Beater,"1 April, 1978",False
9,Ginevra (Ginny) Molly Weasley,Female,Student,Gryffindor,Unknown,Horse,Human,Pure-blood,Red,Bright brown,Dumbledore's Army | Order of the Phoenix | Hog...,Chaser| Bat-Bogey hex,"11 August, 1981",False
10,Dean Thomas,Male,Student,Gryffindor,Unknown,Unknown,Human,Muggle-born,Black,Brown,Dumbledore's Army | Hogwarts School of Witchcr...,Chaser,1 September 1979- 31 August 1980,False



Hufflepuff


Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
78,Helga Hufflepuff,Female,Founder of Hufflepuff,Hufflepuff,Unknown,Unknown,Human,Pure-blood or half-blood,Brown,Blue,,Food-related Charms. Many traditional Hogwarts...,Pre 976,True
79,Fat Friar,Male,Hufflepuff House Ghost,Hufflepuff,Unknown,Unknown,Ghost,,Brown,,,Curing peasants of the pox,10th century,True
81,Nymphadora Tonks,Female,Auror,Hufflepuff,Unknown,"Jack rabbit, Wolf",Human,Half-blood,Variable,Variable,Ministry of Magic | Order of the Phoenix,"Talented Auror, Metamorphmagus",1 September 1972- 31 August 1973,True
82,Pomona Sprout,Female,Professor of Herbology | Head of Hufflepuff House,Hufflepuff,Unknown,Non-corporeal,Human,Pure-blood or half-blood,,Grey,,Herbology,15 May,False
83,Newton Scamander,Male,Employee in the Beast Division of the Departme...,Hufflepuff,Unknown,Unknown,Human,Pure-blood or half-blood,Red brown,Blue,,"Magizoology, Order of Merlin, Second Class",24 February 1897,False
84,Cedric Diggory,Male,Student,Hufflepuff,"12¼"", Ash, unicorn hair",Unknown,Human,Pure-blood,Dark,Grey,,Skilled Seeker,1 September - 30 October 1977,True
85,Justin Finch-Fletchley,Male,Student,Hufflepuff,Unknown,Non-corporeal,Human,Muggle-born,Red,,Dumbledore's Army,,1 September 1979- 31 August 1980,False
86,Zacharias Smith,Male,Student,Hufflepuff,Unknown,Unknown,Human,Pure-blood or half-blood,Blonde,,Dumbledore's Army,Chaser,1 September 1979 - 2 May 1981,False
87,Hannah Abbott,Female,Student,Hufflepuff,Unknown,Non-corporeal,Human,Half-blood,Blonde,Brown,Dumbledore's Army,"Defensive spells, learned with Dumbledore's Army",1 September 1979 - 31 August 1980,False
88,Ernest Macmillan,Male,Student,Hufflepuff,Unknown,Boar,Human,Pure-blood,Blond,,Dumbledore's Army,"Revising, being a Prefect, getting the wrong e...",21 April 1980 - 31 August 1980,False



Ravenclaw


Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
33,Quirinus Quirrell,Male,Defence Against the Dark Arts(1991-1992),Ravenclaw,"9"" Alder unicorn hair bendy",Non-corporeal,Human,Half-blood,,,Lord Voldemort,Learned in the theory of Defensive Magic| less...,"26 September,1970 or earlier",True
34,Cho Chang,Female,Student,Ravenclaw,Unknown,Swan,Human,Pure-blood or half-blood,Black,Dark,Dumbledore's Army |Hogwarts School of Witchcra...,Seeker,1 September 1978-31 August 1979,False
35,Luna Lovegood,Female,Student,Ravenclaw,Unknown,Hare,Human,Pure-blood or half-blood,Dirty-blonde,Pale silvery,Dumbledore's Army |Hogwarts School of Witchcra...,Spotting Nargles,"13 February, 1981",False
36,Gilderoy Lockhart,Male,Defence Against the Dark Arts(1992-1993),Ravenclaw,"9"" Cherry dragon heartstring",Non-corporeal,Human,Half-blood,Blond,Blue,,,26 January 1964,False
37,Filius Flitwick,Male,Professor of Charms | Head of Ravenclaw,Ravenclaw,Unknown,Non-corporeal,Human(goblin ancestry),Part-Goblin,White,,,Charms,17 October 1958 or earlier,False
38,Sybill Patricia Trelawney,Female,Professor of Divination,Ravenclaw,9 ½ hazel unicorn hair core,Non-corporeal,Human,Half-blood,,,,"A Seer, though the gift is unpredictable and u...",9 March prior to 1962,False
39,Garrick Ollivander,Male,Wandmaker,Ravenclaw,"12¾"" Hornbeam dragon heartstring",Non-corporeal,Human,Half-blood,,Silvery,,An incomparable understanding of wandcraft,"25 September, pre 1908",False
40,Myrtle Elizabeth Warren (Moaning Myrtle),Female,Student,Ravenclaw,Unknown,Unknown,Ghost,Muggle-born,,,,,14 June 1928 -13 June 1929,True
41,Padma Patil,Female,Student,Ravenclaw,Unknown,Non-corporeal,Human,Pure-blood or half-blood,Dark,Dark,Dumbledore's Army |Hogwarts School of Witchcra...,Prefect,1 September 1979 - 21 April 1980,False
42,Michael Corner,Male,Student,Ravenclaw,Unknown,Squirrel,Human,Half-blood,Black,Brown,Dumbledore's Army |Hogwarts School of Witchcra...,Potions,1 September 1979 - 31 August 1980,False



Slytherin


Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
46,Severus Snape,Male,Professor of Potions | Head of Slytherin,Slytherin,Unknown,Doe,Human,Half-blood,Black,Black,,Extremely skilled at potions and Occlumency,9 January 1960,True
47,Draco Malfoy,Male,Student,Slytherin,"10"" Hawthorn unicorn hair",Unknown,Human,Pure-blood,White-blond,Grey,,"Prefect, Seeker","5 June, 1980",False
48,Vincent Crabbe,Male,Student,Slytherin,Unknown,Unknown,Human,Pure-blood,Black,Black,,Beater,c. 1979-1980,True
49,Gregory Goyle,Male,Student,Slytherin,Unknown,Unknown,Human,Pure-blood,Brown,,,Beater,1 September 1979-31 August 1980,False
50,Bellatrix Lestrange,Female,,Slytherin,"12¾"" Walnut dragon heartstring",,Human,Pure-blood,Black,,Lord Voldemort | Death Eaters,She is a duellist of great skill and an experi...,1951,True
51,Dolores Jane Umbridge,Female,Professor of Defence Against the Dark Arts | D...,Slytherin,"8"" Birch dragon heartstring",Cat,Human,Half-blood,Iron grey,,Ministry of Magic,Her punishment quill is of her own invention,26 August,False
52,Horace Eugene Flaccus Slughorn,,Professor of Potions,Slytherin,"10¼"" Cedar dragon heartstring fairly flexible",Non-corporeal,Human,Pure-blood or half-blood,Bald,Gooseberry,,"Accomplished Occlumens, expert Potioneer, adva...",28 April,False
53,Lucius Malfoy,Male,School Governor,Slytherin,Elm and dragon heartstring,Unknown,Human,Pure-blood,White-blond,Grey,Lord Voldemort | Death Eaters,Skilled duellist and potioneer,c. 1954,False
54,Narcissa Malfoy,Female,,Slytherin,Unknown,Unknown,Human,Pure-blood,Blonde,Blue,Lord Voldemort | Death Eaters,,1955,False
55,Regulus Arcturus Black,Male,,Slytherin,Unknown,Non-corporeal,Human,Pure-blood,Black,,,Seeker,1961,True





## Demo 2: Grouping and calculating `size()` and `count()`

Apply a transformation:

In [None]:
# size(): number of rows in each group, one value per city.
df.groupby("City").size()

In [None]:
# count(): number of non-missing values per column within each group.
df.groupby("City").count()

## Exercise 2

In [19]:
# Reminder of what the characters table looks like.
characters.head()

Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,31 July 1980,False
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,1 March 1980,False
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",False
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,True
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human (Half-giant),Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,False


Group the characters by their "House", and calculate the `size()` of each group:

In [20]:
# size() returns a Series with the number of rows per house.
characters.groupby(by="House").size()

House
Gryffindor    38
Hufflepuff    13
Ravenclaw     18
Slytherin     28
dtype: int64

Group the characters by their "House", and calculate the `count()` of each group:

In [21]:
# count() returns one column per original column, counting only the
# non-missing entries, so the numbers can be smaller than size().
characters.groupby(by="House").count()

Unnamed: 0_level_0,Name,Gender,Job,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
House,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Gryffindor,38,38,34,38,38,38,37,36,28,34,34,38,38
Hufflepuff,13,13,12,13,13,13,12,11,8,7,11,12,13
Ravenclaw,18,18,18,18,18,18,17,13,12,7,14,18,18
Slytherin,28,27,24,28,27,28,28,24,14,11,22,28,28


## Demo 3: Grouping and applying a single transformation

Apply a transformation:

In [22]:
# Sum only the numeric columns. pandas >= 2.0 no longer drops non-numeric
# columns silently, and the categorical "Origin" column cannot be summed.
df.groupby("City").sum(numeric_only=True)

Unnamed: 0_level_0,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Barcelona,17,8,9
Basel,16,7,9
Torino,15,11,4


In [23]:
# Average only the numeric columns. Without numeric_only=True, pandas >= 2.0
# also tries to average "Animal" and "Origin" and raises.
df.groupby("City").mean(numeric_only=True)

Unnamed: 0_level_0,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Barcelona,5.666667,2.666667,3.0
Basel,4.0,1.75,2.25
Torino,7.5,5.5,2.0


In [24]:
# Select the columns that have an ordering. "Origin" is an unordered
# categorical, so min() is undefined for it; older pandas dropped it silently,
# newer pandas raises.
df.groupby("City")[["Animal", "Quantity", "Adults", "Kids"]].min()

Unnamed: 0_level_0,Animal,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Barcelona,Dolphin,4,2,0
Basel,Dolphin,1,0,1
Torino,Elephant,3,3,0


In [25]:
# Same column selection as for min(): the unordered categorical "Origin"
# has no maximum, so it is left out explicitly.
df.groupby("City")[["Animal", "Quantity", "Adults", "Kids"]].max()

Unnamed: 0_level_0,Animal,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Barcelona,Monkey,8,4,6
Basel,Lion,7,4,5
Torino,Monkey,12,8,4


In [26]:
# first(): the first entry of every column within each group.
df.groupby("City").first()

Unnamed: 0_level_0,Animal,Origin,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barcelona,Elephant,Asia,4,4,0
Basel,Elephant,Africa,2,1,1
Torino,Elephant,Africa,3,3,0


In [27]:
# last(): the last entry of every column within each group.
df.groupby("City").last()

Unnamed: 0_level_0,Animal,Origin,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barcelona,Monkey,Africa,8,2,6
Basel,Giraffe,Africa,1,0,1
Torino,Monkey,Asia,12,8,4


Apply an arbitrary transformation:

In [28]:
# aggregate() accepts any reducer. The string alias "sum" is used instead of
# np.sum because pandas >= 2.1 deprecates passing NumPy reducers here.
# The numeric columns are selected explicitly so "Animal" and "Origin" are not summed.
df.groupby("City")[["Quantity", "Adults", "Kids"]].aggregate("sum")

Unnamed: 0_level_0,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Barcelona,17,8,9
Basel,16,7,9
Torino,15,11,4


In [29]:
# agg() is the short alias of aggregate(). The string "sum" avoids the
# np.sum deprecation in pandas >= 2.1, and only numeric columns are reduced.
df.groupby("City")[["Quantity", "Adults", "Kids"]].agg("sum")

Unnamed: 0_level_0,Quantity,Adults,Kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Barcelona,17,8,9
Basel,16,7,9
Torino,15,11,4


## Exercise 3

In [30]:
# Preview the table; "Dead" is the True/False column summed below.
characters.head()

Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,31 July 1980,False
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,1 March 1980,False
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",False
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,True
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human (Half-giant),Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,False


Group the characters by their "House", and calculate the sum (to see how many people died):

In [31]:
# Only "Dead" is meaningful to sum (True counts as 1). Selecting it explicitly
# keeps the string and categorical columns out of the reduction, which newer
# pandas versions no longer drop automatically.
characters.groupby(by="House")[["Dead"]].sum()

Unnamed: 0_level_0,Dead
House,Unnamed: 1_level_1
Gryffindor,11
Hufflepuff,5
Ravenclaw,4
Slytherin,7


Group the characters by their "Gender" and calculate the sum, but using the `.agg()` method:

In [32]:
# Same sum through .agg(). The string alias "sum" replaces np.sum (deprecated
# as an agg argument in pandas >= 2.1), and only the "Dead" column is reduced.
characters.groupby(by="Gender")[["Dead"]].agg("sum")

Unnamed: 0_level_0,Dead
Gender,Unnamed: 1_level_1
Female,8
Male,19


## Demo 4: Grouping by several columns

In [33]:
# Show the example frame again; "City" and "Origin" are the grouping keys below.
df

Unnamed: 0,City,Animal,Origin,Quantity,Adults,Kids
0,Barcelona,Elephant,Asia,4,4,0
1,Barcelona,Dolphin,,5,2,3
2,Barcelona,Monkey,Africa,8,2,6
3,Torino,Elephant,Africa,3,3,0
4,Torino,Monkey,Asia,12,8,4
5,Basel,Elephant,Africa,2,1,1
6,Basel,Dolphin,,6,4,2
7,Basel,Lion,Africa,7,2,5
8,Basel,Giraffe,Africa,1,0,1


Group using several columns:

In [34]:
# Both keys are categorical. observed=False keeps every City x Origin
# combination in the result (e.g. Basel/Asia with zeros), regardless of the
# pandas default. Rows with a missing "Origin" are left out of the groups.
# numeric_only=True restricts the sum to the numeric columns.
df.groupby(["City", "Origin"], observed=False).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Quantity,Adults,Kids
City,Origin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Barcelona,Africa,8,2,6
Barcelona,Asia,4,4,0
Basel,Africa,10,3,7
Basel,Asia,0,0,0
Torino,Africa,3,3,0
Torino,Asia,12,8,4


## Exercise 4

In [35]:
# Preview the table; "House" and "Gender" are the grouping keys below.
characters.head()

Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Dead
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,31 July 1980,False
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,1 March 1980,False
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",False
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,True
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human (Half-giant),Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,False


Group the characters by their "House" and "Gender", and calculate the sum:

In [37]:
# Group by two categorical keys and sum the "Dead" flag only.
# observed=False keeps every House x Gender combination, independent of the
# pandas default; selecting "Dead" keeps non-numeric columns out of the sum.
characters.groupby(by=["House", "Gender"], observed=False)[["Dead"]].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Dead
House,Gender,Unnamed: 2_level_1
Gryffindor,Female,2
Gryffindor,Male,9
Hufflepuff,Female,2
Hufflepuff,Male,3
Ravenclaw,Female,3
Ravenclaw,Male,1
Slytherin,Female,1
Slytherin,Male,6


## Bonus: Grouping and applying several transformations

Apply several transformations together:

In [38]:
# A list of reducers gives one sub-column per reducer for every input column.
# String aliases replace np.sum / np.mean (deprecated as agg arguments in
# pandas >= 2.1), and only the numeric columns are aggregated.
df.groupby("City")[["Quantity", "Adults", "Kids"]].agg(["sum", "mean"])

Unnamed: 0_level_0,Quantity,Quantity,Adults,Adults,Kids,Kids
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
City,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Barcelona,17,5.666667,8,2.666667,9,3.0
Basel,16,4.0,7,1.75,9,2.25
Torino,15,7.5,11,5.5,4,2.0


Apply different transformations for different columns:

In [39]:
# Named aggregations: each key becomes an output column, computed from the
# given input column with the given reducer. String aliases ("min", "max",
# "sum") are used instead of NumPy functions, which pandas >= 2.1 deprecates
# as aggregation arguments.
agg = {
    "Minimum number of animals": pd.NamedAgg(column="Quantity", aggfunc="min"),
    "Maximum number of animals": pd.NamedAgg(column="Quantity", aggfunc="max"),
    "Total number of adults": pd.NamedAgg(column="Adults", aggfunc="sum"),
    "Total number of kids": pd.NamedAgg(column="Kids", aggfunc="sum"),
}

In [40]:
# Unpack the dict so that each key becomes a keyword argument, i.e. the name
# of one output column of the aggregated frame.
df.groupby("City").agg(**agg)

Unnamed: 0_level_0,Minimum number of animals,Maximum number of animals,Total number of adults,Total number of kids
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Barcelona,4,8,8,9
Basel,1,7,7,9
Torino,3,12,11,4
