In [1]:
# https://python-course.eu/numerical-programming/pandas-groupby.php

In [2]:
import pandas as pd
import numpy as np
import random

nvalues = 30
# we create random values, which will be used as the Series values:
values = np.random.randint(1, 20, (nvalues,))
fruits = ["bananas", "oranges", "apples", "clementines", "cherries", "pears"]
fruits_index = np.random.choice(fruits, (nvalues,))

s = pd.Series(values, index=fruits_index)
print(s[:100])

pears           1
clementines    14
bananas         6
pears          19
bananas        13
bananas        19
bananas        10
oranges        10
bananas        13
oranges         6
apples          1
oranges        19
pears          10
bananas        18
clementines     9
apples          8
apples         16
oranges        15
pears          15
bananas         4
clementines    19
clementines    14
oranges         3
clementines    12
cherries        8
oranges        15
oranges         5
pears           8
oranges         8
oranges        13
dtype: int64


In [3]:
grouped = s.groupby(s.index)

for fruit, s_obj in grouped:
    print(f"===== {fruit} =====")
    print(s_obj)

===== apples =====
apples     1
apples     8
apples    16
dtype: int64
===== bananas =====
bananas     6
bananas    13
bananas    19
bananas    10
bananas    13
bananas    18
bananas     4
dtype: int64
===== cherries =====
cherries    8
dtype: int64
===== clementines =====
clementines    14
clementines     9
clementines    19
clementines    14
clementines    12
dtype: int64
===== oranges =====
oranges    10
oranges     6
oranges    19
oranges    15
oranges     3
oranges    15
oranges     5
oranges     8
oranges    13
dtype: int64
===== pears =====
pears     1
pears    19
pears    10
pears    15
pears     8
dtype: int64


In [4]:
for fruit in set(s.index):
    print(f"===== {fruit} =====")
    print(s[fruit])

===== apples =====
apples     1
apples     8
apples    16
dtype: int64
===== pears =====
pears     1
pears    19
pears    10
pears    15
pears     8
dtype: int64
===== clementines =====
clementines    14
clementines     9
clementines    19
clementines    14
clementines    12
dtype: int64
===== oranges =====
oranges    10
oranges     6
oranges    19
oranges    15
oranges     3
oranges    15
oranges     5
oranges     8
oranges    13
dtype: int64
===== bananas =====
bananas     6
bananas    13
bananas    19
bananas    10
bananas    13
bananas    18
bananas     4
dtype: int64
===== cherries =====
8


In [5]:
import pandas as pd
beverages = pd.DataFrame({'Name': ['Robert', 'Melinda', 'Brenda',
                                   'Samantha', 'Melinda', 'Robert',
                                   'Melinda', 'Brenda', 'Samantha'],
                          'Coffee': [3, 0, 2, 2, 0, 2, 0, 1, 3],
                          'Tea':    [0, 4, 2, 0, 3, 0, 3, 2, 0]})
    
beverages

Unnamed: 0,Name,Coffee,Tea
0,Robert,3,0
1,Melinda,0,4
2,Brenda,2,2
3,Samantha,2,0
4,Melinda,0,3
5,Robert,2,0
6,Melinda,0,3
7,Brenda,1,2
8,Samantha,3,0


In [6]:
beverages['Coffee'].sum()

13

In [7]:
beverages[['Coffee', 'Tea']].sum()

Coffee    13
Tea       14
dtype: int64

In [8]:
res = beverages.groupby(['Name']).sum()
print(res)

          Coffee  Tea
Name                 
Brenda         3    4
Melinda        0   10
Robert         5    0
Samantha       5    0


In [9]:
res.index

Index(['Brenda', 'Melinda', 'Robert', 'Samantha'], dtype='object', name='Name')

In [10]:
res.columns

Index(['Coffee', 'Tea'], dtype='object')

In [11]:
beverages.groupby(['Name']).mean()

Unnamed: 0_level_0,Coffee,Tea
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Brenda,1.5,2.0
Melinda,0.0,3.333333
Robert,2.5,0.0
Samantha,2.5,0.0


In [12]:
from faker import Faker
import numpy as np
from itertools import chain

fake = Faker('de_DE')

number_of_names = 10
names = []
for _ in range(number_of_names):
    names.append(fake.first_name())


data = {}
workweek = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
weekend = ("Saturday", "Sunday")

for day in chain(workweek, weekend):
    data[day] = np.random.randint(0, 10, (number_of_names,))
    
data_df = pd.DataFrame(data, index=names)
data_df

ModuleNotFoundError: No module named 'faker'

In [None]:
def is_weekend(day):
    if day in {'Saturday', 'Sunday'}:
        return "Weekend"
    else:
        return "Workday"
        
for res_func, df in data_df.groupby(by=is_weekend, axis=1):
    print(df)

           Saturday  Sunday
Stefan            5       8
Alex              1       0
Hildegart         5       2
Ingeborg          4       2
Alwina            9       2
Robert            0       6
Yusuf             2       2
Maren             1       6
Patrizia          5       3
Margot            7       0
           Monday  Tuesday  Wednesday  Thursday  Friday
Stefan          3        0          6         9       0
Alex            7        8          8         1       7
Hildegart       5        9          7         5       3
Ingeborg        1        6          2         2       3
Alwina          0        0          1         3       4
Robert          3        7          2         0       9
Yusuf           3        4          3         2       6
Maren           7        2          9         9       5
Patrizia        3        2          5         6       3
Margot          1        5          3         3       9


In [None]:
data_df.groupby(by=is_weekend, axis=1).sum()

Unnamed: 0,Weekend,Workday
Stefan,13,18
Alex,1,31
Hildegart,7,29
Ingeborg,6,14
Alwina,11,8
Robert,6,21
Yusuf,4,18
Maren,7,32
Patrizia,8,19
Margot,7,21
