# Groupby Demo

In [23]:
import numpy as np
import pandas as pd
import random


## Init a DataFrame

In [10]:
# Fixed seed so the demo data is identical on every Restart & Run All.
SEED = 42
GROUP_LABELS = ['A', 'B', 'C', 'D']

# one row per day: 10 consecutive days starting 2022-11-01
rows = 10
dates = pd.date_range("20221101", periods=rows)

# Dedicated, seeded generators: reproducible without touching the global
# `random` / `np.random` state that other cells might rely on.
label_rng = random.Random(SEED)
value_rng = np.random.default_rng(SEED)

# one randomly chosen group label ('A', 'B', 'C', or 'D') per row
groups = [label_rng.choice(GROUP_LABELS) for _ in range(rows)]

# dataframe of `rows` rows and 4 numeric columns (A B C D), indexed by date,
# filled with standard-normal samples
df = pd.DataFrame(
    value_rng.standard_normal((rows, 4)),
    index=dates,
    columns=list("ABCD"),
)
# extra label column, used as the groupby key in the cells below
df['group'] = groups
df

Unnamed: 0,A,B,C,D,group
2022-11-01,-0.4106,-0.065767,-0.461423,0.217363,C
2022-11-02,-0.510364,-2.147316,0.713797,-1.038984,C
2022-11-03,-0.496057,0.047652,-0.347322,1.554182,B
2022-11-04,-0.670524,0.934769,0.065391,0.698081,B
2022-11-05,-0.354363,0.298966,-0.499015,0.121221,C
2022-11-06,-0.778994,2.103829,-1.649054,1.437184,B
2022-11-07,-0.444217,-0.879306,1.053771,1.320646,D
2022-11-08,0.718616,-1.373646,0.129225,-0.324881,A
2022-11-09,0.017866,-0.399896,0.450833,-1.330551,B
2022-11-10,-1.606591,-0.228823,1.586585,-0.41457,D


## Groupby

In [11]:
# Grouping is lazy: this only builds a DataFrameGroupBy object keyed on the
# 'group' column, so the cell output is the object's repr, not a table.
df.groupby(by='group')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x10762f9b0>

In the loop below:
* `name` is the group key (here 'A', 'B', 'C', or 'D'), and `group` is the subset of the rows of `df` that share that key
* each `group` is itself a DataFrame

In [28]:
# Iterating a GroupBy yields (key, sub-DataFrame) pairs, one per distinct
# value of the 'group' column.
separator = '-' * 100
grouped = df.groupby('group')
for name, group in grouped:
    # show the rows belonging to this group, then a per-group summary line
    display(group)
    print(f"group: {name}, column A mean: {group['A'].mean()}, column C mean: {group['C'].mean()}")
    print(separator)


Unnamed: 0,A,B,C,D,group
2022-11-08,0.718616,-1.373646,0.129225,-0.324881,A


group: A, column A mean: 0.7186164148719729, column C mean: 0.12922546647594302
----------------------------------------------------------------------------------------------------


Unnamed: 0,A,B,C,D,group
2022-11-03,-0.496057,0.047652,-0.347322,1.554182,B
2022-11-04,-0.670524,0.934769,0.065391,0.698081,B
2022-11-06,-0.778994,2.103829,-1.649054,1.437184,B
2022-11-09,0.017866,-0.399896,0.450833,-1.330551,B


group: B, column A mean: -0.48192742986583503, column C mean: -0.3700381603966108
----------------------------------------------------------------------------------------------------


Unnamed: 0,A,B,C,D,group
2022-11-01,-0.4106,-0.065767,-0.461423,0.217363,C
2022-11-02,-0.510364,-2.147316,0.713797,-1.038984,C
2022-11-05,-0.354363,0.298966,-0.499015,0.121221,C


group: C, column A mean: -0.42510941736281516, column C mean: -0.08221366767049365
----------------------------------------------------------------------------------------------------


Unnamed: 0,A,B,C,D,group
2022-11-07,-0.444217,-0.879306,1.053771,1.320646,D
2022-11-10,-1.606591,-0.228823,1.586585,-0.41457,D


group: D, column A mean: -1.0254043361142045, column C mean: 1.3201780360627033
----------------------------------------------------------------------------------------------------
