# Pandas GroupBy

### Grouping data with one key: 
To group data by one key, we pass that single key as the argument to the `groupby()` function.

In [1]:
# Load pandas, which provides the DataFrame used throughout.
import pandas as pd

# Employee records stored column-wise: each key is a column name and
# each list holds that column's values, one entry per employee.
data1 = {
    'Name': ['Jai', 'Anuj', 'Jai', 'Princi', 'Gaurav', 'Anuj', 'Princi', 'Abhi'],
    'Age': [27, 24, 22, 32, 33, 36, 27, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj',
                'Jaunpur', 'Kanpur', 'Allahabad', 'Aligarh'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd', 'B.Tech', 'B.com', 'Msc', 'MA'],
}

# Build the DataFrame from the column mapping and show it as plain text.
df = pd.DataFrame(data=data1)
print(df)

     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1    Anuj   24     Kanpur            MA
2     Jai   22  Allahabad           MCA
3  Princi   32    Kannuaj           Phd
4  Gaurav   33    Jaunpur        B.Tech
5    Anuj   36     Kanpur         B.com
6  Princi   27  Allahabad           Msc
7    Abhi   32    Aligarh            MA


In [6]:
# Group the rows by a single key, the 'Name' column, and print the
# mapping from each group key to the row labels that belong to it.
# (A bare df.groupby('Name') statement ahead of this line built a
# GroupBy object and discarded it, so it has been dropped.)
print(df.groupby('Name').groups)

{'Abhi': [7], 'Anuj': [1, 5], 'Gaurav': [4], 'Jai': [0, 2], 'Princi': [3, 6]}


Now we print the first entries in all the groups formed. 

In [3]:
# Bucket the rows by the value in the 'Name' column.
gk = df.groupby(by='Name')

# Show the first entry of every column within each group;
# as the cell's last expression, the result is displayed.
gk.first()

Unnamed: 0_level_0,Age,Address,Qualification
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Abhi,32,Aligarh,MA
Anuj,24,Kanpur,MA
Gaurav,33,Jaunpur,B.Tech
Jai,27,Nagpur,Msc
Princi,32,Kannuaj,Phd


### Grouping data with multiple keys : 
To group data by multiple keys, we pass a list of keys to the `groupby()` function.

In [10]:
# pandas supplies the DataFrame type used below.
import pandas as pd

# Employee data, one list per column. This variant has nine rows: the
# final entry adds a second ('Anuj', 'B.com') record so that one
# (Name, Qualification) pair occurs more than once.
data1 = dict(
    Name=['Jai', 'Anuj', 'Jai', 'Princi', 'Gaurav',
          'Anuj', 'Princi', 'Abhi', "Anuj"],
    Age=[27, 24, 22, 32, 33, 36, 27, 32, 32],
    Address=['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj', 'Jaunpur',
             'Kanpur', 'Allahabad', 'Aligarh', 'Kanpur'],
    Qualification=['Msc', 'MA', 'MCA', 'Phd', 'B.Tech',
                   'B.com', 'Msc', 'MA', 'B.com'],
)

# Turn the column mapping into a DataFrame and print it.
df = pd.DataFrame(data=data1)
print(df)

     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1    Anuj   24     Kanpur            MA
2     Jai   22  Allahabad           MCA
3  Princi   32    Kannuaj           Phd
4  Gaurav   33    Jaunpur        B.Tech
5    Anuj   36     Kanpur         B.com
6  Princi   27  Allahabad           Msc
7    Abhi   32    Aligarh            MA
8    Anuj   32     Kanpur         B.com


In [11]:
# Group by two keys at once by passing a list of column names; every
# distinct (Name, Qualification) pair becomes one group. Print the
# mapping from each pair to the row labels it contains.
# (A bare df.groupby([...]) statement ahead of this line built a
# GroupBy object and discarded it, so it has been dropped.)
print(df.groupby(['Name', 'Qualification']).groups)

{('Abhi', 'MA'): [7], ('Anuj', 'B.com'): [5, 8], ('Anuj', 'MA'): [1], ('Gaurav', 'B.Tech'): [4], ('Jai', 'MCA'): [2], ('Jai', 'Msc'): [0], ('Princi', 'Msc'): [6], ('Princi', 'Phd'): [3]}


### Grouping data by sorting keys : 
Group keys are sorted by default during the groupby operation. You can pass `sort=False` to skip the sorting step for a potential speedup.

In [12]:
# Bring in pandas for the DataFrame type.
import pandas as pd

# A two-column version of the employee data: names and ages only.
data1 = {
    'Name': ['Jai', 'Anuj', 'Jai', 'Princi', 'Gaurav', 'Anuj', 'Princi', 'Abhi'],
    'Age': [27, 24, 22, 32, 33, 36, 27, 32],
}

# Create the DataFrame from the mapping and print it.
df = pd.DataFrame(data=data1)
print(df)

     Name  Age
0     Jai   27
1    Anuj   24
2     Jai   22
3  Princi   32
4  Gaurav   33
5    Anuj   36
6  Princi   27
7    Abhi   32


In [13]:
# Sum the remaining columns within each 'Name' group. The sort flag is
# written out at its default value (True), so the group keys in the
# result come back in sorted order.
df.groupby(['Name'], sort=True).sum()

Unnamed: 0_level_0,Age
Name,Unnamed: 1_level_1
Abhi,32
Anuj,60
Gaurav,33
Jai,49
Princi,59


Now we apply `groupby()` with `sort=False`, which skips sorting the group keys (a potential speedup) and keeps them in order of first appearance.
 

In [14]:
# Sum the remaining columns within each 'Name' group with sorting of
# the group keys turned off (sort=False): the keys stay in the order
# in which they first appear in the data.
df.groupby(by=['Name'], sort=False).sum()

Unnamed: 0_level_0,Age
Name,Unnamed: 1_level_1
Jai,49
Anuj,60
Princi,59
Gaurav,33
Abhi,32


### Grouping data with object attributes : 
The `groups` attribute is a dictionary-like object whose keys are the computed unique groups and whose values are the axis labels belonging to each group.

In [15]:
# pandas is needed for the DataFrame constructor.
import pandas as pd

# Column-oriented employee data: four columns of eight values each.
data1 = dict(
    Name=['Jai', 'Anuj', 'Jai', 'Princi',
          'Gaurav', 'Anuj', 'Princi', 'Abhi'],
    Age=[27, 24, 22, 32,
         33, 36, 27, 32],
    Address=['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj',
             'Jaunpur', 'Kanpur', 'Allahabad', 'Aligarh'],
    Qualification=['Msc', 'MA', 'MCA', 'Phd',
                   'B.Tech', 'B.com', 'Msc', 'MA'],
)

# Materialise the mapping as a DataFrame, then print its text form.
df = pd.DataFrame(data=data1)
print(df)


     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1    Anuj   24     Kanpur            MA
2     Jai   22  Allahabad           MCA
3  Princi   32    Kannuaj           Phd
4  Gaurav   33    Jaunpur        B.Tech
5    Anuj   36     Kanpur         B.com
6  Princi   27  Allahabad           Msc
7    Abhi   32    Aligarh            MA


Now we group the data by a column and inspect the result like a dictionary: each group key maps to the row labels in that group.

In [16]:
# Group the rows on the 'Address' column, then display the mapping
# from each address to the row labels that share it.
by_address = df.groupby('Address')
by_address.groups

{'Aligarh': [7], 'Allahabad': [2, 6], 'Jaunpur': [4], 'Kannuaj': [3], 'Kanpur': [1, 5], 'Nagpur': [0]}

## Iterating through groups
To iterate over the groups, we can loop through the GroupBy object directly, similar to `itertools.groupby()`: each iteration yields the group key together with the rows of that group.
 

In [17]:
# Import pandas so a DataFrame can be built.
import pandas as pd

# Employee data keyed by column name; every list has eight entries,
# one per employee row.
data1 = {
    'Name': [
        'Jai', 'Anuj', 'Jai', 'Princi', 'Gaurav', 'Anuj', 'Princi', 'Abhi',
    ],
    'Age': [
        27, 24, 22, 32, 33, 36, 27, 32,
    ],
    'Address': [
        'Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj',
        'Jaunpur', 'Kanpur', 'Allahabad', 'Aligarh',
    ],
    'Qualification': [
        'Msc', 'MA', 'MCA', 'Phd', 'B.Tech', 'B.com', 'Msc', 'MA',
    ],
}

# Wrap the mapping in a DataFrame and print it.
df = pd.DataFrame(data=data1)
print(df)

     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1    Anuj   24     Kanpur            MA
2     Jai   22  Allahabad           MCA
3  Princi   32    Kannuaj           Phd
4  Gaurav   33    Jaunpur        B.Tech
5    Anuj   36     Kanpur         B.com
6  Princi   27  Allahabad           Msc
7    Abhi   32    Aligarh            MA


In [5]:
# Walk over the groups formed on 'Name'. Each step yields the group's
# key and the rows that belong to it; print the key, then the rows,
# then a blank line as a separator.
grp = df.groupby('Name')
for label, members in grp:
    print(label, members, sep='\n', end='\n\n')

Abhi
   Name  Age  Address Qualification
7  Abhi   32  Aligarh            MA

Anuj
   Name  Age Address Qualification
1  Anuj   24  Kanpur            MA
5  Anuj   36  Kanpur         B.com

Gaurav
     Name  Age  Address Qualification
4  Gaurav   33  Jaunpur        B.Tech

Jai
  Name  Age    Address Qualification
0  Jai   27     Nagpur           Msc
2  Jai   22  Allahabad           MCA

Princi
     Name  Age    Address Qualification
3  Princi   32    Kannuaj           Phd
6  Princi   27  Allahabad           Msc



In [18]:
# Walk over the groups formed on two keys. The key yielded at each
# step is a (Name, Qualification) tuple; print it, then the group's
# rows, then an empty line to separate the groups.
grp = df.groupby(['Name', 'Qualification'])
for key, rows in grp:
    print(key, rows, "", sep="\n")

('Abhi', 'MA')
   Name  Age  Address Qualification
7  Abhi   32  Aligarh            MA

('Anuj', 'B.com')
   Name  Age Address Qualification
5  Anuj   36  Kanpur         B.com

('Anuj', 'MA')
   Name  Age Address Qualification
1  Anuj   24  Kanpur            MA

('Gaurav', 'B.Tech')
     Name  Age  Address Qualification
4  Gaurav   33  Jaunpur        B.Tech

('Jai', 'MCA')
  Name  Age    Address Qualification
2  Jai   22  Allahabad           MCA

('Jai', 'Msc')
  Name  Age Address Qualification
0  Jai   27  Nagpur           Msc

('Princi', 'Msc')
     Name  Age    Address Qualification
6  Princi   27  Allahabad           Msc

('Princi', 'Phd')
     Name  Age  Address Qualification
3  Princi   32  Kannuaj           Phd



## Selecting a group
To select a single group, we call `GroupBy.get_group()` with the key of the group we want; it returns the rows belonging to that group.

In [19]:
# pandas provides DataFrame.
import pandas as pd

# The employee table expressed as a mapping of column name to values.
data1 = {
    'Name': ['Jai', 'Anuj', 'Jai', 'Princi',
             'Gaurav', 'Anuj', 'Princi', 'Abhi'],
    'Age': [27, 24, 22, 32, 33, 36, 27, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj',
                'Jaunpur', 'Kanpur', 'Allahabad', 'Aligarh'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd',
                      'B.Tech', 'B.com', 'Msc', 'MA'],
}

# Construct the DataFrame from the mapping and print it.
df = pd.DataFrame(data=data1)
print(df)


     Name  Age    Address Qualification
0     Jai   27     Nagpur           Msc
1    Anuj   24     Kanpur            MA
2     Jai   22  Allahabad           MCA
3  Princi   32    Kannuaj           Phd
4  Gaurav   33    Jaunpur        B.Tech
5    Anuj   36     Kanpur         B.com
6  Princi   27  Allahabad           Msc
7    Abhi   32    Aligarh            MA


In [26]:
# Pick out only the rows of the 'Jai' group, then reduce them with a
# column-wise minimum, so each column reports its own smallest value.
grp = df.groupby('Name')
jai_rows = grp.get_group('Jai')
jai_rows.min()

Name                   Jai
Age                     22
Address          Allahabad
Qualification          MCA
dtype: object

Now we select an object grouped on multiple columns 

In [24]:
# With two grouping columns, a group is addressed by a tuple holding
# one value per key. Fetch the ('Jai', 'Msc') group and show its
# leading rows.
grp = df.groupby(['Name', 'Qualification'])
jai_msc = grp.get_group(('Jai', 'Msc'))
jai_msc.head()

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Nagpur,Msc


In [None]:
# type() called with no arguments raises TypeError, which would stop a
# "Restart & Run All" at this cell. Inspect the type of the most
# recently created GroupBy object instead.
# NOTE(review): the intended argument is a guess; confirm, or delete
# this scratch cell.
type(grp)