## Creating a GroupBy object from scratch

In [2]:
import pandas as pd

In [4]:
# Small hand-made data set: one row per grocery item, with its category and price.
food_data = dict(
    Item=["Banana", "Cucumber", "Orange", "Tomato", "Watermelon"],
    Type=["Fruit", "Vegetable", "Fruit", "Vegetable", "Fruit"],
    Price=[0.99, 1.25, 0.25, 0.33, 3.00],
)
supermarket = pd.DataFrame(data=food_data)
supermarket

Unnamed: 0,Item,Type,Price
0,Banana,Fruit,0.99
1,Cucumber,Vegetable,1.25
2,Orange,Fruit,0.25
3,Tomato,Vegetable,0.33
4,Watermelon,Fruit,3.0


In [6]:
# Split the rows of `supermarket` into one group per distinct "Type" value.
groups = supermarket.groupby(by="Type")
groups

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001AB851668E0>

In [10]:
# Pull out the rows whose "Type" is "Fruit".
groups.get_group(name="Fruit")

Unnamed: 0,Item,Type,Price
0,Banana,Fruit,0.99
2,Orange,Fruit,0.25
4,Watermelon,Fruit,3.0


In [12]:
# Pull out the rows whose "Type" is "Vegetable".
groups.get_group(name="Vegetable")

Unnamed: 0,Item,Type,Price
1,Cucumber,Vegetable,1.25
3,Tomato,Vegetable,0.33


In [15]:
# Average only the numeric columns of each group. Since pandas 2.0,
# `numeric_only` defaults to False, so without it the text column "Item"
# makes mean() raise a TypeError instead of being dropped silently.
groups.mean(numeric_only=True)

Unnamed: 0_level_0,Price
Type,Unnamed: 1_level_1
Fruit,1.413333
Vegetable,0.79


## Creating a GroupBy object from a data set

In [17]:
# Load the company data set (expected next to the notebook) and preview the first five rows.
fortune = pd.read_csv("fortune1000.csv")
fortune.head(n=5)

Unnamed: 0,Company,Revenues,Profits,Employees,Sector,Industry
0,Walmart,500343.0,9862.0,2300000,Retailing,General Merchandisers
1,Exxon Mobil,244363.0,19710.0,71200,Energy,Petroleum Refining
2,Berkshire Hathaway,242137.0,44940.0,377000,Financials,Insurance: Property and Casualty (Stock)
3,Apple,229234.0,48351.0,123000,Technology,"Computers, Office Equipment"
4,UnitedHealth Group,201159.0,10558.0,260000,Health Care,Health Care: Insurance and Managed Care


In [25]:
# Distinct sector names, in order of first appearance.
fortune.loc[:, "Sector"].unique()

array(['Retailing', 'Energy', 'Financials', 'Technology', 'Health Care',
       'Wholesalers', 'Telecommunications', 'Motor Vehicles & Parts',
       'Food &  Drug Stores', 'Industrials', 'Aerospace & Defense',
       'Household Products', 'Transportation',
       'Food, Beverages & Tobacco', 'Chemicals', 'Media', 'Apparel',
       'Materials', 'Hotels, Restaurants & Leisure', 'Business Services',
       'Engineering & Construction'], dtype=object)

In [21]:
# One group of companies per distinct "Sector" value.
sectors = fortune.groupby(by="Sector")

In [27]:
# Number of groups: one per distinct value in the "Sector" column.
len(sectors)

21

In [31]:
# Number of rows (companies) that fall into each sector.
sectors.size()

Sector
Aerospace & Defense               25
Apparel                           14
Business Services                 53
Chemicals                         33
Energy                           107
Engineering & Construction        27
Financials                       155
Food &  Drug Stores               12
Food, Beverages & Tobacco         37
Health Care                       71
Hotels, Restaurants & Leisure     26
Household Products                28
Industrials                       49
Materials                         45
Media                             25
Motor Vehicles & Parts            19
Retailing                         77
Technology                       103
Telecommunications                10
Transportation                    40
Wholesalers                       44
dtype: int64

## Attributes and methods of a GroupBy object

In [32]:
# Mapping from each sector name to the index labels of the rows in that group.
sectors.groups

{'Aerospace & Defense': [26, 50, 58, 98, 117, 118, 207, 224, 275, 380, 404, 406, 414, 540, 660, 661, 806, 829, 884, 930, 954, 955, 959, 975, 988], 'Apparel': [88, 241, 331, 420, 432, 526, 529, 554, 587, 678, 766, 774, 835, 861], 'Business Services': [142, 160, 187, 199, 201, 221, 235, 242, 253, 295, 325, 358, 364, 423, 462, 465, 486, 493, 497, 499, 502, 510, 528, 567, 577, 584, 591, 599, 604, 618, 649, 686, 691, 692, 700, 702, 712, 720, 738, 744, 771, 802, 810, 825, 879, 888, 894, 895, 898, 905, 922, 972, 997], 'Chemicals': [46, 189, 190, 198, 214, 263, 281, 309, 344, 351, 381, 447, 450, 454, 527, 593, 623, 648, 671, 672, 679, 704, 722, 740, 790, 836, 865, 872, 908, 932, 958, 963, 978], 'Energy': [1, 12, 27, 30, 40, 63, 89, 90, 91, 94, 104, 114, 124, 125, 134, 145, 166, 167, 175, 184, 205, 212, 213, 217, 218, 219, 222, 231, 232, 243, 248, 254, 256, 265, 268, 269, 270, 273, 307, 313, 326, 333, 335, 343, 352, 363, 371, 379, 383, 384, 387, 428, 437, 452, 456, 488, 490, 496, 498, 500, 517,

In [35]:
# Spot check: index label 26 is listed under "Aerospace & Defense" in `sectors.groups`.
fortune.at[26, "Sector"]

'Aerospace & Defense'

In [40]:
# First entry of every sector, ordered from the highest to the lowest revenue.
(
    sectors
    .first()
    .sort_values(by="Revenues", ascending=False)
)

Unnamed: 0_level_0,Company,Revenues,Profits,Employees,Industry
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Retailing,Walmart,500343.0,9862.0,2300000,General Merchandisers
Energy,Exxon Mobil,244363.0,19710.0,71200,Petroleum Refining
Financials,Berkshire Hathaway,242137.0,44940.0,377000,Insurance: Property and Casualty (Stock)
Technology,Apple,229234.0,48351.0,123000,"Computers, Office Equipment"
Health Care,UnitedHealth Group,201159.0,10558.0,260000,Health Care: Insurance and Managed Care
Wholesalers,McKesson,198533.0,5070.0,64500,Wholesalers: Health Care
Telecommunications,AT&T,160546.0,29450.0,254000,Telecommunications
Motor Vehicles & Parts,General Motors,157311.0,-3864.0,180000,Motor Vehicles and Parts
Food & Drug Stores,Kroger,122662.0,1907.0,449000,Food and Drug Stores
Industrials,General Electric,122274.0,-5786.0,313000,Industrial Machinery


In [42]:
# Row at position 0 within each sector.
sectors.nth(n=0)

Unnamed: 0_level_0,Company,Revenues,Profits,Employees,Industry
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aerospace & Defense,Boeing,93392.0,8197.0,140800,Aerospace and Defense
Apparel,Nike,34350.0,4240.0,74400,Apparel
Business Services,ManpowerGroup,21034.0,545.4,29000,Temporary Help
Chemicals,DowDuPont,62683.0,1460.0,98000,Chemicals
Energy,Exxon Mobil,244363.0,19710.0,71200,Petroleum Refining
Engineering & Construction,Fluor,19521.0,191.4,56706,"Engineering, Construction"
Financials,Berkshire Hathaway,242137.0,44940.0,377000,Insurance: Property and Casualty (Stock)
Food & Drug Stores,Kroger,122662.0,1907.0,449000,Food and Drug Stores
"Food, Beverages & Tobacco",PepsiCo,63525.0,4857.0,263000,Food Consumer Products
Health Care,UnitedHealth Group,201159.0,10558.0,260000,Health Care: Insurance and Managed Care


In [45]:
# First two rows of each sector, arranged alphabetically by sector name.
# NOTE: the default sort algorithm is not stable, so the two rows of a sector
# may come back in either order (e.g. index 241 listed before 88).
(
    sectors
    .head(n=2)
    .sort_values("Sector")
)

Unnamed: 0,Company,Revenues,Profits,Employees,Sector,Industry
26,Boeing,93392.0,8197.0,140800,Aerospace & Defense,Aerospace and Defense
50,United Technologies,59837.0,4552.0,204700,Aerospace & Defense,Aerospace and Defense
241,VF,12400.0,614.9,69000,Apparel,Apparel
88,Nike,34350.0,4240.0,74400,Apparel,Apparel
160,Visa,18358.0,6699.0,15000,Business Services,Financial Data Services
142,ManpowerGroup,21034.0,545.4,29000,Business Services,Temporary Help
189,Sherwin-Williams,14984.0,1772.3,52695,Chemicals,Chemicals
46,DowDuPont,62683.0,1460.0,98000,Chemicals,Chemicals
1,Exxon Mobil,244363.0,19710.0,71200,Energy,Petroleum Refining
12,Chevron,134533.0,9195.0,51900,Energy,Petroleum Refining


## Aggregate operations

In [47]:
# Total the numeric columns per sector. With pandas >= 2.0 the default
# (`numeric_only=False`) would also "sum" the text columns Company and
# Industry by concatenating their strings, so restrict it explicitly.
sectors.sum(numeric_only=True)

Unnamed: 0_level_0,Revenues,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aerospace & Defense,383835.0,26733.5,1010124
Apparel,101157.3,6350.7,355699
Business Services,316090.0,37179.2,1593999
Chemicals,251151.0,20475.0,474020
Energy,1543507.2,85369.6,981207
Engineering & Construction,172782.0,7121.0,420745
Financials,2442480.0,264253.5,3500119
Food & Drug Stores,405468.0,8440.3,1398074
"Food, Beverages & Tobacco",510232.0,54902.5,1079316
Health Care,1507991.4,92791.1,2971189


In [56]:
# Cross-check the grouped sum by hand: total revenue of a single sector.
sectors.get_group("Aerospace & Defense")["Revenues"].sum()

383835.0

In [69]:
# A different aggregation for each column: column name -> aggregation name.
aggregations = dict(Revenues="min", Profits="max", Employees="mean")

sectors.aggregate(aggregations)

Unnamed: 0_level_0,Revenues,Profits,Employees
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aerospace & Defense,1877.0,8197.0,40404.96
Apparel,2350.0,4240.0,25407.071429
Business Services,1851.0,6699.0,30075.45283
Chemicals,1925.0,3000.4,14364.242424
Energy,1874.0,19710.0,9170.158879
Engineering & Construction,1906.0,1038.4,15583.148148
Financials,1848.0,44940.0,22581.412903
Food & Drug Stores,2064.0,4078.0,116506.166667
"Food, Beverages & Tobacco",2071.0,10999.0,29170.702703
Health Care,1849.0,21308.0,41847.732394


## Applying a custom operation to all groups

In [71]:
# The five most profitable companies across the whole data set.
fortune.nlargest(5, "Profits")

Unnamed: 0,Company,Revenues,Profits,Employees,Sector,Industry
3,Apple,229234.0,48351.0,123000,Technology,"Computers, Office Equipment"
2,Berkshire Hathaway,242137.0,44940.0,377000,Financials,Insurance: Property and Casualty (Stock)
15,Verizon,126034.0,30101.0,155400,Telecommunications,Telecommunications
8,AT&T,160546.0,29450.0,254000,Telecommunications,Telecommunications
19,JPMorgan Chase,113899.0,24441.0,252539,Financials,Commercial Banks


In [72]:
def get_largest_row(df, column="Revenues"):
    """Return the single row of ``df`` with the largest value in ``column``.

    Written to be passed to ``GroupBy.apply``, which calls it once per group
    with that group's rows as ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to search; must contain ``column``.
    column : str, default "Revenues"
        Name of the column to rank by.

    Returns
    -------
    pandas.DataFrame
        A one-row frame that keeps the winning row's original index label.
    """
    return df.nlargest(n=1, columns=column)

In [74]:
# Run get_largest_row once per sector and preview the first five results.
(
    sectors
    .apply(get_largest_row)
    .head(n=5)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Company,Revenues,Profits,Employees,Industry
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aerospace & Defense,26,Boeing,93392.0,8197.0,140800,Aerospace and Defense
Apparel,88,Nike,34350.0,4240.0,74400,Apparel
Business Services,142,ManpowerGroup,21034.0,545.4,29000,Temporary Help
Chemicals,46,DowDuPont,62683.0,1460.0,98000,Chemicals
Energy,1,Exxon Mobil,244363.0,19710.0,71200,Petroleum Refining


## Grouping by multiple columns

In [76]:
# Group on two keys: one group per (Sector, Industry) combination.
sector_and_industry = fortune.groupby(by=["Sector", "Industry"])

In [78]:
# Number of rows in each (Sector, Industry) group.
sector_and_industry.size()

Sector               Industry                                     
Aerospace & Defense  Aerospace and Defense                            25
Apparel              Apparel                                          14
Business Services    Advertising, marketing                            2
                     Diversified Outsourcing Services                 14
                     Education                                         2
                                                                      ..
Transportation       Trucking, Truck Leasing                          11
Wholesalers          Wholesalers: Diversified                         24
                     Wholesalers: Electronics and Office Equipment     8
                     Wholesalers: Food and Grocery                     6
                     Wholesalers: Health Care                          6
Length: 82, dtype: int64

In [80]:
# With two grouping keys, a group is addressed by a (Sector, Industry) tuple.
sector_and_industry.get_group(name=("Business Services", "Education"))

Unnamed: 0,Company,Revenues,Profits,Employees,Sector,Industry
567,Laureate Education,4378.0,91.5,54500,Business Services,Education
810,Graham Holdings,2592.0,302.0,16153,Business Services,Education


In [83]:
# Total the numeric columns per (Sector, Industry) group. With pandas >= 2.0
# the default (`numeric_only=False`) would also concatenate the text column
# Company, so restrict the sum to numeric columns explicitly.
sector_and_industry.sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenues,Profits,Employees
Sector,Industry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aerospace & Defense,Aerospace and Defense,383835.0,26733.5,1010124
Apparel,Apparel,101157.3,6350.7,355699
Business Services,"Advertising, marketing",23156.0,1667.4,127500
Business Services,Diversified Outsourcing Services,74175.0,5043.7,858600
Business Services,Education,6970.0,393.5,70653
...,...,...,...,...
Transportation,"Trucking, Truck Leasing",43676.0,3535.5,208312
Wholesalers,Wholesalers: Diversified,130984.0,5231.5,262390
Wholesalers,Wholesalers: Electronics and Office Equipment,122231.0,1259.4,183518
Wholesalers,Wholesalers: Food and Grocery,125908.0,1794.0,135767


In [85]:
# Average revenue per (Sector, Industry) group; show the first five groups.
(
    sector_and_industry["Revenues"]
    .mean()
    .head()
)

Sector               Industry                        
Aerospace & Defense  Aerospace and Defense               15353.400000
Apparel              Apparel                              7225.521429
Business Services    Advertising, marketing              11578.000000
                     Diversified Outsourcing Services     5298.214286
                     Education                            3485.000000
Name: Revenues, dtype: float64