<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Merging-Dataframes" data-toc-modified-id="Merging-Dataframes-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Merging Dataframes</a></span><ul class="toc-item"><li><span><a href="#conflicts" data-toc-modified-id="conflicts-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>conflicts</a></span></li></ul></li><li><span><a href="#Pandas-Idioms" data-toc-modified-id="Pandas-Idioms-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Pandas Idioms</a></span><ul class="toc-item"><li><span><a href="#Method-chaining-(pandorable)" data-toc-modified-id="Method-chaining-(pandorable)-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Method chaining (pandorable)</a></span></li><li><span><a href="#applymap-and-apply" data-toc-modified-id="applymap-and-apply-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span><code>applymap</code> and <code>apply</code></a></span></li></ul></li><li><span><a href="#Group-by" data-toc-modified-id="Group-by-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Group by</a></span></li></ul></div>

## Merging Dataframes

In [1]:
import pandas as pd

In [2]:
staff_df = pd.DataFrame([{'Name':'Kelly','Role':'Director of HR'},
                        {'Name':'Sally','Role':'Course liasion'},
                        {'Name':'James','Role':'Grader'}])
staff_df = staff_df.set_index('Name')

student_df = pd.DataFrame([{'Name':'James','School':'Business'},
                        {'Name':'Mike','School':'Law'},
                        {'Name':'Sally','School':'Engineering'}])
student_df = student_df.set_index('Name')

print(student_df.head())
print(staff_df.head())

            School
Name              
James     Business
Mike           Law
Sally  Engineering
                 Role
Name                 
Kelly  Director of HR
Sally  Course liasion
James          Grader


In [3]:
# outer join is union
pd.merge(staff_df,student_df,how='outer',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,Business
Kelly,Director of HR,
Mike,,Law
Sally,Course liasion,Engineering


In [4]:
# inner join is intersect
pd.merge(staff_df,student_df,how='inner',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Sally,Course liasion,Engineering
James,Grader,Business


In [5]:
# list all components in the left table with addition info from right table
pd.merge(staff_df,student_df,how='left',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Kelly,Director of HR,
Sally,Course liasion,Engineering
James,Grader,Business


In [6]:
pd.merge(staff_df,student_df,how='right',left_index=True,right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,Business
Mike,,Law
Sally,Course liasion,Engineering


In [7]:
staff_df = staff_df.reset_index()
student_df = student_df.reset_index()
pd.merge(staff_df,student_df,how='right',on = 'Name')

Unnamed: 0,Name,Role,School
0,James,Grader,Business
1,Mike,,Law
2,Sally,Course liasion,Engineering


### conflicts
* `_x` reserves for left information
* `_y` for right one

In [8]:
staff_df = pd.DataFrame([{'Name':'Kelly','Role':'Director of HR',
                         'Location':'State Street'},
                        {'Name':'Sally','Role':'Course liasion',
                        'Location':'Washington Avenue'},
                        {'Name':'James','Role':'Grader',
                        'Location':'Washington Avenue'}])
student_df = pd.DataFrame([{'Name':'James','School':'Business',
                           'Location':'1024 Billiard Avenue'},
                        {'Name':'Mike','School':'Law',
                        'Location':'Fraternity House #22'},
                        {'Name':'Sally','School':'Engineering',
                        'Location': '512 Wilson Crescent'}])

In [9]:
pd.merge(staff_df,student_df,how='outer',on = 'Name')

Unnamed: 0,Name,Role,Location_x,School,Location_y
0,Kelly,Director of HR,State Street,,
1,Sally,Course liasion,Washington Avenue,Engineering,512 Wilson Crescent
2,James,Grader,Washington Avenue,Business,1024 Billiard Avenue
3,Mike,,,Law,Fraternity House #22


In [10]:
staff_df = pd.DataFrame([{'First Name':'Kelly','Last Name':'Desjardins','Role':'Director of HR',
                         'Location':'State Street'},
                        {'First Name':'Sally','Last Name':'Brooks','Role':'Course liasion',
                        'Location':'Washington Avenue'},
                        {'First Name':'James','Last Name':'Wilde','Role':'Grader',
                        'Location':'Washington Avenue'}])
student_df = pd.DataFrame([{'First Name':'James','Last Name': 'Hammond','School':'Business',
                           'Location':'1024 Billiard Avenue'},
                        {'First Name':'Mike','Last Name':'Smith','School':'Law',
                        'Location':'Fraternity House #22'},
                        {'First Name':'Sally','Last Name':'Brooks','School':'Engineering',
                        'Location': '512 Wilson Crescent'}])

pd.merge(staff_df,student_df,how='inner',on = ['First Name','Last Name'])

Unnamed: 0,First Name,Last Name,Role,Location_x,School,Location_y
0,Sally,Brooks,Course liasion,Washington Avenue,Engineering,512 Wilson Crescent


In [11]:
!ls

DateFunctionality_ed.html
GroupBy_ed.html
MergingDataFrame_ed.html
PandasIdioms_ed.html
PivotTable_ed.html
Scales.html
Week3.ipynb
census.csv
college_scorecard
cwurData.csv
listings.csv


In [12]:
%%capture
df_2004 = pd.read_csv('college_scorecard/MERGED2004_05_PP.csv',error_bad_lines=False)
df_2005 = pd.read_csv('college_scorecard/MERGED2005_06_PP.csv',error_bad_lines=False)
df_2006 = pd.read_csv('college_scorecard/MERGED2006_07_PP.csv',error_bad_lines=False)

In [13]:
df_2004.head(3)

Unnamed: 0,UNITID,OPEID,OPEID6,INSTNM,CITY,STABBR,ZIP,ACCREDAGENCY,INSTURL,NPCURL,...,OMAWDP8_NOTFIRSTTIME_POOLED_SUPP,OMENRUP_NOTFIRSTTIME_POOLED_SUPP,OMENRYP_FULLTIME_POOLED_SUPP,OMENRAP_FULLTIME_POOLED_SUPP,OMAWDP8_FULLTIME_POOLED_SUPP,OMENRUP_FULLTIME_POOLED_SUPP,OMENRYP_PARTTIME_POOLED_SUPP,OMENRAP_PARTTIME_POOLED_SUPP,OMAWDP8_PARTTIME_POOLED_SUPP,OMENRUP_PARTTIME_POOLED_SUPP
0,100654,100200,1002,Alabama A & M University,Normal,AL,35762,,,,...,,,,,,,,,,
1,100663,105200,1052,University of Alabama at Birmingham,Birmingham,AL,35294-0110,,,,...,,,,,,,,,,
2,100690,2503400,25034,Amridge University,Montgomery,AL,36117-3553,,,,...,,,,,,,,,,


In [14]:
frames = [df_2004,df_2005,df_2006]
pd.concat(frames)

Unnamed: 0,UNITID,OPEID,OPEID6,INSTNM,CITY,STABBR,ZIP,ACCREDAGENCY,INSTURL,NPCURL,...,OMAWDP8_NOTFIRSTTIME_POOLED_SUPP,OMENRUP_NOTFIRSTTIME_POOLED_SUPP,OMENRYP_FULLTIME_POOLED_SUPP,OMENRAP_FULLTIME_POOLED_SUPP,OMAWDP8_FULLTIME_POOLED_SUPP,OMENRUP_FULLTIME_POOLED_SUPP,OMENRYP_PARTTIME_POOLED_SUPP,OMENRAP_PARTTIME_POOLED_SUPP,OMAWDP8_PARTTIME_POOLED_SUPP,OMENRUP_PARTTIME_POOLED_SUPP
0,100654,00100200,1002,Alabama A & M University,Normal,AL,35762,,,,...,,,,,,,,,,
1,100663,00105200,1052,University of Alabama at Birmingham,Birmingham,AL,35294-0110,,,,...,,,,,,,,,,
2,100690,02503400,25034,Amridge University,Montgomery,AL,36117-3553,,,,...,,,,,,,,,,
3,100706,00105500,1055,University of Alabama in Huntsville,Huntsville,AL,35899,,,,...,,,,,,,,,,
4,100724,00100500,1005,Alabama State University,Montgomery,AL,36104-0271,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6843,44098901,02568108,25681,Texas Barber College - Branch Campus #1,Dallas,TX,75241,,,,...,,,,,,,,,,
6844,44098902,02568101,25681,Texas Barber College - Branch Campus #2,Dallas,TX,75228,,,,...,,,,,,,,,,
6845,44098903,02568106,25681,Texas Barber Colleges and Hairstyling Schools ...,Houston,TX,77063,,,,...,,,,,,,,,,
6846,44098904,02568107,25681,Texas Barber College - Branch Campus #5,Houston,TX,77022,,,,...,,,,,,,,,,


In [15]:
pd.concat(frames,keys = ['2011','2012','2013'])

Unnamed: 0,Unnamed: 1,UNITID,OPEID,OPEID6,INSTNM,CITY,STABBR,ZIP,ACCREDAGENCY,INSTURL,NPCURL,...,OMAWDP8_NOTFIRSTTIME_POOLED_SUPP,OMENRUP_NOTFIRSTTIME_POOLED_SUPP,OMENRYP_FULLTIME_POOLED_SUPP,OMENRAP_FULLTIME_POOLED_SUPP,OMAWDP8_FULLTIME_POOLED_SUPP,OMENRUP_FULLTIME_POOLED_SUPP,OMENRYP_PARTTIME_POOLED_SUPP,OMENRAP_PARTTIME_POOLED_SUPP,OMAWDP8_PARTTIME_POOLED_SUPP,OMENRUP_PARTTIME_POOLED_SUPP
2011,0,100654,00100200,1002,Alabama A & M University,Normal,AL,35762,,,,...,,,,,,,,,,
2011,1,100663,00105200,1052,University of Alabama at Birmingham,Birmingham,AL,35294-0110,,,,...,,,,,,,,,,
2011,2,100690,02503400,25034,Amridge University,Montgomery,AL,36117-3553,,,,...,,,,,,,,,,
2011,3,100706,00105500,1055,University of Alabama in Huntsville,Huntsville,AL,35899,,,,...,,,,,,,,,,
2011,4,100724,00100500,1005,Alabama State University,Montgomery,AL,36104-0271,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2013,6843,44098901,02568108,25681,Texas Barber College - Branch Campus #1,Dallas,TX,75241,,,,...,,,,,,,,,,
2013,6844,44098902,02568101,25681,Texas Barber College - Branch Campus #2,Dallas,TX,75228,,,,...,,,,,,,,,,
2013,6845,44098903,02568106,25681,Texas Barber Colleges and Hairstyling Schools ...,Houston,TX,77063,,,,...,,,,,,,,,,
2013,6846,44098904,02568107,25681,Texas Barber College - Branch Campus #5,Houston,TX,77022,,,,...,,,,,,,,,,


## Pandas Idioms

In [16]:
import numpy as np
import timeit

In [18]:
df = pd.read_csv('census.csv')
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861


### Method chaining (pandorable)

In [19]:
(df.where(df['SUMLEV'] == 50)
     .dropna()
     .set_index(['STNAME','CTYNAME'])
     .rename(columns = {'ESTIMATESBASE2010':'Estimates Base 2010'}))

Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,Estimates Base 2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Autauga County,50.0,3.0,6.0,1.0,1.0,54571.0,54571.0,54660.0,55253.0,55175.0,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333
Alabama,Baldwin County,50.0,3.0,6.0,1.0,3.0,182265.0,182265.0,183193.0,186659.0,190396.0,...,14.832960,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
Alabama,Barbour County,50.0,3.0,6.0,1.0,5.0,27457.0,27457.0,27341.0,27226.0,27159.0,...,-4.728132,-2.500690,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
Alabama,Bibb County,50.0,3.0,6.0,1.0,7.0,22915.0,22919.0,22861.0,22733.0,22642.0,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
Alabama,Blount County,50.0,3.0,6.0,1.0,9.0,57322.0,57322.0,57373.0,57711.0,57776.0,...,1.807375,-1.177622,-1.748766,-2.062535,-1.369970,1.859511,-0.848580,-1.402476,-1.577232,-0.884411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,Sweetwater County,50.0,4.0,8.0,56.0,37.0,43806.0,43806.0,43593.0,44041.0,45104.0,...,1.072643,16.243199,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195
Wyoming,Teton County,50.0,4.0,8.0,56.0,39.0,21294.0,21294.0,21297.0,21482.0,21697.0,...,-1.589565,0.972695,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747
Wyoming,Uinta County,50.0,4.0,8.0,56.0,41.0,21118.0,21118.0,21102.0,20912.0,20989.0,...,-17.755986,-4.916350,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351
Wyoming,Washakie County,50.0,4.0,8.0,56.0,43.0,8533.0,8533.0,8545.0,8469.0,8443.0,...,-11.637475,-0.827815,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961


In [20]:
df = df[df['SUMLEV']==50]
df.set_index(['STNAME','CTYNAME'],inplace = True)
df.rename(columns = {'ESTIMATESBASE2010':'Estimates Base 2010'})

Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,Estimates Base 2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Autauga County,50,3,6,1,1,54571,54571,54660,55253,55175,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333
Alabama,Baldwin County,50,3,6,1,3,182265,182265,183193,186659,190396,...,14.832960,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
Alabama,Barbour County,50,3,6,1,5,27457,27457,27341,27226,27159,...,-4.728132,-2.500690,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
Alabama,Bibb County,50,3,6,1,7,22915,22919,22861,22733,22642,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
Alabama,Blount County,50,3,6,1,9,57322,57322,57373,57711,57776,...,1.807375,-1.177622,-1.748766,-2.062535,-1.369970,1.859511,-0.848580,-1.402476,-1.577232,-0.884411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,Sweetwater County,50,4,8,56,37,43806,43806,43593,44041,45104,...,1.072643,16.243199,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195
Wyoming,Teton County,50,4,8,56,39,21294,21294,21297,21482,21697,...,-1.589565,0.972695,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747
Wyoming,Uinta County,50,4,8,56,41,21118,21118,21102,20912,20989,...,-17.755986,-4.916350,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351
Wyoming,Washakie County,50,4,8,56,43,8533,8533,8545,8469,8443,...,-11.637475,-0.827815,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961


In [21]:
def first_approach():
    global df
    return (df.where(df['SUMLEV'] == 50)
             .dropna()
             .set_index(['STNAME','CTYNAME'])
             .rename(columns = {'ESTIMATESBASE2010':'Estimates Base 2010'}))

df = pd.read_csv('census.csv')

timeit.timeit(first_approach,number=10)

0.13623040000004494

In [22]:
def second_approach():
    global df
    new_df = df[df['SUMLEV']==50]
    new_df.set_index(['STNAME','CTYNAME'],inplace = True)
    return new_df.rename(columns = {'ESTIMATESBASE2010':'Estimates Base 2010'})
df = pd.read_csv('census.csv')

timeit.timeit(second_approach,number=10)

0.03782730000000356

### `applymap` and `apply`

In [23]:
def min_max(row):
    data = row[['POPESTIMATE2010',
                'POPESTIMATE2011',
                'POPESTIMATE2012',
                'POPESTIMATE2013',
                'POPESTIMATE2014',
                'POPESTIMATE2015']]
    return pd.Series({'min':np.min(data),'max':np.max(data)})

In [24]:
df.apply(min_max,axis='columns').head()

Unnamed: 0,min,max
0,4785161,4858979
1,54660,55347
2,183193,203709
3,26489,27341
4,22512,22861


In [25]:
def min_max(row):
    data = row[['POPESTIMATE2010',
                'POPESTIMATE2011',
                'POPESTIMATE2012',
                'POPESTIMATE2013',
                'POPESTIMATE2014',
                'POPESTIMATE2015']]
    row['min'] = np.min(data)
    row['max'] = np.max(data)
    return row

In [26]:
df.apply(min_max,axis='columns').head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,min,max
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594,4785161,4858979
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333,54660,55347
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499,183193,203709
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299,26489,27341
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861,22512,22861


In [29]:
rows = ['POPESTIMATE2010',
        'POPESTIMATE2011',
        'POPESTIMATE2012',
        'POPESTIMATE2013',
        'POPESTIMATE2014',
        'POPESTIMATE2015']

df.apply(lambda x: {'max':np.max(x[rows]),'min':np.min(x[rows])}, axis=1).head()

0    {'max': 4858979, 'min': 4785161}
1        {'max': 55347, 'min': 54660}
2      {'max': 203709, 'min': 183193}
3        {'max': 27341, 'min': 26489}
4        {'max': 22861, 'min': 22512}
dtype: object

## Group by