# Python Pandas III: Aggregates

Still starring our favorite globetrotter. 

We'll start by setting up a merge between the same two previous datasets we were using in the previous lesson

In [2]:
# import pandas
import pandas

# configure pandas
pandas.options.display.max_rows = None
pandas.options.display.max_columns = None

# load our first data set and give it a quick head check
celtics_roster = pandas.read_csv('boston_celtics_2023_2024.csv')
celtics_roster.head(4)

Unnamed: 0,number,player,position,height,weight,birth_date,country_code,experience,college,on-off
0,11,Payton Pritchard,PG,6-1,195,January 28 1998,us,3,Oregon,0.4
1,30,Sam Hauser,SF,6-8,215,December 8 1997,us,2,Marquette Virginia,0.4
2,0,Jayson Tatum,PF,6-8,210,March 3 1998,us,6,Duke,0.7
3,9,Derrick White,SG,6-4,190,July 2 1994,us,6,Colorado-Colorado Springs Colorado,0.6


In [3]:
# Go ahead and load our next dataset and give it a quick head check
celtics_totals = pandas.read_csv('boston_celtics_2023_2024_totals.csv')
celtics_totals.head(4)

Unnamed: 0,player,age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Jayson Tatum,25,74,74,2645,672,1426,0.471,229,609,0.376,443,817,0.542,0.552,414,497,0.833,67,534,601,364,75,43,188,145,1987
1,Derrick White,29,73,73,2381,387,839,0.461,196,495,0.396,191,344,0.555,0.578,137,152,0.901,51,259,310,377,74,87,112,152,1107
2,Jaylen Brown,27,70,70,2343,627,1256,0.499,145,410,0.354,482,846,0.57,0.557,211,300,0.703,84,303,387,249,83,37,166,185,1610
3,Jrue Holiday,33,69,69,2263,331,689,0.48,138,322,0.429,193,367,0.526,0.581,60,72,0.833,84,289,373,333,61,53,124,108,860


In [4]:
# Now let's do the merge and do our last head check
celtics = pandas.merge(celtics_roster, celtics_totals, on='player', how='outer')
celtics.head(5)

Unnamed: 0,number,player,position,height,weight,birth_date,country_code,experience,college,on-off,age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,42.0,Al Horford,C,6-9,240.0,June 3 1986,do,16,Florida,0.4,37.0,65.0,33.0,1740.0,214.0,419.0,0.511,108.0,258.0,0.419,106.0,161.0,0.658,0.64,26.0,30.0,0.867,82.0,331.0,413.0,168.0,38.0,62.0,48.0,93.0,562.0
1,,Dalano Banton,,,,,,,,,24.0,24.0,1.0,171.0,19.0,51.0,0.373,2.0,16.0,0.125,17.0,35.0,0.486,0.392,16.0,20.0,0.8,12.0,23.0,35.0,19.0,5.0,3.0,10.0,20.0,56.0
2,9.0,Derrick White,SG,6-4,190.0,July 2 1994,us,6,Colorado-Colorado Springs Colorado,0.6,29.0,73.0,73.0,2381.0,387.0,839.0,0.461,196.0,495.0,0.396,191.0,344.0,0.555,0.578,137.0,152.0,0.901,51.0,259.0,310.0,377.0,74.0,87.0,112.0,152.0,1107.0
3,,Drew Peterson,,,,,,,,,24.0,3.0,0.0,23.0,4.0,6.0,0.667,3.0,5.0,0.6,1.0,1.0,1.0,0.917,0.0,0.0,,0.0,1.0,1.0,1.0,2.0,0.0,1.0,1.0,11.0
4,13.0,Drew Peterson (TW),PF,6-9,205.0,November 9 1999,us,R,Rice University USC,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,


### Let's look at Groups and Pivots. 

We're going to group our players by position group and on-off percentile. 

Before we do this, we're going to create a new column (Remember how to do that??) We want to aggregate our forwards and guards. 

In [6]:
celtics['position_group'] = "Unknown"
celtics.loc[(celtics['position'] == 'C'), 'position_group'] = 'center'
celtics.loc[(celtics['position'] == 'PF') | (celtics['position'] == 'SF'), 'position_group'] = 'forward'
celtics.loc[(celtics['position'] == 'PG') | (celtics['position'] == 'SG'), 'position_group'] = 'guard'
celtics[['player','position_group']]

Unnamed: 0,player,position_group
0,Al Horford,center
1,Dalano Banton,Unknown
2,Derrick White,guard
3,Drew Peterson,Unknown
4,Drew Peterson (TW),forward
5,JD Davison,Unknown
6,JD Davison (TW),guard
7,Jaden Springer,guard
8,Jaylen Brown,forward
9,Jayson Tatum,forward


Now let's put together the group

In [8]:
celtics.groupby(['position_group','on-off']).player.count().reset_index()

Unnamed: 0,position_group,on-off,player
0,center,0.1,1
1,center,0.3,1
2,center,0.4,2
3,forward,0.0,2
4,forward,0.1,2
5,forward,0.2,1
6,forward,0.4,1
7,forward,0.6,1
8,forward,0.7,1
9,guard,0.0,2


Neat! This is one way we can look at who the starters on the team are, or what playing time looks like. 

Let's create a pivot_table from the group and look at it in a different way. (This creates something easier to analyze and look at to human eyes, because it de-duplicates the index)

In [10]:
celtics.groupby(['position_group','on-off']).player.count().reset_index().pivot(columns='position_group',index='on-off', values='player')

position_group,center,forward,guard
on-off,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,,2.0,2.0
0.1,1.0,2.0,
0.2,,1.0,
0.3,1.0,,
0.4,2.0,1.0,1.0
0.6,,1.0,2.0
0.7,,1.0,


### Column Stats

Points are everything, right?? So let's get some interesting stats. 

In [12]:
print(f'Max Points: {celtics.PTS.max()}')
print(f'Min Points: {celtics.PTS.min()}')
print(f'Mean Points: {celtics.PTS.mean()}')
print(f'Median Points: {celtics.PTS.median()}')
print(f'Count Points(Number of Points Scorers): {celtics.PTS.count()}')
print(f'Standard Deviation of Points: {celtics.PTS.std()}')

Max Points: 1987.0
Min Points: 11.0
Mean Points: 520.3684210526316
Median Points: 201.0
Count Points(Number of Points Scorers): 19
Standard Deviation of Points: 593.0011439492728




Here let's look at getting unique values (or a set) and counts. 

In [14]:
# This returns all positions (duplicates!) 
celtics.position

0       C
1     NaN
2      SG
3     NaN
4      PF
5     NaN
6      SG
7      PG
8      SF
9      PF
10     SF
11     PG
12      C
13    NaN
14      C
15      C
16     SF
17     PG
18     SF
19     SF
20     PF
Name: position, dtype: object

# this returns a python array of the unique values. It's more or less a set. 
celtics.position.unique()

In [16]:
# this gets the count of unique values
celtics.position.nunique()

5

### Aggregates and Groups

Remember groups??? We can create aggregates per group. 

In [18]:
# Top point scorers at each position
celtics.groupby('position').PTS.max().reset_index()

Unnamed: 0,position,PTS
0,C,1145.0
1,PF,1987.0
2,PG,860.0
3,SF,1610.0
4,SG,1107.0


---

This is useful, but how do I get the player associated w/ that max score? 

First let's look at what we get just by viewing the groupby('position')

In [20]:
# basic groupby command for position
celtics.groupby('position')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x73afcff6af80>



That wasn't too useful, right? It just told us that we have a DataFrameGroupBy object. 

So, what happens if we try to perform a selection? (Any Guesses??) youre_fired = celtics.groupby('position')

In [22]:
# Let's select PTS...
celtics.groupby('position')['PTS']

<pandas.core.groupby.generic.SeriesGroupBy object at 0x73afd85a35e0>

Remember Python Pandas 1?

That's right, when you perform a single selection on a DataFrame, you get a Series object. So, if you perform a single selection on a DataFrameGroupBy... you get a SeriesGroupBy. 

So let's use idxmax() to get the index of the max value and then try to use that index to get the information we want. 

In [24]:
# First we'll get the index and print it out to check that we've actually got something other than an object.
index = celtics.groupby('position')['PTS'].idxmax()
index

position
C     12
PF     9
PG    11
SF     8
SG     2
Name: PTS, dtype: int64

In [25]:
# Rad! So we've got the index of the max scorer for each position. Now let's use it. 
celtics.loc[index]

Unnamed: 0,number,player,position,height,weight,birth_date,country_code,experience,college,on-off,age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,position_group
12,8.0,Kristaps Porziņģis,C,7-2,240.0,August 2 1995,lv,7,,0.4,28.0,57.0,57.0,1690.0,388.0,752.0,0.516,110.0,293.0,0.375,278.0,459.0,0.606,0.589,259.0,302.0,0.858,97.0,312.0,409.0,115.0,42.0,111.0,89.0,156.0,1145.0,center
9,0.0,Jayson Tatum,PF,6-8,210.0,March 3 1998,us,6,Duke,0.7,25.0,74.0,74.0,2645.0,672.0,1426.0,0.471,229.0,609.0,0.376,443.0,817.0,0.542,0.552,414.0,497.0,0.833,67.0,534.0,601.0,364.0,75.0,43.0,188.0,145.0,1987.0,forward
11,4.0,Jrue Holiday,PG,6-4,205.0,June 12 1990,us,14,UCLA,0.6,33.0,69.0,69.0,2263.0,331.0,689.0,0.48,138.0,322.0,0.429,193.0,367.0,0.526,0.581,60.0,72.0,0.833,84.0,289.0,373.0,333.0,61.0,53.0,124.0,108.0,860.0,guard
8,7.0,Jaylen Brown,SF,6-6,223.0,October 24 1996,us,7,California,0.6,27.0,70.0,70.0,2343.0,627.0,1256.0,0.499,145.0,410.0,0.354,482.0,846.0,0.57,0.557,211.0,300.0,0.703,84.0,303.0,387.0,249.0,83.0,37.0,166.0,185.0,1610.0,forward
2,9.0,Derrick White,SG,6-4,190.0,July 2 1994,us,6,Colorado-Colorado Springs Colorado,0.6,29.0,73.0,73.0,2381.0,387.0,839.0,0.461,196.0,495.0,0.396,191.0,344.0,0.555,0.578,137.0,152.0,0.901,51.0,259.0,310.0,377.0,74.0,87.0,112.0,152.0,1107.0,guard




Ok. That's helpful, but we don't need all of that information right? We just need the player's name and the points. 

In [27]:
celtics.loc[index][['player','PTS']]

Unnamed: 0,player,PTS
12,Kristaps Porziņģis,1145.0
9,Jayson Tatum,1987.0
11,Jrue Holiday,860.0
8,Jaylen Brown,1610.0
2,Derrick White,1107.0


Now that we've put the pieces together, let's try to put it together using a python lambda and use groupby in order to fix the row axis so that we 
don't have the indexes anymore, but rather the positions. 

In [29]:
get_max_score = lambda group: group.loc[group.PTS.idxmax()]

celtics.groupby('position').apply(get_max_score)[['player', 'PTS']]

  celtics.groupby('position').apply(get_max_score)[['player', 'PTS']]


Unnamed: 0_level_0,player,PTS
position,Unnamed: 1_level_1,Unnamed: 2_level_1
C,Kristaps Porziņģis,1145.0
PF,Jayson Tatum,1987.0
PG,Jrue Holiday,860.0
SF,Jaylen Brown,1610.0
SG,Derrick White,1107.0


Let's do this again w/ Total Rebounds. Unfortunately "TRB" is a vague abbreviation that many users might not recognize, so we'll want to rename the column. Let's do that. 

In [31]:
get_max_trb = lambda group: group.loc[group.TRB.idxmax()]

celtics_trb_by_pos = celtics.groupby('position').apply(get_max_trb)[['player','TRB']]
celtics_trb_by_pos

  celtics_trb_by_pos = celtics.groupby('position').apply(get_max_trb)[['player','TRB']]


Unnamed: 0_level_0,player,TRB
position,Unnamed: 1_level_1,Unnamed: 2_level_1
C,Al Horford,413.0
PF,Jayson Tatum,601.0
PG,Jrue Holiday,373.0
SF,Jaylen Brown,387.0
SG,Derrick White,310.0


In [32]:
# Now let's rename the columns. 
celtics_trb_by_pos = celtics_trb_by_pos.rename(columns={"TRB": "Rebounds"})
celtics_trb_by_pos

Unnamed: 0_level_0,player,Rebounds
position,Unnamed: 1_level_1,Unnamed: 2_level_1
C,Al Horford,413.0
PF,Jayson Tatum,601.0
PG,Jrue Holiday,373.0
SF,Jaylen Brown,387.0
SG,Derrick White,310.0


---

Getting the high scorers is helpful, but what if we want to determine the percentiles so we can select the players above/below those percentiles? 

Before we can do this... we're going to need to solve for some of the NaNs

In [34]:
celtics[['PTS', 'player', 'position']]

Unnamed: 0,PTS,player,position
0,562.0,Al Horford,C
1,56.0,Dalano Banton,
2,1107.0,Derrick White,SG
3,11.0,Drew Peterson,
4,,Drew Peterson (TW),PF
5,16.0,JD Davison,
6,,JD Davison (TW),SG
7,35.0,Jaden Springer,PG
8,1610.0,Jaylen Brown,SF
9,1987.0,Jayson Tatum,PF


Remember that silly (TW) suffix in the player name? This caused some issues when we merged the two data sets. It would have gone much smoother if we had fixed the names to match up (or created an id of some kind.) 

We're going to solve this w/ a simple hack. We're going to fill in the NaN values in PTS, because they'll prevent us from calculating percentiles. 

```
Rule of Thumb: Non Numeric Values Break Numeric Calculations.
```
1. The easiest way to spot non-numerics is running DataFrame.info(), and look for fields that we expect to see a numeric data type that shows object. This means that there is likely a string somewhere.
2. The second easiest way is to hunt down "NaN"s. NaN shows up in numeric fields. 

In [36]:
# Setting the values (I'm just taking the values from the player w/o the TW and copying it to their duplicate entry.  
celtics.at[15,'PTS'] = celtics.iloc[19]['PTS']
celtics.at[16, 'PTS'] = celtics.iloc[20]['PTS']
celtics.loc[[15, 16, 19, 20]][['player','PTS']]

Unnamed: 0,player,PTS
15,Neemias Queta,162.0
16,Oshae Brissett,80.0
19,Svi Mykhailiuk,162.0
20,Xavier Tillman Sr.,80.0


The output should show you the correct outcomes!

NOTE: There is a method called set_value() that is SO Much faster than .at(), but it is going to be deprecated by pandas. use at() or iat(). 

Now that we've done a little hacky-wacky wrangling, let me show you an easy way. 

In [38]:
# import numpy. We need that to set NaN and get to percentiles later
import numpy as np


# Setting the values Back
celtics.at[15,'PTS'] = np.NaN
celtics.at[16, 'PTS'] = np.NaN
celtics.loc[[15, 16, 19, 20]][['player','PTS']]

Unnamed: 0,player,PTS
15,Neemias Queta,
16,Oshae Brissett,
19,Svi Mykhailiuk,162.0
20,Xavier Tillman Sr.,80.0


In [39]:
# Let's calculate the top scorers... and show the bad values. 
top_scorers = celtics.groupby('position').PTS.apply(lambda x: np.percentile(x, 75)).reset_index()
top_scorers

Unnamed: 0,position,PTS
0,C,
1,PF,
2,PG,823.5
3,SF,
4,SG,


In [40]:
# Lame! So let's fix it. [replace numpy.percentile w/ numpy.nanpercentile]
top_scorers = celtics.groupby('position').PTS.apply(lambda x: np.nanpercentile(x, 75)).reset_index()
top_scorers

Unnamed: 0,position,PTS
0,C,853.5
1,PF,1510.25
2,PG,823.5
3,SF,936.5
4,SG,1107.0


In [41]:
# now let's calculate the low scorers
low_scorers = celtics.groupby('position').PTS.apply(lambda x: np.percentile(x, 25)).reset_index()
low_scorers

Unnamed: 0,position,PTS
0,C,
1,PF,
2,PG,411.0
3,SF,
4,SG,


In [42]:
# what about the middle scorers? The 50th percentile...

In [43]:
mid = celtics.groupby('position').PTS.median().reset_index()
mid

Unnamed: 0,position,PTS
0,C,562.0
1,PF,1033.5
2,PG,787.0
3,SF,437.0
4,SG,1107.0


In [44]:
# Let's prove it by 
mid_quantile = celtics.groupby('position').PTS.apply(lambda x: np.nanpercentile(x, 50)).reset_index()
mid_quantile

Unnamed: 0,position,PTS
0,C,562.0
1,PF,1033.5
2,PG,787.0
3,SF,437.0
4,SG,1107.0


In [45]:
# Here is another short cut to get all of the information we just calculated separately. 
# .. I added a second field (TRB) to demonstrate how easy it is to get there...
celtics.groupby(by='position').describe()[['PTS', 'TRB']]

Unnamed: 0_level_0,PTS,PTS,PTS,PTS,PTS,PTS,PTS,PTS,TRB,TRB,TRB,TRB,TRB,TRB,TRB,TRB
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
position,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
C,3.0,680.333333,418.249128,334.0,448.0,562.0,853.5,1145.0,4.0,301.25,138.862942,122.0,226.25,335.0,410.0,413.0
PF,2.0,1033.5,1348.452632,80.0,556.75,1033.5,1510.25,1987.0,2.0,327.5,386.787409,54.0,190.75,327.5,464.25,601.0
PG,3.0,560.666667,456.70158,35.0,411.0,787.0,823.5,860.0,3.0,219.333333,180.876569,20.0,142.5,265.0,319.0,373.0
SF,4.0,624.75,722.094811,15.0,125.25,437.0,936.5,1610.0,5.0,178.8,153.865851,20.0,51.0,160.0,276.0,387.0
SG,1.0,1107.0,,1107.0,1107.0,1107.0,1107.0,1107.0,1.0,310.0,,310.0,310.0,310.0,310.0,310.0


### DataFrameGroupBy.quantile() vs. numpy.percentile() !!!

It's generally recommended to use numpy for performance reasons, however quantile can be easier to use

#### quantile's interpolation method
This is how quantile calculates the quantile... (in other word's it's not very accurate...) 
- linear: i + (j - i) * (x-i)/(j-i), where (x-i)/(j-i) is the fractional part of the index surrounded by i > j.  **<-- Default**
- lower: i.
- higher: j.
- nearest: i or j whichever is nearest.
- midpoint: (i + j) / 2.

#### numpy percentile. 

https://numpy.org/doc/stable/reference/generated/numpy.percentile.html

Similar, but more involved: 

This parameter specifies the method to use for estimating the percentile. There are many different methods, some unique to NumPy. See the notes for explanation. The options sorted by their R type as summarized in the H&F paper [1] are:

- ‘inverted_cdf’
- ‘averaged_inverted_cdf’
- ‘closest_observation’
- ‘interpolated_inverted_cdf’
- ‘hazen’
- ‘weibull’
- ‘linear’ **<--default**
- ‘median_unbiased’
- ‘normal_unbiased’



In [47]:
# Here is an example of using Quantile()..Same results. 
celtics.groupby('position').PTS.quantile(q=.25) 

position
C      448.00
PF     556.75
PG     411.00
SF     125.25
SG    1107.00
Name: PTS, dtype: float64

### Other helpful aggregations w/ Groups. 

A comprehensive list is here https://pandas.pydata.org/docs/reference/groupby.html

In [49]:
# this groups the first few players at each position based on their index. (not terribly useful unless you filter it..) 
celtics.groupby('position').head(2)

Unnamed: 0,number,player,position,height,weight,birth_date,country_code,experience,college,on-off,age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,position_group
0,42.0,Al Horford,C,6-9,240.0,June 3 1986,do,16,Florida,0.4,37.0,65.0,33.0,1740.0,214.0,419.0,0.511,108.0,258.0,0.419,106.0,161.0,0.658,0.64,26.0,30.0,0.867,82.0,331.0,413.0,168.0,38.0,62.0,48.0,93.0,562.0,center
2,9.0,Derrick White,SG,6-4,190.0,July 2 1994,us,6,Colorado-Colorado Springs Colorado,0.6,29.0,73.0,73.0,2381.0,387.0,839.0,0.461,196.0,495.0,0.396,191.0,344.0,0.555,0.578,137.0,152.0,0.901,51.0,259.0,310.0,377.0,74.0,87.0,112.0,152.0,1107.0,guard
4,13.0,Drew Peterson (TW),PF,6-9,205.0,November 9 1999,us,R,Rice University USC,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,forward
6,20.0,JD Davison (TW),SG,6-1,195.0,October 3 2002,us,1,Alabama,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,guard
7,44.0,Jaden Springer,PG,6-4,204.0,September 25 2002,us,2,Tennessee,0.0,21.0,17.0,1.0,130.0,13.0,30.0,0.433,2.0,11.0,0.182,11.0,19.0,0.579,0.467,7.0,8.0,0.875,8.0,12.0,20.0,9.0,11.0,4.0,8.0,17.0,35.0,guard
8,7.0,Jaylen Brown,SF,6-6,223.0,October 24 1996,us,7,California,0.6,27.0,70.0,70.0,2343.0,627.0,1256.0,0.499,145.0,410.0,0.354,482.0,846.0,0.57,0.557,211.0,300.0,0.703,84.0,303.0,387.0,249.0,83.0,37.0,166.0,185.0,1610.0,forward
9,0.0,Jayson Tatum,PF,6-8,210.0,March 3 1998,us,6,Duke,0.7,25.0,74.0,74.0,2645.0,672.0,1426.0,0.471,229.0,609.0,0.376,443.0,817.0,0.542,0.552,414.0,497.0,0.833,67.0,534.0,601.0,364.0,75.0,43.0,188.0,145.0,1987.0,forward
10,27.0,Jordan Walsh,SF,6-7,205.0,March 3 2004,us,R,Arkansas,0.0,19.0,9.0,1.0,83.0,6.0,15.0,0.4,2.0,9.0,0.222,4.0,6.0,0.667,0.467,1.0,2.0,0.5,5.0,15.0,20.0,5.0,5.0,1.0,3.0,11.0,15.0,forward
11,4.0,Jrue Holiday,PG,6-4,205.0,June 12 1990,us,14,UCLA,0.6,33.0,69.0,69.0,2263.0,331.0,689.0,0.48,138.0,322.0,0.429,193.0,367.0,0.526,0.581,60.0,72.0,0.833,84.0,289.0,373.0,333.0,61.0,53.0,124.0,108.0,860.0,guard
12,8.0,Kristaps Porziņģis,C,7-2,240.0,August 2 1995,lv,7,,0.4,28.0,57.0,57.0,1690.0,388.0,752.0,0.516,110.0,293.0,0.375,278.0,459.0,0.606,0.589,259.0,302.0,0.858,97.0,312.0,409.0,115.0,42.0,111.0,89.0,156.0,1145.0,center


In [50]:
# first entry
celtics.groupby('position').first()

Unnamed: 0_level_0,number,player,height,weight,birth_date,country_code,experience,college,on-off,age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,position_group
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
C,42.0,Al Horford,6-9,240.0,June 3 1986,do,16,Florida,0.4,37.0,65.0,33.0,1740.0,214.0,419.0,0.511,108.0,258.0,0.419,106.0,161.0,0.658,0.64,26.0,30.0,0.867,82.0,331.0,413.0,168.0,38.0,62.0,48.0,93.0,562.0,center
PF,13.0,Drew Peterson (TW),6-9,205.0,November 9 1999,us,R,Rice University USC,0.0,25.0,74.0,74.0,2645.0,672.0,1426.0,0.471,229.0,609.0,0.376,443.0,817.0,0.542,0.552,414.0,497.0,0.833,67.0,534.0,601.0,364.0,75.0,43.0,188.0,145.0,1987.0,forward
PG,44.0,Jaden Springer,6-4,204.0,September 25 2002,us,2,Tennessee,0.0,21.0,17.0,1.0,130.0,13.0,30.0,0.433,2.0,11.0,0.182,11.0,19.0,0.579,0.467,7.0,8.0,0.875,8.0,12.0,20.0,9.0,11.0,4.0,8.0,17.0,35.0,guard
SF,7.0,Jaylen Brown,6-6,223.0,October 24 1996,us,7,California,0.6,27.0,70.0,70.0,2343.0,627.0,1256.0,0.499,145.0,410.0,0.354,482.0,846.0,0.57,0.557,211.0,300.0,0.703,84.0,303.0,387.0,249.0,83.0,37.0,166.0,185.0,1610.0,forward
SG,9.0,Derrick White,6-4,190.0,July 2 1994,us,6,Colorado-Colorado Springs Colorado,0.6,29.0,73.0,73.0,2381.0,387.0,839.0,0.461,196.0,495.0,0.396,191.0,344.0,0.555,0.578,137.0,152.0,0.901,51.0,259.0,310.0,377.0,74.0,87.0,112.0,152.0,1107.0,guard


In [51]:
# last entry
celtics.groupby('position').last()

Unnamed: 0_level_0,number,player,height,weight,birth_date,country_code,experience,college,on-off,age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,position_group
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
C,88.0,Neemias Queta,7-0,245.0,July 13 1999,pt,2,Utah State University,0.1,24.0,28.0,0.0,333.0,67.0,104.0,0.644,0.0,0.0,1.0,67.0,104.0,0.644,0.644,20.0,28.0,0.714,53.0,69.0,122.0,20.0,13.0,21.0,13.0,50.0,334.0,center
PF,26.0,Xavier Tillman Sr.,6-8,245.0,January 12 1999,us,3,Michigan State,0.1,25.0,20.0,2.0,274.0,34.0,66.0,0.515,8.0,28.0,0.286,26.0,38.0,0.684,0.576,4.0,7.0,0.571,14.0,40.0,54.0,20.0,9.0,9.0,5.0,15.0,80.0,forward
PG,11.0,Payton Pritchard,6-1,195.0,January 28 1998,us,3,Oregon,0.4,26.0,82.0,5.0,1825.0,297.0,635.0,0.468,147.0,382.0,0.385,150.0,253.0,0.593,0.583,46.0,56.0,0.821,70.0,195.0,265.0,281.0,39.0,6.0,61.0,106.0,787.0,guard
SF,50.0,Svi Mykhailiuk,6-7,205.0,June 10 1997,ua,5,Kansas,0.1,26.0,41.0,2.0,413.0,57.0,137.0,0.416,42.0,108.0,0.389,15.0,29.0,0.517,0.569,6.0,9.0,0.667,11.0,40.0,51.0,35.0,11.0,1.0,12.0,19.0,162.0,forward
SG,20.0,JD Davison (TW),6-1,195.0,October 3 2002,us,1,Alabama,0.0,29.0,73.0,73.0,2381.0,387.0,839.0,0.461,196.0,495.0,0.396,191.0,344.0,0.555,0.578,137.0,152.0,0.901,51.0,259.0,310.0,377.0,74.0,87.0,112.0,152.0,1107.0,guard
