# Chapter 12 - Advanced Pandas

## Advanced GroupBy Use

In [1]:
import pandas as pd

In [2]:
# Read the loans CSV, keep only the four columns used below, restrict the
# rows to grades C-F, and renumber the rows from 0.
# A single .loc selection replaces the original full-frame copy, and the
# non-inplace reset_index returns a fresh frame instead of mutating a
# filtered one (the inplace call returned None, which was bound to `_`).
df = pd.read_csv('dataset-A3-loans.csv')
df2 = (
    df.loc[
        df['grade'].isin(['C', 'D', 'E', 'F']),
        ['id', 'grade', 'funded_amount', 'interest_rate'],
    ]
    .reset_index(drop=True)
)
display(df2)

# Group funded_amount by grade. Printing the grouped selection shows only
# the GroupBy object's repr, not any values.
g = df2.groupby('grade')['funded_amount']
print(g)

Unnamed: 0,id,grade,funded_amount,interest_rate
0,721751,D,7000.0,14.91
1,40277218,D,16800.0,16.49
2,59481461,C,8000.0,12.29
3,55917749,C,5000.0,12.29
4,1149328,C,11500.0,16.29
5,1614457,C,6000.0,15.8
6,31999103,C,11000.0,14.49
7,42221389,C,2875.0,12.69
8,62082707,C,3225.0,13.33
9,62042554,C,18000.0,12.29


<pandas.core.groupby.SeriesGroupBy object at 0x113d28e80>


In [3]:
# Select the funded_amount column of the per-grade groups, then show one
# mean per grade (the last expression is the cell's output).
g = df2.groupby(by='grade')['funded_amount']
g.mean()

grade
C    10670.000000
D    12568.750000
E    18683.333333
F    25000.000000
Name: funded_amount, dtype: float64

In [4]:
# Broadcast each grade's mean funded_amount back onto every row of that
# grade, so the new column has the same length and index as df2.
# The built-in 'mean' alias gives the same values as a per-group
# `lambda x: x.mean()` without calling a Python function for each group.
df2['grade_mean'] = g.transform('mean')
display(df2)

Unnamed: 0,id,grade,funded_amount,interest_rate,grade_mean
0,721751,D,7000.0,14.91,12568.75
1,40277218,D,16800.0,16.49,12568.75
2,59481461,C,8000.0,12.29,10670.0
3,55917749,C,5000.0,12.29,10670.0
4,1149328,C,11500.0,16.29,10670.0
5,1614457,C,6000.0,15.8,10670.0
6,31999103,C,11000.0,14.49,10670.0
7,42221389,C,2875.0,12.69,10670.0
8,62082707,C,3225.0,13.33,10670.0
9,62042554,C,18000.0,12.29,10670.0


In [5]:
def normalise(x):
    """Centre ``x`` on its mean and scale it by its standard deviation.

    Computes ``(x - x.mean()) / x.std()``. ``x`` only needs to provide
    ``mean()``/``std()`` and support arithmetic; in this notebook it is
    the per-group series handed over by groupby ``transform``/``apply``.

    Note: no guard is applied to the divisor, so whatever ``x.std()``
    returns is used as-is (for a single-value pandas Series that is NaN,
    which propagates to the result).
    """
    centred = x - x.mean()
    spread = x.std()
    return centred / spread

In [6]:
# Standardise funded_amount within each grade in two ways, transform() and
# apply(), and display both results for comparison.
# group_keys=False is passed explicitly so that apply() does not prepend
# the grade as an extra index level on newer pandas releases; its result
# then stays aligned to df2's row index, like the transform() result.
funded_by_grade = df2.groupby('grade', group_keys=False)['funded_amount']
display(funded_by_grade.transform(normalise))
display(funded_by_grade.apply(normalise))

0    -0.659240
1     0.500904
2    -0.334472
3    -0.710284
4     0.103974
5    -0.585013
6     0.041339
7    -0.976483
8    -0.932639
9     0.918232
10    1.538321
11   -1.037469
12    1.152006
13    0.079710
14         NaN
15   -0.960824
16    0.957760
17    1.920396
18   -0.993670
19   -1.211365
20   -0.585013
21    0.855597
22    0.918232
Name: funded_amount, dtype: float64

0    -0.659240
1     0.500904
2    -0.334472
3    -0.710284
4     0.103974
5    -0.585013
6     0.041339
7    -0.976483
8    -0.932639
9     0.918232
10    1.538321
11   -1.037469
12    1.152006
13    0.079710
14         NaN
15   -0.960824
16    0.957760
17    1.920396
18   -0.993670
19   -1.211365
20   -0.585013
21    0.855597
22    0.918232
Name: funded_amount, dtype: float64

**References:**

Python for Data Analysis, 2nd Edition, McKinney (2017)