In [1]:
# import check for numpy and pandas
import numpy as np
import pandas as pd

In [2]:
print(np.__version__)
print(pd.__version__)

1.18.1
1.0.1


## Python Scientific Library: NumPy

### Arrays

In [3]:
a = np.array([1, 2, 3])
a

array([1, 2, 3])

In [4]:
a = np.array([[1, 2, 3], [4, 5, 6]])
print(a.shape)

(2, 3)


In [5]:
a = np.arange(24)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [6]:
a = np.empty([3, 2], dtype=int)
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [7]:
x = np.zeros(5)
x

array([0., 0., 0., 0., 0.])

In [8]:
x = np.ones(5)
x

array([1., 1., 1., 1., 1.])

In [9]:
x = np.ones([2, 2], dtype=int)
x

array([[1, 1],
       [1, 1]])

In [10]:
x = [1, 2, 3]
a = np.asarray(x)
a

array([1, 2, 3])

In [11]:
x = (1, 2, 3)
a = np.asarray(x)
a

array([1, 2, 3])

In [12]:
x = np.linspace(10, 20, 5)
x

array([10. , 12.5, 15. , 17.5, 20. ])

In [13]:
# endpoint=False leaves the stop value (20) out of the result, so the five
# samples are spaced 2 apart instead of 2.5.
x = np.linspace(10, 20, 5, endpoint=False)
x

array([10., 12., 14., 16., 18.])

In [14]:
# retstep=True makes linspace return a (samples, step) tuple rather than just
# the array, so `x` is a tuple here.
x = np.linspace(1, 2, 5, retstep=True)
x

(array([1.  , 1.25, 1.5 , 1.75, 2.  ]), 0.25)

In [15]:
x = np.array([[1, 2], [3, 4], [5, 6]])
x

array([[1, 2],
       [3, 4],
       [5, 6]])

In [16]:
# The two index lists are paired element-wise as (row, column) coordinates,
# so this picks x[0, 0], x[1, 0] and x[2, 0].
y = x[[0, 1, 2], [0, 0, 0]]  # integer array indexing
print(y)
# Same rows, but column 1 for the middle element: x[0, 0], x[1, 1], x[2, 0].
y2 = x[[0, 1, 2], [0, 1, 0]]
print(y2)

[1 3 5]
[1 4 5]


In [17]:
print(x[x > 5])

[6]


In [18]:
a = np.array([np.nan, 1, 2, np.nan, 3, 4, 5])
# np.isnan(a) is a boolean mask marking the NaN entries; negating it and
# indexing with the result keeps only the non-NaN values.
a[~np.isnan(a)]

array([1., 2., 3., 4., 5.])

In [19]:
a = np.array([[3, 7, 5], [8, 4, 3], [2, 4, 9]])
a

array([[3, 7, 5],
       [8, 4, 3],
       [2, 4, 9]])

In [20]:
# Minimum / maximum reductions of `a`.
# axis=1 reduces across each row, axis=0 down each column; with no axis the
# whole array is reduced to a single scalar.
print(a.min(axis=1))
print(a.min(axis=0))
print(a.max())
print(a.max(axis=0))
print(a.max(axis=1))

[3 3 2]
[2 4 3]
9
[8 7 9]
[7 8 9]


## Python Scientific Library: Pandas

Two widely used Data Structures from pandas:
- `Series`, 1-D, size-immutable (the values can be changed, the length cannot)
```
pandas.Series(data, index, dtype, copy)
```
- `DataFrame`, 2-D, size-mutable (columns can be inserted and deleted)
```
pandas.DataFrame(data, index, columns, dtype, copy)
```

### Series

In [21]:
data = np.array(['a', 'b', 'c', 'd'])
s = pd.Series(data)
s

0    a
1    b
2    c
3    d
dtype: object

In [22]:
data = np.array(['a', 'b', 'c', 'd'])
s = pd.Series(data, index=[100, 101, 102, 103])
s

100    a
101    b
102    c
103    d
dtype: object

In [23]:
# The dict keys become the index labels. The values are written as floats
# (0., 1., 2.), so the resulting Series has dtype float64.
data = {'a': 0., 'b': 1., 'c': 2.}  # note the trailing decimal point
s = pd.Series(data)
s

a    0.0
b    1.0
c    2.0
dtype: float64

In [24]:
# A scalar is repeated for every label of the supplied index.
s = pd.Series(5, index=range(4))
s

0    5
1    5
2    5
3    5
dtype: int64

### Series data access

In [25]:
s = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [26]:
# Positional access goes through .iloc. Plain `s[0]` on a Series with string
# labels relies on an integer-position fallback that is deprecated in
# pandas 2.1+, so the explicit form is used here.
print(s.iloc[0])
print(s.iloc[:3])
print(s.iloc[-3:])
# Label-based access: a single label returns a scalar, a list of labels
# returns a Series.
print(s['a'])
print(s[['a', 'c', 'd']])

1
a    1
b    2
c    3
dtype: int64
c    3
d    4
e    5
dtype: int64
1
a    1
c    3
d    4
dtype: int64


### DataFrame

In [27]:
data = [1, 2, 3, 4, 5]
df = pd.DataFrame(data)
df

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [28]:
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
df = pd.DataFrame(data, columns=['Name', 'Age'])
df

Unnamed: 0,Name,Age
0,Alex,10
1,Bob,12
2,Clarke,13


In [29]:
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
# A frame-wide dtype=float cannot apply to the string column 'Name'; newer
# pandas raises instead of silently skipping it. Cast only the numeric column
# so 'Age' becomes float64 and 'Name' stays as strings.
df = pd.DataFrame(data, columns=['Name', 'Age']).astype({'Age': float})
df

Unnamed: 0,Name,Age
0,Alex,10.0
1,Bob,12.0
2,Clarke,13.0


In [30]:
data = {'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
        'Age': [28, 34, 29, 42]}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Tom,28
1,Jack,34
2,Steve,29
3,Ricky,42


In [31]:
df = pd.DataFrame(data, index=['rank1', 'rank2', 'rank3', 'rank4'])
df

Unnamed: 0,Name,Age
rank1,Tom,28
rank2,Jack,34
rank3,Steve,29
rank4,Ricky,42


In [32]:
# List of dicts: each dict is one row and the keys become the columns.
# The first record has no 'c' key, so that cell is NaN.
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [33]:
# `columns` chooses which record keys to keep. 'b1' is not a key in any record
# of `data`, so that column is entirely NaN.
df = pd.DataFrame(data, index=['first', 'second'], columns=['a', 'b1'])
df

Unnamed: 0,a,b1
first,1,
second,5,


In [34]:
# Build a frame from two Series of different length. Rows are aligned on the
# union of both indexes, so column 'one' has no value (NaN) for label 'd'.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


### Column operations

In [35]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [36]:
# Assigning a Series aligns it on the index: it has no entry for 'd', so that
# row gets NaN in the new column.
df['three'] = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
# `del` removes the column from `df` in place.
del df['one']
df

Unnamed: 0,two,three
a,1,10.0
b,2,20.0
c,3,30.0
d,4,


### Row operations

In [37]:
df.loc['b']

two       2.0
three    20.0
Name: b, dtype: float64

In [38]:
df.iloc[2]

two       3.0
three    30.0
Name: c, dtype: float64

In [39]:
# Slicing a DataFrame with [] and integers selects rows by position
# (here positions 2 and 3, i.e. labels 'c' and 'd').
df[2:4]

Unnamed: 0,two,three
c,3,30.0
d,4,


In [40]:
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported way to stack rows. Like append, it keeps the original index
# labels, so the result contains the labels 0 and 1 twice.
df = pd.concat([df, df2])
df

Unnamed: 0,a,b
0,1,2
1,3,4
0,5,6
1,7,8


In [41]:
# drop() works on index labels, not positions: every row labelled 0 is
# removed, which is why two rows disappear here.
df = df.drop(0)
df

Unnamed: 0,a,b
1,3,4
1,7,8


### More pandas operations and descriptive statistics

In [42]:
d = {'Name': pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve']),
     'Age': pd.Series([25, 26, 25, 23, 30]),
     'Rating': pd.Series([4.23, 3.24, 3.98, 2.56, 3.20])}
df = pd.DataFrame(d)

In [43]:
df

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2


In [44]:
df.T  # transpose

Unnamed: 0,0,1,2,3,4
Name,Tom,James,Ricky,Vin,Steve
Age,25,26,25,23,30
Rating,4.23,3.24,3.98,2.56,3.2


In [45]:
df.axes

[RangeIndex(start=0, stop=5, step=1),
 Index(['Name', 'Age', 'Rating'], dtype='object')]

In [46]:
df.dtypes

Name       object
Age         int64
Rating    float64
dtype: object

In [47]:
df.shape

(5, 3)

In [48]:
df.size

15

In [49]:
df.values

array([['Tom', 25, 4.23],
       ['James', 26, 3.24],
       ['Ricky', 25, 3.98],
       ['Vin', 23, 2.56],
       ['Steve', 30, 3.2]], dtype=object)

In [50]:
df.head(2)

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24


In [51]:
df.tail(3)

Unnamed: 0,Name,Age,Rating
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2


In [52]:
# sum() reduces each column separately. For the string column 'Name' the
# "sum" is string concatenation, which is why the result has dtype object.
df.sum()

Name      TomJamesRickyVinSteve
Age                         129
Rating                    17.21
dtype: object

In [53]:
# Row-wise total of the numeric columns ('Age' + 'Rating').
# numeric_only=True is required on pandas >= 2.0, where the default no longer
# drops the string column 'Name' and the row sum raises TypeError.
df.sum(axis=1, numeric_only=True)

0    29.23
1    29.24
2    28.98
3    25.56
4    33.20
dtype: float64

In [54]:
# Column means of the numeric columns only. On pandas >= 2.0 a bare
# df.mean() raises TypeError because of the string column 'Name'.
df.mean(numeric_only=True)

Age       25.800
Rating     3.442
dtype: float64

In [55]:
# Sample standard deviation of the numeric columns only. On pandas >= 2.0 a
# bare df.std() raises TypeError because of the string column 'Name'.
df.std(numeric_only=True)

Age       2.588436
Rating    0.668521
dtype: float64

In [56]:
df.describe()

Unnamed: 0,Age,Rating
count,5.0,5.0
mean,25.8,3.442
std,2.588436,0.668521
min,23.0,2.56
25%,25.0,3.2
50%,25.0,3.24
75%,26.0,3.98
max,30.0,4.23


In [57]:
df.describe(include=['object'])

Unnamed: 0,Name
count,5
unique,5
top,Vin
freq,1


In [58]:
df.describe(include='all')

Unnamed: 0,Name,Age,Rating
count,5,5.0,5.0
unique,5,,
top,Vin,,
freq,1,,
mean,,25.8,3.442
std,,2.588436,0.668521
min,,23.0,2.56
25%,,25.0,3.2
50%,,25.0,3.24
75%,,26.0,3.98


### More row/column-wise operations

In [59]:
# 5x3 frame of standard-normal samples. No random seed is set in this
# notebook, so the values (and every result derived from them) change on
# each run.
df = pd.DataFrame(np.random.randn(5, 3), columns=['col1', 'col2', 'col3'])
df

Unnamed: 0,col1,col2,col3
0,1.219963,0.1183,0.998317
1,-1.212981,0.764604,-1.310311
2,-0.519956,0.200389,0.538601
3,-0.591756,1.035061,0.977846
4,-0.092092,-0.612718,-0.669293


In [60]:
df.apply(np.mean)

col1   -0.239364
col2    0.301127
col3    0.107032
dtype: float64

In [61]:
df.apply(np.mean, axis=1)

0    0.778860
1   -0.586230
2    0.073011
3    0.473717
4   -0.458034
dtype: float64

In [62]:
df.apply(lambda x: x.max() - x.min())

col1    2.432945
col2    1.647779
col3    2.308628
dtype: float64

In [63]:
df['col1'].map(lambda x: x*100)

0    121.996341
1   -121.298112
2    -51.995625
3    -59.175557
4     -9.209221
Name: col1, dtype: float64

### Iterate over DataFrame

In [64]:
# Sample frame with one column per kind of data: dates, evenly spaced floats,
# uniform random floats, random category labels and normally distributed values.
N = 20
df = pd.DataFrame(dict(
    A=pd.date_range(start='2016-01-01', periods=N, freq='D'),
    x=np.linspace(0, stop=N-1, num=N),
    y=np.random.rand(N),
    C=np.random.choice(['Low', 'Medium', 'High'], N).tolist(),
    D=np.random.normal(100, 10, size=(N)).tolist(),
))

In [65]:
# Iterate column by column: each step yields (column name, column Series).
# DataFrame.iteritems() was deprecated in pandas 1.5 and removed in 2.0;
# items() is the equivalent.
for key, val in df.items():
    print(key, val)

A 0    2016-01-01
1    2016-01-02
2    2016-01-03
3    2016-01-04
4    2016-01-05
5    2016-01-06
6    2016-01-07
7    2016-01-08
8    2016-01-09
9    2016-01-10
10   2016-01-11
11   2016-01-12
12   2016-01-13
13   2016-01-14
14   2016-01-15
15   2016-01-16
16   2016-01-17
17   2016-01-18
18   2016-01-19
19   2016-01-20
Name: A, dtype: datetime64[ns]
x 0      0.0
1      1.0
2      2.0
3      3.0
4      4.0
5      5.0
6      6.0
7      7.0
8      8.0
9      9.0
10    10.0
11    11.0
12    12.0
13    13.0
14    14.0
15    15.0
16    16.0
17    17.0
18    18.0
19    19.0
Name: x, dtype: float64
y 0     0.421398
1     0.709884
2     0.071989
3     0.122258
4     0.553008
5     0.077755
6     0.962122
7     0.694047
8     0.215247
9     0.703116
10    0.373046
11    0.470834
12    0.092956
13    0.084624
14    0.376133
15    0.128552
16    0.639441
17    0.632546
18    0.217071
19    0.783587
Name: y, dtype: float64
C 0       High
1        Low
2     Medium
3       High
4        Low
5       

### DataFrame Sorting

In [66]:
df = pd.DataFrame(np.random.randn(10, 2),
                  index=[1, 4, 6, 2, 3, 5, 9, 8, 0, 7],
                  columns=['col1', 'col2'])
df

Unnamed: 0,col1,col2
1,1.554915,-0.23852
4,-2.733059,-0.510555
6,-0.302432,-1.96536
2,0.317515,0.544764
3,0.183908,0.500179
5,-0.699678,0.614794
9,1.374636,2.419605
8,-0.187613,2.099046
0,-0.695094,1.273796
7,-0.744572,-1.19112


In [67]:
df.sort_index()

Unnamed: 0,col1,col2
0,-0.695094,1.273796
1,1.554915,-0.23852
2,0.317515,0.544764
3,0.183908,0.500179
4,-2.733059,-0.510555
5,-0.699678,0.614794
6,-0.302432,-1.96536
7,-0.744572,-1.19112
8,-0.187613,2.099046
9,1.374636,2.419605


In [68]:
df.sort_index(ascending=False)

Unnamed: 0,col1,col2
9,1.374636,2.419605
8,-0.187613,2.099046
7,-0.744572,-1.19112
6,-0.302432,-1.96536
5,-0.699678,0.614794
4,-2.733059,-0.510555
3,0.183908,0.500179
2,0.317515,0.544764
1,1.554915,-0.23852
0,-0.695094,1.273796


In [69]:
df.sort_values(by='col1')

Unnamed: 0,col1,col2
4,-2.733059,-0.510555
7,-0.744572,-1.19112
5,-0.699678,0.614794
0,-0.695094,1.273796
6,-0.302432,-1.96536
8,-0.187613,2.099046
3,0.183908,0.500179
2,0.317515,0.544764
9,1.374636,2.419605
1,1.554915,-0.23852


### Index & Selecting

In [70]:
df = pd.DataFrame(np.random.randn(8, 4),
                  index=['a','b','c','d','e','f','g','h'],
                  columns=['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
a,0.601021,-1.041482,-0.636807,-0.391878
b,0.458838,-0.221465,0.343558,0.385834
c,-0.22318,1.002784,-0.170332,0.582287
d,-0.712982,-0.2067,0.007127,-0.458834
e,-0.513955,-0.582114,0.780118,1.441621
f,1.108678,-1.741129,-0.334045,0.499579
g,-1.049725,0.691149,-1.0737,1.037459
h,-0.183965,-0.467123,-0.265452,0.22263


In [71]:
df.loc[:, 'A']

a    0.601021
b    0.458838
c   -0.223180
d   -0.712982
e   -0.513955
f    1.108678
g   -1.049725
h   -0.183965
Name: A, dtype: float64

In [72]:
df.loc[:, ['A', 'C']]

Unnamed: 0,A,C
a,0.601021,-0.636807
b,0.458838,0.343558
c,-0.22318,-0.170332
d,-0.712982,0.007127
e,-0.513955,0.780118
f,1.108678,-0.334045
g,-1.049725,-1.0737
h,-0.183965,-0.265452


In [73]:
df.loc[['a', 'b', 'f', 'h'], ['A', 'C']]

Unnamed: 0,A,C
a,0.601021,-0.636807
b,0.458838,0.343558
f,1.108678,-0.334045
h,-0.183965,-0.265452


In [74]:
df.iloc[:4]

Unnamed: 0,A,B,C,D
a,0.601021,-1.041482,-0.636807,-0.391878
b,0.458838,-0.221465,0.343558,0.385834
c,-0.22318,1.002784,-0.170332,0.582287
d,-0.712982,-0.2067,0.007127,-0.458834


In [75]:
df.iloc[1:5, 2:4]

Unnamed: 0,C,D
b,0.343558,0.385834
c,-0.170332,0.582287
d,0.007127,-0.458834
e,0.780118,1.441621


### Missing data

In [76]:
df = pd.DataFrame(np.random.randn(5, 3),
                  index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])
df

Unnamed: 0,one,two,three
a,-2.353099,0.303269,-0.91406
c,-0.124086,-0.321431,0.230911
e,-0.307877,-0.866465,-0.999702
f,1.227695,-0.353066,-1.324609
h,1.334257,1.350098,-0.844321


In [77]:
# Reindexing to a larger label set inserts all-NaN rows for the labels that
# were not in the original index ('b', 'd' and 'g').
df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
df

Unnamed: 0,one,two,three
a,-2.353099,0.303269,-0.91406
b,,,
c,-0.124086,-0.321431,0.230911
d,,,
e,-0.307877,-0.866465,-0.999702
f,1.227695,-0.353066,-1.324609
g,,,
h,1.334257,1.350098,-0.844321


In [78]:
df['one'].isnull()

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool

In [79]:
df['one'].notnull()

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: one, dtype: bool

In [80]:
df.fillna(0)

Unnamed: 0,one,two,three
a,-2.353099,0.303269,-0.91406
b,0.0,0.0,0.0
c,-0.124086,-0.321431,0.230911
d,0.0,0.0,0.0
e,-0.307877,-0.866465,-0.999702
f,1.227695,-0.353066,-1.324609
g,0.0,0.0,0.0
h,1.334257,1.350098,-0.844321


In [81]:
# Forward fill: each NaN takes the last valid value above it in its column.
# fillna(method='pad') is deprecated in pandas 2.1+; ffill() is the direct
# equivalent.
df.ffill()

Unnamed: 0,one,two,three
a,-2.353099,0.303269,-0.91406
b,-2.353099,0.303269,-0.91406
c,-0.124086,-0.321431,0.230911
d,-0.124086,-0.321431,0.230911
e,-0.307877,-0.866465,-0.999702
f,1.227695,-0.353066,-1.324609
g,1.227695,-0.353066,-1.324609
h,1.334257,1.350098,-0.844321


In [82]:
# Backward fill: each NaN takes the next valid value below it in its column.
# fillna(method='backfill') is deprecated in pandas 2.1+; bfill() is the
# direct equivalent.
df.bfill()

Unnamed: 0,one,two,three
a,-2.353099,0.303269,-0.91406
b,-0.124086,-0.321431,0.230911
c,-0.124086,-0.321431,0.230911
d,-0.307877,-0.866465,-0.999702
e,-0.307877,-0.866465,-0.999702
f,1.227695,-0.353066,-1.324609
g,1.334257,1.350098,-0.844321
h,1.334257,1.350098,-0.844321


In [83]:
df.dropna()

Unnamed: 0,one,two,three
a,-2.353099,0.303269,-0.91406
c,-0.124086,-0.321431,0.230911
e,-0.307877,-0.866465,-0.999702
f,1.227695,-0.353066,-1.324609
h,1.334257,1.350098,-0.844321


### Grouping data

In [84]:
# Sample data for the grouping examples: one row per (team, year) result.
# NOTE: the sixth 'Team' entry is 'kings' in lower case while the others are
# 'Kings'. Grouping on strings is case-sensitive, so it forms its own group.
# NOTE(review): this looks like a typo in the sample data — confirm before
# normalising, since the recorded outputs depend on it.
ipl_data = {
    'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings', 'kings',
             'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017],
    'Points': [876,789,863,673,741,812,756,788,694,701,804,690]}
df = pd.DataFrame(ipl_data)
df

Unnamed: 0,Team,Rank,Year,Points
0,Riders,1,2014,876
1,Riders,2,2015,789
2,Devils,2,2014,863
3,Devils,3,2015,673
4,Kings,3,2014,741
5,kings,4,2015,812
6,Kings,1,2016,756
7,Kings,1,2017,788
8,Riders,2,2016,694
9,Royals,4,2014,701


In [85]:
df.groupby('Team').groups

{'Devils': Int64Index([2, 3], dtype='int64'),
 'Kings': Int64Index([4, 6, 7], dtype='int64'),
 'Riders': Int64Index([0, 1, 8, 11], dtype='int64'),
 'Royals': Int64Index([9, 10], dtype='int64'),
 'kings': Int64Index([5], dtype='int64')}

In [86]:
df.groupby(['Team', 'Year']).groups

{('Devils', 2014): Int64Index([2], dtype='int64'),
 ('Devils', 2015): Int64Index([3], dtype='int64'),
 ('Kings', 2014): Int64Index([4], dtype='int64'),
 ('Kings', 2016): Int64Index([6], dtype='int64'),
 ('Kings', 2017): Int64Index([7], dtype='int64'),
 ('Riders', 2014): Int64Index([0], dtype='int64'),
 ('Riders', 2015): Int64Index([1], dtype='int64'),
 ('Riders', 2016): Int64Index([8], dtype='int64'),
 ('Riders', 2017): Int64Index([11], dtype='int64'),
 ('Royals', 2014): Int64Index([9], dtype='int64'),
 ('Royals', 2015): Int64Index([10], dtype='int64'),
 ('kings', 2015): Int64Index([5], dtype='int64')}

In [87]:
# Iterating a GroupBy yields (group key, sub-frame) pairs; print the key
# followed by the rows that belong to it.
grouped = df.groupby('Year')
for year, members in grouped:
    print(year, members, sep='\n')

2014
     Team  Rank  Year  Points
0  Riders     1  2014     876
2  Devils     2  2014     863
4   Kings     3  2014     741
9  Royals     4  2014     701
2015
      Team  Rank  Year  Points
1   Riders     2  2015     789
3   Devils     3  2015     673
5    kings     4  2015     812
10  Royals     1  2015     804
2016
     Team  Rank  Year  Points
6   Kings     1  2016     756
8  Riders     2  2016     694
2017
      Team  Rank  Year  Points
7    Kings     1  2017     788
11  Riders     2  2017     690


In [88]:
# Two small frames with the same column names ('id', 'Name', 'subject_id'),
# written row by row; they are the inputs for the merge examples.
left = pd.DataFrame(
    [(1, 'Alex', 'sub1'),
     (2, 'Amy', 'sub2'),
     (3, 'Allen', 'sub4'),
     (4, 'Alice', 'sub6'),
     (5, 'Ayoung', 'sub5')],
    columns=['id', 'Name', 'subject_id'])
right = pd.DataFrame(
    [(1, 'Billy', 'sub2'),
     (2, 'Brian', 'sub4'),
     (3, 'Bran', 'sub3'),
     (4, 'Bryce', 'sub6'),
     (5, 'Betty', 'sub5')],
    columns=['id', 'Name', 'subject_id'])

In [89]:
left

Unnamed: 0,id,Name,subject_id
0,1,Alex,sub1
1,2,Amy,sub2
2,3,Allen,sub4
3,4,Alice,sub6
4,5,Ayoung,sub5


In [90]:
right

Unnamed: 0,id,Name,subject_id
0,1,Billy,sub2
1,2,Brian,sub4
2,3,Bran,sub3
3,4,Bryce,sub6
4,5,Betty,sub5


In [91]:
# Join on 'id'. 'Name' and 'subject_id' exist in both frames, so the result
# carries them with '_x' (from `left`) and '_y' (from `right`) suffixes.
pd.merge(left, right, on='id')

Unnamed: 0,id,Name_x,subject_id_x,Name_y,subject_id_y
0,1,Alex,sub1,Billy,sub2
1,2,Amy,sub2,Brian,sub4
2,3,Allen,sub4,Bran,sub3
3,4,Alice,sub6,Bryce,sub6
4,5,Ayoung,sub5,Betty,sub5


In [92]:
pd.merge(left, right, on='subject_id', how='left')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,1,Alex,sub1,,
1,2,Amy,sub2,1.0,Billy
2,3,Allen,sub4,2.0,Brian
3,4,Alice,sub6,4.0,Bryce
4,5,Ayoung,sub5,5.0,Betty


In [93]:
pd.merge(left, right, on='subject_id', how='inner')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,2,Amy,sub2,1,Billy
1,3,Allen,sub4,2,Brian
2,4,Alice,sub6,4,Bryce
3,5,Ayoung,sub5,5,Betty


In [94]:
pd.merge(left, right, how='outer', on='subject_id')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,1.0,Alex,sub1,,
1,2.0,Amy,sub2,1.0,Billy
2,3.0,Allen,sub4,2.0,Brian
3,4.0,Alice,sub6,4.0,Bryce
4,5.0,Ayoung,sub5,5.0,Betty
5,,,sub3,3.0,Bran


In [95]:
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x11e59bc90>

In [96]:
grouped.get_group(2014)

Unnamed: 0,Team,Rank,Year,Points
0,Riders,1,2014,876
2,Devils,2,2014,863
4,Kings,3,2014,741
9,Royals,4,2014,701


In [97]:
# Mean of 'Points' per year. The aggregation is named by string: passing a
# NumPy callable such as np.mean to agg() is deprecated in pandas 2.1+
# (FutureWarning), and 'mean' selects the same pandas implementation.
grouped['Points'].agg('mean')

Year
2014    795.25
2015    769.50
2016    725.00
2017    739.00
Name: Points, dtype: float64

In [98]:
# Several aggregations at once: one result column per name.
# String names replace np.sum / np.mean / np.std, which are deprecated as
# agg() arguments in pandas 2.1+. The column labels ('sum', 'mean', 'std')
# are unchanged, and 'std' is the sample standard deviation (ddof=1) that the
# recorded output shows.
grouped['Points'].agg(['sum', 'mean', 'std'])

Unnamed: 0_level_0,sum,mean,std
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,3181,795.25,87.439026
2015,3078,769.5,65.035888
2016,1450,725.0,43.84062
2017,1478,739.0,69.296465
