In [1]:
import numpy as np
import pandas as pd

In [2]:
# Show plain-text reprs instead of HTML tables, and cap how many
# columns/rows pandas prints before it truncates the display.
display_options = {
    'display.notebook_repr_html': False,
    'display.max_columns': 10,
    'display.max_rows': 10,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [3]:
# A scalar produces a one-element Series; pandas supplies the default label 0.
s1 = pd.Series(2)
s1

0    2
dtype: int64

In [4]:
# get the value with label 0 (a label lookup, not a positional one)
print(s1[0])

2


This looks like a normal array access of the item at position zero, but pandas actually looks up the label 0 in the index of the Series and then returns the matching value.

In [5]:
# build a Series from a plain Python list; labels default to 0..4
s2 = pd.Series(list(range(1, 6)))
s2

0    1
1    2
2    3
3    4
4    5
dtype: int64

## Lookup values


In [9]:
# A Series with explicit string labels can be read by label or by position.
s3 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
print(s3)
# look up by label
print(s3['a'])
# same element by position: use .iloc explicitly, because passing an integer
# key to [] on a non-integer index is a deprecated positional fallback
print(s3.iloc[0])

a    1
b    2
c    3
dtype: int64
1
1


In [11]:
# a list of labels selects several items at once and returns a Series
print(s3[['a', 'c']])

a    1
c    3
dtype: int64


In [12]:
# integer labels that do not start at zero, so label and position differ
s5 = pd.Series([1, 2, 3], index=[10, 11, 12])
s5

10    1
11    2
12    3
dtype: int64

In [13]:
# with an integer index, [] treats the key as a label: 11 is the second item
s5[11]

2

In [17]:
# s5[1] fails: the index holds integers, so [] treats 1 as a label,
# and no label 1 exists (the labels are 10, 11 and 12)
#s5[1]

# be explicit instead: .loc looks up by label, .iloc by position
print(s5.loc[11])
print(s5.iloc[1])

2
2


In [18]:
# both accessors accept lists: labels for .loc, positions for .iloc
print(s5.loc[[10, 12]])
print(s5.iloc[[0, 2]])

10    1
12    3
dtype: int64
10    1
12    3
dtype: int64


In [22]:
print(s3)
# .ix (which guessed between label and position) no longer exists in pandas;
# select by position with .iloc and by label with .loc
print(s3.iloc[[0, 2]])
print(s3.loc[['a', 'c']])

a    1
b    2
c    3
dtype: int64
a    1
c    3
dtype: int64
a    1
c    3
dtype: int64


## Alignment via index labels
A fundamental difference between a NumPy ndarray and a pandas Series is the ability of a Series to automatically align data from another Series based on label values before performing an operation.

In [23]:
# labels listed in ascending order
s6 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
s6

a    1
b    2
c    3
d    4
dtype: int64

In [24]:
# the same label/value pairs as s6, but listed in reverse order
s7 = pd.Series([4, 3, 2, 1], index=['d', 'c', 'b', 'a'])
s7

d    4
c    3
b    2
a    1
dtype: int64

In [25]:
# addition pairs values by label, not by position
s6 + s7

a    2
b    4
c    6
d    8
dtype: int64

In [27]:
# NumPy arrays carry no labels, so addition is purely positional:
# the first element is added to the first, the second to the second, and so on
a1 = np.arange(1, 5)
a2 = np.arange(4, 0, -1)
print(a1)
print(a2)
a1 + a2

[1 2 3 4]
[4 3 2 1]


array([5, 5, 5, 5])

## Arithmetic operations

In [28]:
# multiplying by a scalar applies the operation to every element
s3 * 2

a    2
b    4
c    6
dtype: int64

In [29]:
# a Series holding 2 under every label of s3; multiplying by it gives the same result as s3 * 2
t = pd.Series(2, s3.index)
s3 * t

a    2
b    4
c    6
dtype: int64

In [30]:
# alignment: build a Series from a dict; the keys become the labels
s8 = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 5})
s8

a    1
b    2
c    3
d    5
dtype: int64

In [32]:
# shares only the labels b, c and d with s8
s9 = pd.Series({'b': 6, 'c': 7, 'd': 9, 'e': 10})
s9

b     6
c     7
d     9
e    10
dtype: int64

In [33]:
# labels present in only one operand ('a' and 'e') yield NaN, and the result becomes float64
s8 + s9

a   NaN
b     8
c    10
d    14
e   NaN
dtype: float64

## The special case of NaN

In [38]:
# NumPy's mean over complete data ...
nda = np.array([1, 2, 3, 4, 5])
print(nda.mean())
# ... versus data containing a NaN: a single NaN makes the whole mean NaN.
# np.nan is the canonical spelling; the np.NAN alias was removed in NumPy 2.0.
nda = np.array([1, 2, 3, 4, np.nan])
print(nda.mean())

3.0
nan


In [40]:
# pandas treats NaN as missing data: mean() skips it by default,
# while skipna=False lets the NaN propagate into the result
s = pd.Series(nda)
print(s.mean())
print(s.mean(skipna=False))

2.5
nan


## Boolean selection


In [41]:
# the comparison yields a boolean Series; indexing with it keeps the items that are True
s = pd.Series(np.arange(0, 10))
print(s[s > 5])

6    6
7    7
8    8
9    9
dtype: int64


In [42]:
# combine conditions with & (element-wise "and"); each comparison is parenthesised
s[(s > 5) & (s < 8)]

6    6
7    7
dtype: int64

In [43]:
# all() is True only when the condition holds for every element
(s >= 0).all()

True

In [44]:
# Is any element below 2? Call any() on the boolean mask itself.
# s[s < 2].any() would instead test the truthiness of the selected values,
# and would wrongly report False if the only match were 0.
(s < 2).any()

True

In [45]:
# counting: each True counts as 1, so the sum is the number of matching items
(s < 2).sum()

2

## Reindexing a Series


In [49]:
# seed the generator so the random values below are reproducible
np.random.seed(123)
s = pd.Series(np.random.randn(5))
s

0   -1.085631
1    0.997345
2    0.282978
3   -1.506295
4   -0.578600
dtype: float64

In [50]:
# assigning to .index relabels the Series in place; the values stay the same
s.index = ['a', 'b', 'c', 'd', 'e']
s

a   -1.085631
b    0.997345
c    0.282978
d   -1.506295
e   -0.578600
dtype: float64

In [53]:
np.random.seed(123456)
# two three-element draws, taken in this order from the seeded stream
s1 = pd.Series(np.random.randn(3))
s2 = pd.Series(np.random.randn(3))
# stack the two pieces, then replace the labels with one run 0..n-1
combined = pd.concat(objs=[s1, s2])
combined.index = np.arange(len(combined))
combined

0    0.469112
1   -0.282863
2   -1.509059
3   -1.135632
4    1.212112
5   -0.173215
dtype: float64

In [55]:
np.random.seed(123456)
s1 = pd.Series(np.random.randn(4), index=list('abcd'))
# reindex returns a new Series: labels that exist keep their values,
# labels that do not ('g') are filled with NaN
s2 = s1.reindex(index=['a', 'b', 'g'])
s2

a    0.469112
b   -0.282863
g         NaN
dtype: float64

In [56]:
# the reindexed Series carries exactly the requested labels
s2.index

Index(['a', 'b', 'g'], dtype='object')

In [58]:
s1 = pd.Series([0, 1, 2], index=[0, 1, 2])
s2 = pd.Series([3, 4, 5], index=['0', '1', '2'])
# convert the string labels to integers so they line up with s1's labels
s2.index = s2.index.astype(int)
print(s1 + s2)

0    3
1    5
2    7
dtype: int64


In [59]:
# work on a copy of s
s2 = s.copy()
# fill_value replaces the NaN that the missing label 'f' would otherwise get
s2.reindex(['a', 'f'], fill_value=0)

a   -1.085631
f    0.000000
dtype: float64

In [62]:
# NOTE(review): left disabled. s3 is labelled 'a', 'b', 'c', so forward-filling
# onto the integer labels 0..6 presumably fails or fills nothing; confirm
# before re-enabling, or delete this cell.
#s3.reindex(np.arange(0, 7), method='ffill')

## Modifying a Series in-place

In [63]:
# seeded so the three random values are reproducible
np.random.seed(123456)
s = pd.Series(np.random.randn(3), index=['a', 'b', 'c'])
s

a    0.469112
b   -0.282863
c   -1.509059
dtype: float64

In [65]:
# assigning to a label that does not exist yet appends a new entry in place
s['d'] = 100
s

a      0.469112
b     -0.282863
c     -1.509059
d    100.000000
dtype: float64

In [67]:
# assigning to an existing label overwrites its value in place
s['d'] = -100
s

a      0.469112
b     -0.282863
c     -1.509059
d   -100.000000
dtype: float64

In [68]:
# remove the entry labelled 'a'; del mutates the Series in place
del s['a']
s

b     -0.282863
c     -1.509059
d   -100.000000
dtype: float64

## Powerful slicing

In [69]:
# values 0..4 under the labels 'a'..'e'
s = pd.Series(np.arange(5), index=list('abcde'))
s

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [70]:
# integer slice bounds are positions, and the stop position is excluded
s[1:3]

b    1
c    2
dtype: int64

In [71]:
# label slice bounds are inclusive at both ends
s['b':'d']

b    1
c    2
d    3
dtype: int64