# A one-dimensional data structure: Series

In [1]:
import numpy as np
import pandas as pd



Create from ndarray

In [2]:
# Build a Series from a 5-element random ndarray, using explicit string labels as the index.
s = pd.Series(np.random.rand(5), index=["a", "b", "c", "d", "e"])

s

a    0.562967
b    0.050268
c    0.545442
d    0.742767
e    0.522604
dtype: float64

In [3]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [4]:
# If no index is specified, the default integer index [0, ..., len(data)-1] is used.
pd.Series(np.random.randn(5))

0    0.828039
1    1.156762
2   -0.861705
3   -0.275534
4   -0.965933
dtype: float64

Create from dictionary.

In [5]:
d = {'a':0., 'b':1., 'c':2.}
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [6]:
# If an index is given, each label is looked up as a key in the dict, in index order.
# A label that is not a key of the dict ('d' here) gets the value NaN.
pd.Series(d, index=['b', 'd', 'a'])

b    1.0
d    NaN
a    0.0
dtype: float64

Create from a scalar value.  
If the data is a single scalar value, an index must be provided; the value is repeated so that the length of the Series equals the length of the index.

In [7]:
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

## Some operations

In [8]:
s

a    0.562967
b    0.050268
c    0.545442
d    0.742767
e    0.522604
dtype: float64

In [10]:
s.iloc[0]

0.5629672175298958

In [11]:
s['a']

0.5629672175298958

In [12]:
s.iloc[:3]

a    0.562967
b    0.050268
c    0.545442
dtype: float64

In [13]:
# Boolean mask: keep only the entries whose value is strictly greater than the median of s.
s[s > s.median()]

a    0.562967
d    0.742767
dtype: float64

In [15]:
s.iloc[[4, 3, 1]]

e    0.522604
d    0.742767
b    0.050268
dtype: float64

In [16]:
s[['a', 'b', 'c']] # select several entries by passing a list of index labels

a    0.562967
b    0.050268
c    0.545442
dtype: float64

In [17]:
s.loc[['a', 'b']]

a    0.562967
b    0.050268
dtype: float64

In [18]:
np.exp(s)

a    1.755875
b    1.051553
c    1.725371
d    2.101743
e    1.686414
dtype: float64

In [19]:
s['a'] = 12.
s

a    12.000000
b     0.050268
c     0.545442
d     0.742767
e     0.522604
dtype: float64

In [20]:
'b' in s

True

In [21]:
'f' in s

False

When indexing by key, if you're unsure whether the key is present, you can use the `get` method. If the key doesn't exist, `get` returns `None`, or the default value passed as its second argument.

In [22]:
s.get('f', np.nan)

nan

In [23]:
s+s

a    24.000000
b     0.100535
c     1.090884
d     1.485534
e     1.045208
dtype: float64

In [24]:
s * 2

a    24.000000
b     0.100535
c     1.090884
d     1.485534
e     1.045208
dtype: float64

In [25]:
# Arithmetic between two Series aligns on index labels, not on position.
# 'a' is missing from the first operand and 'e' from the second, so both come out as NaN.
s.iloc[1:] + s.iloc[:-1]

a         NaN
b    0.100535
c    1.090884
d    1.485534
e         NaN
dtype: float64

In [26]:
s = pd.Series(np.random.randn(5), name='something')
s.name

'something'

In [27]:
s

0    0.225746
1    2.692545
2   -1.940176
3    1.205719
4    1.070720
Name: something, dtype: float64