# 1 Working with Series

In [1]:
import pandas as pd
import numpy as np

### Series Initialization

In [7]:
# Build a Series from a plain list; with no index given, the labels default to 0, 1, 2.
values = [1, 2, 3]
s = pd.Series(data=values)
print(s)

0    1
1    2
2    3
dtype: int64


In [8]:
# Build a Series whose entries carry explicit string labels.
labels = ['a', 'b', 'c']
s = pd.Series(data=[1, 2, 3], index=labels)
print(s)

a    1
b    2
c    3
dtype: int64


In [9]:
# Build a Series from a dict: the keys become the index labels, the values the data.
label_to_value = {'a': 1, 'b': 2, 'c': 3}
s = pd.Series(label_to_value)
print(s)

a    1
b    2
c    3
dtype: int64


### Series Adding/Deleting an Element

In [10]:
# Add an element: assigning to a label that is not in the index yet appends it.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
print(s)  # before: labels a, b, c
s['d'] = 4
print(s)  # after: label d has been appended

a    1
b    2
c    3
dtype: int64
a    1
b    2
c    3
d    4
dtype: int64


In [11]:
# Delete an element: `del` removes the entry with the given label from the Series itself.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
print(s)  # before: labels a, b, c
del s['c']
print(s)  # after: label c is gone

a    1
b    2
c    3
dtype: int64
a    1
b    2
dtype: int64


### Series Drop Element(s) 

In [47]:
# drop() returns a new Series without the listed labels; `s` itself keeps all three entries.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
labels_to_drop = ['b', 'c']
s_dropped = s.drop(labels=labels_to_drop)
print(s, s_dropped, sep='\n')

a    1
b    2
c    3
dtype: int64
a    1
dtype: int64


### Series Index and Values

In [17]:
# Series index: `.index` is the pandas Index object, `.index.values` the labels as an array.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
labels = s.index
print(s)
print(labels)
print(labels.values)

a    1
b    2
c    3
dtype: int64
Index(['a', 'b', 'c'], dtype='object')
['a' 'b' 'c']


In [15]:
# Series values: `.values` exposes the data as an array, without the index labels.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
print(s, s.values, sep='\n')

a    1
b    2
c    3
dtype: int64
[1 2 3]


### Series Reindexing

In [44]:
# reindex() conforms the Series to a new label order; labels that were not in the
# original index ('d' here) receive `fill_value`.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
new_labels = ['d', 'c', 'b', 'a']
s_reindexed = s.reindex(index=new_labels, fill_value=0)
print(s)
print(s_reindexed)

a    1
b    2
c    3
dtype: int64
d    0
c    3
b    2
a    1
dtype: int64


### Series Indexing and Selecting

#### Label

In [50]:
# Select a single element by its index label.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
print(s)
print(s.loc['a'])

a    1
b    2
c    3
dtype: int64
1


In [52]:
s = pd.Series(data=[1, 2, 3], index=list('abc'))
print(s)
# Slicing with labels: the endpoint ('b') is included in the result.
print(s.loc['a':'b'])

a    1
b    2
c    3
dtype: int64
a    1
b    2
dtype: int64


#### Position

In [51]:
# Select a single element by integer position.
# Use .iloc for this: a bare integer key such as s[0] on a label-indexed Series relies on
# a positional fallback that pandas has deprecated (it emits a FutureWarning in 2.1+).
s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
print(s)
print(s.iloc[0])

a    1
b    2
c    3
dtype: int64
1


In [53]:
s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
print(s)
# .iloc makes the positional intent explicit, so the slice cannot be mistaken for a label slice.
print(s.iloc[:1]) # Slicing with position, the endpoint is excluded

a    1
b    2
c    3
dtype: int64
a    1
dtype: int64


### Series Sorting

#### Sorting by Index

In [98]:
# sort_index() returns a new Series ordered by label (a, b, c here).
s = pd.Series(data=[1, 2, 3], index=list('cba'))
s_sorted = s.sort_index()
print(s, s_sorted, sep='\n')

c    1
b    2
a    3
dtype: int64
a    3
b    2
c    1
dtype: int64


#### Sorting by Values

In [99]:
# sort_values() returns a new Series ordered by value (1, 2, 3 here); each label stays with its value.
s = pd.Series(data=[3, 2, 1], index=list('abc'))
s_sorted = s.sort_values()
print(s, s_sorted, sep='\n')

a    3
b    2
c    1
dtype: int64
c    1
b    2
a    3
dtype: int64


### Series Ranking

In [115]:
# rank() assigns each value its rank; `method` selects how ties are broken:
#   average: average rank of the group
#   min:     lowest rank in the group
#   max:     highest rank in the group
#   first:   ranks assigned in the order the values appear in the array
#   dense:   like 'min', but the rank always increases by 1 between groups
s = pd.Series(data=[1, 1, 2, 3, 3])
ranks = s.rank(method='average')
print(ranks)

0    1.5
1    1.5
2    3.0
3    4.5
4    4.5
dtype: float64


### Series Computations
See DataFrame Computations for more computation methods shared between Series and DataFrame.

In [129]:
# Unique values: unique() returns the distinct values in order of first appearance.
s = pd.Series(list('aaabbc'))
distinct_values = s.unique()
print(s)
print(distinct_values)

0    a
1    a
2    a
3    b
4    b
5    c
dtype: object
['a' 'b' 'c']


In [130]:
# Counts of unique values: value_counts() maps each distinct value to its number of occurrences.
s = pd.Series(list('aaabbc'))
occurrences = s.value_counts()
print(s)
print(occurrences)

0    a
1    a
2    a
3    b
4    b
5    c
dtype: object
a    3
b    2
c    1
dtype: int64


In [132]:
# Membership: isin() yields a boolean Series that is True where the value is one of the candidates.
s = pd.Series(list('aaabbc'))
candidates = ['b', 'c']
is_member = s.isin(candidates)
print(s)
print(is_member)

0    a
1    a
2    a
3    b
4    b
5    c
dtype: object
0    False
1    False
2    False
3     True
4     True
5     True
dtype: bool


### Series Applying Function

In [88]:
# Element-wise function application: map() applies the function to every value and keeps the labels.
s = pd.Series(data=[1, 2, 3], index=list('abc'))
squared = s.map(np.square)
print(s)
print(squared)

a    1
b    2
c    3
dtype: int64
a    1
b    4
c    9
dtype: int64


### Series Missing Values

#### Counting

In [133]:
# Count the number of NON-null values: count() skips NaN, so 5 entries with 2 NaN give 3.
# (The number of null values would be s.isnull().sum().)
s = pd.Series([0, 1, np.nan, np.nan, 4])
print(s)
print(s.count())

0    0.0
1    1.0
2    NaN
3    NaN
4    4.0
dtype: float64
3


#### Filtering

In [134]:
# Whether the values are null: isnull() yields a boolean Series that is True at each NaN.
raw_values = [0, 1, np.nan, np.nan, 4]
s = pd.Series(data=raw_values)
null_mask = s.isnull()
print(s)
print(null_mask)

0    0.0
1    1.0
2    NaN
3    NaN
4    4.0
dtype: float64
0    False
1    False
2     True
3     True
4    False
dtype: bool


In [135]:
# Whether the values are not null: notnull() yields a boolean Series that is False at each NaN.
raw_values = [0, 1, np.nan, np.nan, 4]
s = pd.Series(data=raw_values)
present_mask = s.notnull()
print(s)
print(present_mask)

0    0.0
1    1.0
2    NaN
3    NaN
4    4.0
dtype: float64
0     True
1     True
2    False
3    False
4     True
dtype: bool


#### Dropping

In [136]:
# Drop the null values
s = pd.Series([0, 1, np.nan, np.nan, 4])
print(s)
s_dropped = s.dropna()
print(s_dropped)

0    0.0
1    1.0
2    NaN
3    NaN
4    4.0
dtype: float64
0    0.0
1    1.0
4    4.0
dtype: float64


#### Filling

In [144]:
# Fill the null values
s = pd.Series([0, 1, np.nan, np.nan, 4])
print(s)
s_filled = s.fillna(999)
print(s_filled)

0    0.0
1    1.0
2    NaN
3    NaN
4    4.0
dtype: float64
0      0.0
1      1.0
2    999.0
3    999.0
4      4.0
dtype: float64
