# Effective Pandas: Series Introduction

### Import Libraries

In [2]:
# Data Manipulation
import pandas as pd
# Numerical Computing
import numpy as np
# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# A hand-rolled stand-in for a Series: parallel lists of labels ('index')
# and values ('data'), plus a 'name' for the collection.
series = dict(
    index=[0, 1, 2, 3],
    data=[145, 142, 38, 13],
    name='songs',
)

In [4]:
def get(series, idx):
    """Look up a value in a dict-based "series" by its label.

    Parameters
    ----------
    series : mapping
        Must provide an 'index' entry (sequence of labels) and a 'data'
        entry (sequence of values) that line up position by position.
    idx : object
        The label to look up in ``series['index']``.

    Returns
    -------
    The element of ``series['data']`` at the position where ``idx`` first
    occurs in ``series['index']``.

    Raises
    ------
    ValueError
        If ``idx`` is not present in ``series['index']`` (when it is a
        list, as in this notebook).
    """
    labels = series['index']
    # Translate the label into a positional offset (first match wins).
    position = labels.index(idx)
    values = series['data']
    return values[position]

In [5]:
get(series, 1)

142

#### Index Abstraction

In [7]:
# Same layout as before, but the labels are strings instead of positions,
# so values can be looked up by name.
songs = dict(
    index=['Paul', 'John', 'George', 'Ringo'],
    data=[145, 142, 38, 13],
    name='counts',
)

In [8]:
get(songs, 'John')

142

#### The Pandas Series

In [9]:
# No index is supplied here, so pandas assigns the default one.
songs2 = pd.Series(data=[145, 142, 38, 13], name='counts')

In [10]:
songs2

0    145
1    142
2     38
3     13
Name: counts, dtype: int64

In [11]:
songs2.index

RangeIndex(start=0, stop=4, step=1)

In [12]:
# Label -> value mapping; the keys become the (string) index.
songs3 = pd.Series(
    {'Paul': 145, 'John': 142, 'George': 38, 'Ringo': 13},
    name='counts',
)

In [13]:
songs3

Paul      145
John      142
George     38
Ringo      13
Name: counts, dtype: int64

In [14]:
songs3.index

Index(['Paul', 'John', 'George', 'Ringo'], dtype='object')

In [15]:
class Foo:
    """Empty placeholder class with no attributes or methods of its own."""

In [16]:
# Mixed element types in one Series: two strings, an int, and a Foo instance.
ringo = pd.Series(data=['Richard', 'Starkey', 13, Foo()], name='ringo')

In [17]:
ringo

0                                    Richard
1                                    Starkey
2                                         13
3    <__main__.Foo object at 0x7fe24055a2e0>
Name: ringo, dtype: object

#### The NaN Value

In [18]:
# One real value and one missing value, keyed by label.
nan_series = pd.Series({'Ono': 2, 'Clapton': np.nan})

In [19]:
nan_series

Ono        2.0
Clapton    NaN
dtype: float64

In [20]:
nan_series.count()

1

In [21]:
nan_series.size

2

#### Optional Integer Support for NaN

In [22]:
# Request the nullable integer dtype (capital "I") up front so the value
# stays an integer even though the second entry is missing.
nan_series2 = pd.Series(
    data=[2, None],
    index=['Ono', 'Clapton'],
    dtype='Int64',
)

In [23]:
nan_series2

Ono           2
Clapton    <NA>
dtype: Int64

In [24]:
nan_series2.count()

1

In [27]:
# Cast the float64 series to the nullable Int64 dtype; per the output below,
# 2.0 is shown as 2 and NaN as <NA>.
nan_series.astype('Int64')

Ono           2
Clapton    <NA>
dtype: Int64

#### Similar to NumPy

In [28]:
# Plain NumPy counterpart holding the same four values (no labels, no name).
numpy_ser = np.asarray([145, 142, 38, 13])

In [30]:
songs3.iloc[1]

142

In [31]:
numpy_ser[1]

142

In [32]:
songs3.mean()

84.5

In [33]:
numpy_ser.mean()

84.5

In [34]:
# Boolean Series: True where the count is strictly above the median.
mask = songs3.gt(songs3.median())

In [35]:
mask

Paul       True
John       True
George    False
Ringo     False
Name: counts, dtype: bool

In [36]:
songs3[mask]

Paul    145
John    142
Name: counts, dtype: int64

In [37]:
numpy_ser[numpy_ser > np.median(numpy_ser)]

array([145, 142])

#### Categorical Data

In [38]:
# Build the categorical values first, then wrap them in a Series; the
# categories are inferred from the data and are unordered.
s = pd.Series(pd.Categorical(['m', 'l', 'xs', 's', 'xl']))

In [39]:
s

0     m
1     l
2    xs
3     s
4    xl
dtype: category
Categories (5, object): ['l', 'm', 's', 'xl', 'xs']

In [40]:
s.cat.ordered

False

In [41]:
# Same size labels as plain strings (no categorical dtype requested).
s2 = pd.Series(data=['m', 'l', 'xs', 's', 'xl'])

In [42]:
# Ordered categorical dtype limited to three sizes, ranked 's' < 'm' < 'l'.
size_type = pd.CategoricalDtype(['s', 'm', 'l'], ordered=True)

In [43]:
# Convert the string sizes to the ordered three-category dtype. 'xs' and 'xl'
# are not among size_type's categories; they appear as NaN in the output below.
s3 = s2.astype(size_type)

In [44]:
s3

0      m
1      l
2    NaN
3      s
4    NaN
dtype: category
Categories (3, object): ['s' < 'm' < 'l']

In [46]:
# Comparison follows the declared category order ('s' < 'm' < 'l'), not
# alphabetical order; the NaN rows (2 and 4) come out False in the output below.
s3 > 's'

0     True
1     True
2    False
3    False
4    False
dtype: bool

In [47]:
# Give the five existing categories an explicit size order. The result is only
# displayed here; it is not assigned back to `s`.
s.cat.reorder_categories(['xs', 's', 'm', 'l', 'xl'], ordered=True)

0     m
1     l
2    xs
3     s
4    xl
dtype: category
Categories (5, object): ['xs' < 's' < 'm' < 'l' < 'xl']

In [48]:
# String methods are available on this categorical Series; note in the output
# below that the result has dtype object rather than category.
s3.str.upper()

0      M
1      L
2    NaN
3      S
4    NaN
dtype: object