# Summary:
## Series
(1) Create series from list   
(2) Getting the values and indices of a series   
(3) Change index, specify index upon creation   
(4) Indexing, boolean indexing   
(5) Arithmetic operations   
(6) Create series from dict   
(7) Detect missing values   
(8) Assign names to a series and its index   

In [24]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [5]:
# A Series pairs a one-dimensional sequence of values with an array of
# labels (its index). No index is given here, so the default integer
# labels 0..3 are used.
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [6]:
# Values of the series: .values exposes the stored data as a NumPy array
# (only the values, without the index labels).
obj.values

array([ 4,  7, -5,  3], dtype=int64)

In [7]:
# Index of the series: .index holds the labels. Because obj was created
# without an explicit index, this is the default integer range 0..3.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [None]:
# Change index: assigning to .index replaces the labels of obj in place
# (one new label for each of the four existing values).
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']

In [11]:
# Specifying index: pass explicit labels at construction time instead of
# relying on the default integer index. Labels and values are paired
# position by position.
obj2 = pd.Series(
    data=[4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [12]:
# With explicit labels, the index is an Index holding exactly those labels
# in the order they were given.
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [17]:
# Using index labels to get values
print(obj2['a'])  # a single label returns the value stored under it
print(obj2['d'])
print(obj2[['b','c','a']]) # a list of labels returns a Series in the requested order
# Note the double brackets above. obj2['b','c','a'] (no inner list) is wrong:
# Python passes the three labels as ONE tuple key, which is not a label of obj2.

-5
4
b    7
c    3
a   -5
dtype: int64


In [22]:
# Boolean index operations
# Comparing a Series with a scalar is element-wise and yields a boolean
# Series carrying the same labels.
print(obj2>0)
# Indexing with that boolean Series keeps only the entries where it is True.
print(obj2[obj2>0])

d     True
b     True
a    False
c     True
dtype: bool
d    4
b    7
c    3
dtype: int64


In [23]:
# Arithmetic: an operation with a scalar is applied to every value, and the
# index labels are preserved in the result.
print(obj2)
print(obj2*2)

d    4
b    7
a   -5
c    3
dtype: int64
d     8
b    14
a   -10
c     6
dtype: int64


In [25]:
# NumPy element-wise functions can be applied to a Series directly; the
# result is again a Series (float64 here) that keeps the index labels.
np.exp(obj2)

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [26]:
# In some sense, a Series is a dict that maps index labels to values:
# `in` tests whether a label is present in the index.
'b' in obj2

True

In [27]:
# The `in` test looks only at the index labels, never at the stored values:
# 4 is a value of obj2 (under label 'd') but not a label, so this is False.
4 in obj2

False

In [29]:
# Create a Series from a dict: the dict keys become the index labels and
# the dict values become the data.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}
obj3 = pd.Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [32]:
# Build a Series from the dict, but let an explicit list of labels decide
# which entries appear and in what order:
#   * 'California' is not a key of sdata, so its value in the series is NaN
#     (the missing value is why the dtype becomes float64);
#   * 'Utah' is a key of sdata but is not listed in states, so there is no
#     'Utah' in the series.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(data=sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [33]:
# Detecting missing data: pd.isnull returns a boolean Series that is True
# where the value is missing (NaN) and False everywhere else.
pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [34]:
# pd.notnull is the complement of pd.isnull: True where a value is present,
# False where it is missing.
pd.notnull(obj4)

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [36]:
# The same missing-value check is also available as a method on the Series.
obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [37]:
# Automatically align labels: arithmetic between two Series matches entries
# by index label, not by position. The result covers the labels of both
# operands; a label that is absent from one of them ('California' from obj3,
# 'Utah' from obj4) or whose value is NaN gives NaN in the result.
print(obj3)
print(obj4)
print(obj3+obj4)

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64
California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64


In [41]:
# Assign names to the series and the index: both the Series and its index
# have a `name` attribute. When the Series is displayed, the index name
# appears as a header above the labels and the Series name in the footer.
obj4.name = 'population'
obj4.index.name = 'state'
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64