In [7]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
# show up to 50 columns when displaying wide DataFrames.
# The fully qualified option name is required: the short pattern 'max_columns' also matches
# 'styler.render.max_columns' in recent pandas releases, which makes set_option raise OptionError.
pd.set_option('display.max_columns', 50)
%matplotlib inline

## Series and DataFrame data structures

In [4]:
# Series
# a one-dimensional labeled array; by default the index runs from 0 to N - 1, where N is the length of the Series

In [11]:
# build a Series from a plain list; the elements may be of mixed types (float, str, date),
# in which case the Series falls back to the generic `object` dtype
ser = pd.Series(data=[0.1, 'dog', dt.date(2013, 3, 9)])
ser

0           0.1
1           dog
2    2013-03-09
dtype: object

In [13]:
# specify the index labels explicitly instead of relying on the default integer index
ser = pd.Series(data=[0.1, 'dog', dt.date(2013, 3, 9)], index=list('abc'))
ser

a           0.1
b           dog
c    2013-03-09
dtype: object

In [39]:
# the Series constructor can convert a dictionary into a Series: the dict keys become the
# index labels and the dict values become the data (a None value shows up as NaN)
d = {'Atlanta': 112233, 'Baltimore': None, 'Cleveland': 343, 'San Francisco': 987}
ser = pd.Series(data=d)
ser

Atlanta          112233.0
Baltimore             NaN
Cleveland           343.0
San Francisco       987.0
dtype: float64

In [40]:
# look up a single item by its index label (.loc makes the label-based lookup explicit)
ser.loc['Atlanta']

112233.0

In [41]:
# passing a list of labels selects several items at once and returns a Series
ser.loc[['Atlanta', 'San Francisco']]

Atlanta          112233.0
San Francisco       987.0
dtype: float64

In [42]:
# boolean indexing: keep only the items for which the condition is True
# (NaN compares as False, so missing values are dropped from the selection)
ser.loc[ser < 1000]

Cleveland        343.0
San Francisco    987.0
dtype: float64

In [43]:
# overwrite the value stored under an index label, then read it back
ser.loc['Baltimore'] = 345
ser.loc['Baltimore']

345.0

In [44]:
# assign through a boolean mask: every item below 500 is replaced with 756
ser.loc[ser < 500] = 756
ser

Atlanta          112233.0
Baltimore           756.0
Cleveland           756.0
San Francisco       987.0
dtype: float64

In [45]:
# membership test with Python's `in` operator: checks whether the label exists in the index
'San Francisco' in ser.index

True

In [46]:
# element-wise division: every value is halved, the index is left untouched
ser.div(2)

Atlanta          56116.5
Baltimore          378.0
Cleveland          378.0
San Francisco      493.5
dtype: float64

In [47]:
# element-wise square of every value; the result is again a Series with the same index
ser ** 2

Atlanta          1.259625e+10
Baltimore        5.715360e+05
Cleveland        5.715360e+05
San Francisco    9.741690e+05
dtype: float64

In [49]:
# adding two Series aligns them on their index labels: the result's index is the union of
# both indexes, and values are summed only where a label is present in both operands.
# a label that is missing from either Series yields null/NaN -- here the two Series share
# no labels at all, so every entry of the result is NaN
ser1 = ser.loc[['Atlanta', 'Baltimore']]
ser2 = ser.loc[['Cleveland', 'San Francisco']]
print(ser1.add(ser2))

Atlanta         NaN
Baltimore       NaN
Cleveland       NaN
San Francisco   NaN
dtype: float64
