In [1]:
import numpy as np
import pandas as pd

#### Creating Series 

In [2]:
# Build a Series that holds a single scalar value
s1 = pd.Series(data=2)

In [3]:
s1

0    2
dtype: int64

In [4]:
# Build a Series from a list; pandas supplies a default 0..n-1 index
s2 = pd.Series(data=[1, 2, 3, 4, 5, 6])
s2

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [5]:
s2.index

RangeIndex(start=0, stop=6, step=1)

In [6]:
# Underlying data as a NumPy array; to_numpy() is the accessor pandas
# recommends over the older .values attribute
s2.to_numpy()

array([1, 2, 3, 4, 5, 6], dtype=int64)

In [7]:
# Build a Series whose index labels are supplied explicitly
s3 = pd.Series(data=[1, 2, 3], index=list('abc'))

In [8]:
s3.index

Index(['a', 'b', 'c'], dtype='object')

In [9]:
s3

a    1
b    2
c    3
dtype: int64

In [11]:
# Look up a single value by its index label
s3['c']

3

In [12]:
# Broadcast a scalar across an index borrowed from another Series
s4 = pd.Series(data=2, index=s2.index)

In [13]:
s4

0    2
1    2
2    2
3    2
4    2
5    2
dtype: int64

In [14]:
# Seed NumPy's global random generator so the random Series in the
# following cells are reproducible on every run
np.random.seed(123)

In [15]:
# Five draws from the standard normal distribution, wrapped in a Series
pd.Series(np.random.randn(5))

0   -1.085631
1    0.997345
2    0.282978
3   -1.506295
4   -0.578600
dtype: float64

In [17]:
# Seven evenly spaced values from 0 to 9, both endpoints included
pd.Series(np.linspace(0,9,7))

0    0.0
1    1.5
2    3.0
3    4.5
4    6.0
5    7.5
6    9.0
dtype: float64

In [18]:
# Consecutive integers 0 through 9 generated by np.arange
pd.Series(np.arange(10))

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

In [21]:
# Build a Series from a dictionary: the keys become the index labels
s6 = pd.Series(dict(a=1, b=3, c=5, d=7))

In [20]:
s6

a    1
b    3
c    5
d    7
dtype: int64

#### Size, shape, uniqueness, and counts of values

In [23]:
# Sample data with one duplicated value (4) and one missing entry (NaN)
s = pd.Series(data=[0, 1, 2, 3, 4, 4, 6, np.nan])

In [24]:
len(s)

8

In [25]:
s

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    4.0
6    6.0
7    NaN
dtype: float64

In [26]:
s.size

8

In [28]:
# count() ignores missing values, so the NaN entry is not counted
s.count()

7

In [29]:
# Distinct values in order of first appearance; NaN is reported as a value
s.unique()

array([  0.,   1.,   2.,   3.,   4.,   6.,  nan])

In [30]:
# Frequency of each distinct value; the NaN entry does not appear in the result
s.value_counts()

4.0    2
6.0    1
3.0    1
2.0    1
1.0    1
0.0    1
dtype: int64

### Peeking at data with head, tail, and take

In [32]:
# Peek at the first three entries instead of printing the whole Series
s.head(3)

0    0.0
1    1.0
2    2.0
dtype: float64

In [33]:
# Peek at the last three entries
s.tail(3)

5    4.0
6    6.0
7    NaN
dtype: float64

In [34]:
# take() picks entries by integer position
s.take([0,3,5])

0    0.0
3    3.0
5    4.0
dtype: float64

### Looking up values in Series

In [35]:
s3['a']

1

In [36]:
# Positional lookup on a string-labelled Series. Plain [] with an integer key
# relies on a positional fallback that pandas has deprecated, so request the
# position explicitly through .iloc.
s3.iloc[1]

2

In [37]:
s3

a    1
b    2
c    3
dtype: int64

In [38]:
# A list of labels returns a sub-Series rather than a single value
s3[['a','b']]

a    1
b    2
dtype: int64

In [39]:
# Series with integer index labels, to contrast label and position lookup
s5 = pd.Series(data=[1, 2, 3], index=[10, 11, 12])

In [40]:
s5

10    1
11    2
12    3
dtype: int64

In [42]:
# With an integer index, plain [] matches the label 11 (there is no position 11 here)
s5[11]

2

In [43]:
# Force lookup by position with .iloc
s5.iloc[1]

2

In [46]:
# Force lookup by index label with .loc
s5.loc[11]

2

In [47]:
# Several labels at once; rows come back in the order the labels were requested
s5.loc[[12,10]]

12    3
10    1
dtype: int64

In [48]:
# Several positions at once (first and third entries)
s5.iloc[[0,2]]

10    1
12    3
dtype: int64

#### Alignment via index labels

In [49]:
# First operand: labels listed in ascending order
s6 = pd.Series(data=[1, 2, 3, 4], index=list('abcd'))

In [50]:
s6 

a    1
b    2
c    3
d    4
dtype: int64

In [51]:
# Second operand: the same labels, listed in reverse order
s7 = pd.Series(data=[4, 3, 2, 1], index=list('dcba'))

In [52]:
s7

d    4
c    3
b    2
a    1
dtype: int64

In [53]:
# Addition pairs values by index label, not by position
s6 + s7

a    2
b    4
c    6
d    8
dtype: int64

### Arithmetic operations

In [54]:
# Multiplying by a scalar applies to every element
s3 * 3

a    3
b    6
c    9
dtype: int64

In [55]:
# Constant Series sharing s3's index, used as the second factor below
t = pd.Series(data=4, index=s3.index)

In [56]:
t

a    4
b    4
c    4
dtype: int64

In [57]:
# Element-wise product of two Series that share the same index
s3 * t

a     4
b     8
c    12
dtype: int64

In [58]:
# Two Series whose labels only partly overlap ('a' and 'e' each occur in just one)
s8 = pd.Series(dict(a=1, b=2, c=3, d=5))
s9 = pd.Series(dict(b=6, c=7, d=9, e=10))

In [59]:
# Labels present in only one operand produce NaN, and the result is float
s8 + s9

a     NaN
b     8.0
c    10.0
d    14.0
e     NaN
dtype: float64

In [66]:
# With repeated labels, alignment pairs every matching entry on the left with
# every matching entry on the right (a Cartesian product per label)
s10 = pd.Series(data=[1.0, 2.0, 3.0], index=list('aab'))
s11 = pd.Series(data=[4.0, 5.0, 6.0, 7.0], index=list('aabb'))

In [67]:
s10 + s11

a     5.0
a     6.0
a     6.0
a     7.0
b     9.0
b    10.0
dtype: float64

### Handling NaN

In [69]:
# NumPy's mean propagates NaN: one missing value makes the whole result NaN
np.mean(np.array([1,2,3,np.nan]))

nan

In [70]:
# The same values wrapped in a Series, to compare pandas' NaN handling
s = pd.Series(np.array([1.0, 2.0, 3.0, np.nan]))

In [71]:
# pandas skips NaN by default when computing the mean
s.mean()

2.0

In [72]:
# skipna=False lets the NaN propagate, matching the NumPy result
s.mean(skipna=False)

nan

### Boolean selection

In [73]:
# Integers 0..9 to demonstrate boolean selection
s = pd.Series(np.arange(10))

In [76]:
# A boolean mask keeps only the entries greater than 5
s[s > 5]

6    6
7    7
8    8
9    9
dtype: int32

In [77]:
# Summing the boolean mask counts how many entries exceed 5
(s > 5).sum()

4

In [78]:
# Sum of the values themselves that exceed 5
s[s > 5].sum()

30

### Reindexing a Series

In [79]:
# Fresh Series of five random values to demonstrate relabelling
s = pd.Series(np.random.randn(5))

In [80]:
s

0    1.651437
1   -2.426679
2   -0.428913
3    1.265936
4   -0.866740
dtype: float64

In [81]:
s.index

RangeIndex(start=0, stop=5, step=1)

In [82]:
# Assigning to .index relabels the Series in place; the values are untouched
s.index = ['a','b','c','d','e']

In [83]:
s

a    1.651437
b   -2.426679
c   -0.428913
d    1.265936
e   -0.866740
dtype: float64

In [85]:
# NOTE(review): kept disabled, presumably because assigning 2 labels to this
# 5-element Series fails with a length mismatch. Confirm, then either show the
# error deliberately or remove this cell.
#s.index = ['aa','bb']

In [86]:
# Two independent random Series of three values each, to be concatenated below
s1 = pd.Series(np.random.randn(3))
s2 = pd.Series(np.random.randn(3))

In [88]:
# Stack s1 and s2 end to end into a single Series
combined = pd.concat([s1,s2])

In [89]:
# Renumber the combined Series 0..n-1
combined.index = np.arange(len(combined))

In [90]:
combined

0   -0.678886
1   -0.094709
2    1.491390
3   -0.638902
4   -0.443982
5   -0.434351
dtype: float64

In [91]:
 s1 = pd.Series(np.random.randn(4), ['a', 'b', 'c', 'd'])

In [92]:
s1

a    2.205930
b    2.186786
c    1.004054
d    0.386186
dtype: float64

In [93]:
# reindex returns a new Series limited to the requested labels
s2 = s1.reindex(['a','c'])

In [94]:
s2

a    2.205930
c    1.004054
dtype: float64

In [95]:
# Overwrite a value in the reindexed Series only
s2['a'] = 989

In [96]:
# s1 still holds its original values: the assignment to s2 did not reach it
s1

a    2.205930
b    2.186786
c    1.004054
d    0.386186
dtype: float64

In [98]:
# copy() creates an independent Series backed by a separate piece of memory
s5 = s1.copy()

In [99]:
# Labels missing from s2 (here 'f') receive fill_value; s2 itself is not modified
s2.reindex(['a','f'],fill_value=0)

a    989.0
f      0.0
dtype: float64

In [100]:
s2

a    989.000000
c      1.004054
dtype: float64

In [101]:
# Remove the entry labelled 'a' from s2 in place
del s2['a']

### Slicing

In [102]:
# Values 100..109 labelled 10..19, so labels and positions never coincide
s = pd.Series(data=np.arange(100, 110), index=np.arange(10, 20))

In [103]:
# Every second entry among the first six positions. Integer slices in plain []
# act on positions even though this index holds integer labels, so use .iloc
# to make the positional intent explicit.
s.iloc[0:6:2]

10    100
12    102
14    104
dtype: int32

In [104]:
# A step of -1 returns the Series in reverse order; labels travel with their values
s[::-1]

19    109
18    108
17    107
16    106
15    105
14    104
13    103
12    102
11    101
10    100
dtype: int32

In [105]:
# Fourth-from-last up to, but excluding, the last entry. Negative bounds only
# make sense as positions, so state that explicitly with .iloc.
s.iloc[-4:-1]

16    106
17    107
18    108
dtype: int32