In [2]:
import pandas as pd

In [5]:
n = pd.Series([1,2,3,4])
n

0    1
1    2
2    3
3    4
dtype: int64

## Auto casting to none

In [15]:
animals = ['tiger', 'bear', None]
a = pd.Series(animals)
a

0    tiger
1     bear
2     None
dtype: object

## Auto casting to NaN

In [17]:
numbers = [1,2,3,None]
a = pd.Series(numbers)
a

0     1
1     2
2     3
3   NaN
dtype: float64

## None and NaN (np.nan)

### NaN is numeric None, which is more efficient. 

In [23]:
import numpy as np
print (np.nan == None)
print (np.nan == np.nan)
print (np.isnan(np.nan))


False
False
True


In [24]:
sport = {'soccer': 'france',
         'golf': 'usa',
         'basketball': 'spain',
         'matkot': 'zfonKorea'}

In [46]:
s = pd.Series(sport)
s

basketball        spain
golf                usa
matkot        zfonKorea
soccer           france
dtype: object

In [47]:
s.index

Index(['basketball', 'golf', 'matkot', 'soccer'], dtype='object')

### keys can be duplicated

In [51]:
t = pd.Series(sport, index=['soccer', 'matkot', 'golf'])
t

soccer       france
matkot    zfonKorea
golf            usa
dtype: object

### iloc, loc and direct access ( indexing operator )

In [55]:
print (t.iloc[0])
print (t[0])
print (t.loc['soccer'])
print (t['soccer'])


france
france
france
france


In [63]:
items= {101: 'aaa', 102: 'bbb', 103:'ccc'}
x = pd.Series(items)
x

101    aaa
102    bbb
103    ccc
dtype: object

In [65]:
# x[0] will return error, must explicitly call iloc
x.iloc[0]

'aaa'

## Iterations


In [66]:
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100
1    120
2    101
3      3
dtype: float64

In [71]:
# slow
total = 0
for item in s:
    total += item
print (total)



324.0


## Vectorization for better performance

In [70]:
import numpy as np

total = np.sum(s)
print (total)

324.0


In [77]:
s = pd.Series(np.random.randint(0,1000,10000))
print ('total length is {}'.format(len(s)))
s.head

total length is 10000


<bound method Series.head of 0       127
1       817
2       263
3       900
4       228
5       286
6       889
7       735
8       947
9       251
10      457
11      915
12      486
13      151
14      982
15      601
16      101
17      375
18      305
19       76
20      789
21      534
22      644
23      353
24       35
25      711
26      964
27      419
28      810
29      320
       ... 
9970    560
9971    699
9972    709
9973    355
9974    500
9975    123
9976    842
9977    648
9978    339
9979    671
9980    117
9981    684
9982    216
9983    115
9984     81
9985    182
9986      9
9987    278
9988    490
9989    276
9990    602
9991    368
9992    228
9993    889
9994    243
9995    583
9996    486
9997    688
9998    689
9999     50
dtype: int64>

In [78]:
%%timeit -n 100
summary = 0
for item in s:
    summary += item

100 loops, best of 3: 791 µs per loop


In [80]:
# parallel computing with numpy
# much faster
# func prog style
%%timeit -n 100
total = np.sum(s)

100 loops, best of 3: 62.9 µs per loop


### broadcasting ( apply to all elements )

In [86]:
print(s.head())
s += 2
print(s.head()[0:2])


0    129
1    819
2    265
3    902
4    230
dtype: int64
0    131
1    821
dtype: int64


## Iterating over Series is BAD practice. Vectorize functions are much faster !

In [89]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
s += 2


10 loops, best of 3: 193 µs per loop


In [91]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
for k,v in s.iteritems():
    s.set_value(k, v+2)

10 loops, best of 3: 17.1 ms per loop


In [96]:
import time
%timeit time.sleep(3)

1 loops, best of 3: 3 s per loop


## Mixed types

In [98]:
s = pd.Series([1,2,3])
s


0    1
1    2
2    3
dtype: int64

In [99]:
s['animal'] = 'bear'
s

0            1
1            2
2            3
animal    bear
dtype: object

In [102]:
original_sports = pd.Series({'hocky': 'usa', 
                             'soccer': 'spain',
                             'baseball': 'austria'})
cricket_countries = pd.Series(['israel', 'india', 'england'])

print (original_sports)
print ('')
print (cricket_countries)

baseball    austria
hocky           usa
soccer        spain
dtype: object

0     israel
1      india
2    england
dtype: object


In [103]:
# 2 ref to same object
s = original_sports
s['hocky'] = 'lebanon'
print (original_sports)


baseball    austria
hocky       lebanon
soccer        spain
dtype: object


In [105]:
# append DOESNT change the object, it creates a new one
t = original_sports.append(cricket_countries)
t

baseball    austria
hocky       lebanon
soccer        spain
0            israel
1             india
2           england
dtype: object