# The Series Data Structure

In [84]:
# Snapshot the installed package versions into requirements.txt so the
# environment used for this notebook can be recreated later.
!pip freeze > requirements.txt

You are using pip version 8.1.2, however version 9.0.0 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [85]:
import pandas as pd
# pd.Series?  # opens the documentation

In [86]:
# A list of strings becomes a Series with a default 0..n-1 integer index;
# pandas stores the strings with dtype `object`.
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [87]:
# A list of whole numbers is inferred as dtype int64.
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [88]:
# In a Series of strings (dtype object) a missing entry is kept as None.
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [89]:
# In a numeric Series the None is converted to NaN, and the integers are
# upcast to float64 so the column can hold it.
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [90]:
import numpy as np
# NaN is not the same thing as None: the equality test evaluates to False.
np.nan == None

False

In [91]:
# NaN does not even compare equal to itself, so `==` cannot detect it.
np.nan == np.nan

False

In [92]:
# Use np.isnan() to test for NaN instead of an equality comparison.
np.isnan(np.nan)

True

In [93]:
# Build a Series from a dict: the keys (sports) become the index labels and
# the values (countries of origin) become the data.
sports = dict(
    Archery='Bhutan',
    Golf='Scotland',
    Sumo='Japan',
    Taekwondo='South Korea',
)
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [94]:
# The dict keys are now the index labels of the Series.
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [95]:
# Supply the labels separately from the data through the `index=` argument;
# labels and values are paired up by position.
s = pd.Series(
    data=['Tiger', 'Bear', 'Moose'],
    index=['India', 'America', 'Canada'],
)
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

In [96]:
# The index now holds the labels that were passed through `index=`.
s.index

Index(['India', 'America', 'Canada'], dtype='object')

In [97]:
# Re-display the sport -> country dict before reusing it in the next cell.
sports

{'Archery': 'Bhutan',
 'Golf': 'Scotland',
 'Sumo': 'Japan',
 'Taekwondo': 'South Korea'}

In [98]:
# When both a dict and an explicit index are given, the index wins: only the
# listed labels are kept, dict keys that are not listed are dropped, and a
# label with no dict entry ('Hockey') gets NaN.
s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

In [99]:
# Rebuild the sport -> country Series that the lookup cells below query.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [100]:
# .iloc selects by integer position: position 3 is the fourth entry.
s.iloc[3]

'South Korea'

In [101]:
# .loc selects by index label.
s.loc['Golf']

'Scotland'

In [102]:
# Plain [] with an integer on a string-labelled index is treated as a
# position here, giving the same result as s.iloc[3].
# NOTE(review): newer pandas releases deprecate this positional fallback;
# prefer the explicit s.iloc[3] and confirm against the pandas version in use.
s[3]

'South Korea'

In [103]:
# Plain [] with a string is treated as a label, like s.loc['Golf'].
s['Golf']

'Scotland'

In [104]:
# Same countries, but keyed by the integers 99..102, so the resulting Series
# has an integer index that does not start at 0.
sports = dict(zip(range(99, 103),
                  ['Bhutan', 'Scotland', 'Japan', 'South Korea']))
s = pd.Series(data=sports)
s

99          Bhutan
100       Scotland
101          Japan
102    South Korea
dtype: object

In [105]:
"""
s[0]
>>> KeyError: 0
"""

'\ns[0]\n>>> KeyError: 0\n'

In [106]:
# .iloc is always positional, so it returns the first entry regardless of
# what the integer labels are.
s.iloc[0]

'Bhutan'

In [107]:
# A small float Series used to compare ways of summing the values.
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [108]:
# Sum the values the slow way: an explicit Python loop over every element.
total = 0
for item in s:
    total += item
print(total)

324.0


In [109]:
import numpy as np

# Same sum as the explicit loop, computed by NumPy in a single vectorised call.
total = np.sum(s)
print(total)

324.0


In [110]:
# Create a big series of random numbers.
# Seed NumPy's global generator first so that "Restart & Run All" reproduces
# the same 10,000 integers (each in the range 0..999), and therefore the same
# outputs in the cells that follow.
np.random.seed(0)
s = pd.Series(np.random.randint(0, 1000, 10000))
s.head()

0    385
1    598
2    561
3    250
4    197
dtype: int64

In [111]:
# Confirm the Series holds all 10,000 generated values.
len(s)

10000

In [115]:
%%timeit -n 100
# Time the pure-Python loop over every element (100 runs per measurement),
# for comparison with the vectorised np.sum timed in the next cell.
summary = 0
for item in s:
    summary += item

100 loops, best of 3: 1.06 ms per loop


In [116]:
%%timeit -n 100
# Time the vectorised sum of the same Series, for comparison with the loop.
summary = np.sum(s)

100 loops, best of 3: 107 µs per loop


In [118]:
# Add two to each item in s using broadcasting
# NOTE: `+=` mutates `s` in place, so every re-run of this cell adds another 2.
# The head() shown for this cell is 4 above the values displayed when `s` was
# created (385 -> 389), which is consistent with the cell having run twice.
s += 2
s.head()

0    389
1    602
2    565
3    254
4    201
dtype: int64

In [119]:
# Element-by-element alternative to `s += 2`, kept for comparison with
# broadcasting. `Series.items()` yields (label, value) pairs and `.at` does a
# scalar write by label; together they replace `iteritems()` / `set_value()`,
# which current pandas releases no longer provide.
for label, value in s.items():
    s.at[label] = value + 2
s.head()

0    391
1    604
2    567
3    256
4    203
dtype: int64

In [122]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in s.iteritems():
    s.loc[label] = value + 2

10 loops, best of 3: 708 ms per loop


In [123]:
%%timeit -n 10
# Time the broadcast version of the same update on a fresh 10,000-value
# Series, for comparison with the per-element loop.
s = pd.Series(np.random.randint(0, 1000, 10000))
s += 2

10 loops, best of 3: 261 µs per loop


In [124]:
# Assigning through .loc to a label that does not exist appends a new entry.
# The index ends up mixing integers with a string label, and because the new
# value is a string the Series dtype becomes object.
s = pd.Series([1, 2, 3])
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
Animal    Bears
dtype: object

In [127]:
# Two Series to combine: one with unique sport labels, and one where every
# entry shares the same label ('Cricket').
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'],
                                     index=['Cricket',
                                            'Cricket',
                                            'Cricket',
                                            'Cricket'])
# pd.concat returns a new Series and leaves both inputs untouched. It replaces
# Series.append, which was removed in pandas 2.0. Duplicate index labels are
# preserved, so the result has four 'Cricket' rows.
all_countries = pd.concat([original_sports, cricket_loving_countries])

In [128]:
# The original Series is unchanged: combining returned a new object.
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [129]:
# Index labels do not have to be unique: all four entries share 'Cricket'.
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [130]:
# The combined Series: the original sports followed by the cricket entries.
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [131]:
# Looking up a label that occurs several times returns a Series with every
# match rather than a single value.
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

# The DataFrame Data Structure