# The Series Data Structure

In [3]:
import pandas as pd 
# IPython help syntax: a trailing "?" displays the docstring for pd.Series.
pd.Series?

In [4]:
# A list of strings becomes a Series with a default integer index (0, 1, 2).
animals = ['Tiger', 'Bear', 'Moose']
# pandas stores the strings with dtype object
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [5]:
# A list of whole numbers is stored with an integer dtype (int64 in the output below).
numbers = [1,2,3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [7]:
# In a list of strings, a missing entry stays as the Python object None;
# the Series dtype remains object.
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [8]:
# In a list of numbers, None is converted to NaN (not a number), and the
# integers are shown as floats because the resulting dtype is float64.
numbers = [1,2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [9]:
import numpy as np
# NaN is not the same thing as None, so this comparison evaluates to False.
# Equality tests are not a reliable way to detect NaN: NaN does not even
# compare equal to itself; use np.isnan() or pd.isnull() instead.
np.nan == None

False

In [10]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [11]:
# The index is an attribute of the Series; here it holds the dict keys.
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [12]:
# Pass index= explicitly to pair each value with a label of your choosing.
s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada'])
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

In [14]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
# When both a dict and index= are given, the index list wins: keys that are not
# listed (Archery, Taekwondo) are dropped, and a label with no dict entry
# (Hockey) gets NaN as its value.
s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

#  Querying a Series

In [15]:
# Rebuild the sports Series (labelled by sport name) for the lookups that follow.
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [16]:
# iloc selects by integer position: position 3 is the fourth (last) entry
s.iloc[3]


'South Korea'

In [17]:
# loc selects by index label
s.loc['Golf']

'Scotland'

In [20]:
# Use integer labels that do not start at 0, so that label and position differ.
sports = {99: 'Bhutan',
          100: 'Scotland',
          101: 'Japan',
          102: 'South Korea'}
s = pd.Series(sports)

In [21]:
# Because the index holds integers (99-102), s[0] is treated as a label lookup.
# It does NOT fall back to s.iloc[0] as one might expect: label 0 is absent, so
# pandas raises KeyError. The error is caught and printed so that the notebook
# still runs to completion under "Restart & Run All".
try:
    s[0]
except KeyError as err:
    print('KeyError:', err)

KeyError: 0

In [37]:
# Series of floats built directly from a list literal
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [23]:
import numpy as np
# np.sum adds up every value in the Series in a single call
total = np.sum(s)
print(total)

324.0


In [24]:
# Create a large Series: 10,000 random integers drawn from [0, 1000).
# No random seed is set, so the values differ on every run.
s = pd.Series(np.random.randint(0,1000,10000))
s.head()  # show only the first five elements

0    581
1    360
2    440
3    936
4     23
dtype: int64

In [25]:
%%timeit -n 100
# Baseline: time summing the Series with an explicit Python loop.
# (Kept in the cell body: text after the options on the %%timeit line is
# handed to the magic as setup code.)
summary = 0
for item in s:
    summary+=item

100 loops, best of 3: 2.32 ms per loop


In [26]:
%%timeit -n 100
# Time the same sum using the vectorized np.sum, for comparison with the loop.
summary = np.sum(s)

100 loops, best of 3: 118 µs per loop


In [27]:
# Broadcasting: the scalar 2 is added to every element at once.
# NOTE: += updates s itself, so re-running this cell adds 2 again.
s+=2
s.head()

0    583
1    362
2    442
3    938
4     25
dtype: int64

In [28]:
s = pd.Series([1, 2, 3])
# Assigning to a label that does not exist yet adds a new entry. The new label
# and value may be of a different type than the existing ones; the Series
# dtype becomes object (see the output below).
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
Animal    Bears
dtype: object

In [36]:
# Series built from a dict: keys become the index labels, values become the data.
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
# Index labels do not have to be unique: every entry here is labelled 'Cricket'.
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'],
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])
# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat is the supported equivalent: it returns a NEW Series holding the
# entries of both inputs in order and leaves the inputs unchanged.
all_countries = pd.concat([original_sports, cricket_loving_countries])

In [31]:
# Combining the two Series produced a new object; original_sports itself is unchanged
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [32]:
# All four entries share the same index label, 'Cricket'
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [33]:
# The combined Series: the original sports entries followed by the cricket entries
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [35]:
# With a repeated label, loc returns every matching entry as a Series rather than a single value
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object