# Pandas Series

Different from dictionaries because it has column names and index names for rows

In [40]:
import pandas as pd
# pd.Series?

In [41]:
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [42]:
numbers = [1, 2, 3, 4]
pd.Series(numbers)

0    1
1    2
2    3
3    4
dtype: int64

In [43]:
# Missing Data

animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [44]:
numbers = [1, 2, 3, None]
pd.Series(numbers)

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

In [45]:
# Create a dictionary

sports = {'Archery' : 'Bhutan',
          'Golf' : 'Scotland',
          'Sumo' : 'Japan',
          'Taekwondo' : 'South Korea'}

s = pd.Series(sports)
print(s)
 

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object


In [46]:
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [47]:
# Pandas overrides keys that dont make sense in a dictionary and applies its own index

animal = pd.Series(['Tiger', 'Bear', 'Moose'], index = ['India', 'America', 'Canada'])


print(animal.items);
print(animal.keys);
print(animal.values)

<bound method Series.items of India      Tiger
America     Bear
Canada     Moose
dtype: object>
<bound method Series.keys of India      Tiger
America     Bear
Canada     Moose
dtype: object>
['Tiger' 'Bear' 'Moose']


# Querying a Series

In [53]:
# Can be queried by index position or label
# loc - label
# iloc - integer / index position
# They are methods and not attributes so no brackets

s.iloc[3]

'South Korea'

In [55]:
s.loc['Golf']

'Scotland'

In [59]:
s = pd.Series([100.00, 120.0, 101.0, 3.00])

total = 0
for item in s:
    total += item
print(total)

324.0


In [62]:
# Rewrite 

import numpy as np

total = np.sum(s)
print(total)

324.0


In [64]:
# Let's check the time efficiency of the two methods

s = pd.Series(np.random.randint(0,1000,10000))
s.head()

0    770
1    886
2     91
3    905
4    603
dtype: int64

In [66]:
len(s)

10000

In [87]:
%%timeit -n 100 # time for 100 iterations
summary = 0
for item in s:
    summary += item

1.28 ms ± 77.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [88]:
%%timeit -n 100 # time for 100 iterations
s = pd.Series(np.random.randint(0,1000,10000))

178 µs ± 26.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [94]:
s += 2
s

0       797
1       779
2       503
3       428
4       292
       ... 
9995    339
9996     40
9997    976
9998    158
9999    613
Length: 10000, dtype: int64

In [95]:
# Below you can once again see how much faster vectorised functions are than traditional Python iteration functions

In [96]:

%%timeit -n 100 
s = pd.Series(np.random.randint(0, 1000, 10000))
s += 2


418 µs ± 55.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [97]:
%%timeit -n 2
s = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in s.iteritems():
    s.loc[label] = value+2

622 ms ± 22 ms per loop (mean ± std. dev. of 7 runs, 2 loops each)


In [103]:
s = pd.Series([1, 2, 3])
s.loc['Animal'] = 'Bears'
s

## So Above we have used the .loc operator to add a new key value pair or index / column pair


0             1
1             2
2             3
Animal    Bears
dtype: object

In [109]:
# Create a dictionary

original_sports = pd.Series({'Archery' : 'Bhutan',
          'Golf' : 'Scotland',
          'Sumo' : 'Japan',
          'Taekwondo' : 'South Korea'})

cricket_countries = pd.Series(['Australia', 'Barbados', 'Pakistan', 'England'],
                              index = ['Cricket', 'Cricket', 'Cricket', 'Cricket'])


all_countries = original_sports.append(cricket_countries)
print(all_countries);
print(original_sports);
print(cricket_countries)

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object
Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object
Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object


In [111]:
# Append does not change the original
print(original_sports)

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object


# Pandas DataFrames