<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#The-Series-Data-Structure" data-toc-modified-id="The-Series-Data-Structure-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>The Series Data Structure</a></span></li><li><span><a href="#Querying-a-Series" data-toc-modified-id="Querying-a-Series-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Querying a Series</a></span></li></ul></div>

## The Series Data Structure

In [1]:
import pandas as pd
students = ['Alice','Jack','Molly']
pd.Series(students)

0    Alice
1     Jack
2    Molly
dtype: object

In [2]:
numbers = [1,2,3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [3]:
students = ['Alice','Jack',None]
pd.Series(students)

0    Alice
1     Jack
2     None
dtype: object

In [4]:
numbers = [1,2,None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [5]:
# NaN and None not the same thing
import numpy as np
np.nan == None

False

In [6]:
np.nan == np.nan

False

In [7]:
np.isnan(np.nan)

True

In [8]:
studens_scores = {'Alice':'Physics',
                  'Jack': 'Chemistry',
                  'Molly': 'English'}
s = pd.Series(studens_scores)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [9]:
s.index

Index(['Alice', 'Jack', 'Molly'], dtype='object')

In [10]:
s = pd.Series(['Physics','Chemistry','English'],index=['Alice','Jack','Molly'])
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [11]:
studens_scores = {'Alice':'Physics',
                  'Jack': 'Chemistry',
                  'Molly': 'English'}
s = pd.Series(studens_scores,index=['Alice','Sam','Molly'])
s

Alice    Physics
Sam          NaN
Molly    English
dtype: object

## Querying a Series
* `iloc`: query by location
* `loc`: query by index label

In [13]:
studens_classes = {'Alice':'Physics',
                  'Jack': 'Chemistry',
                  'Molly': 'English',
                  'Sam':'History'}
s = pd.Series(studens_classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [14]:
s.iloc[3]

'History'

In [15]:
s.loc['Molly']

'English'

In [16]:
# not recommend
s[3]

'History'

In [17]:
# not recommend
s['Molly']

'English'

In [18]:
class_code = {99:'Physics',
             100:'Chemistry',
             101:'English',
             102:'History'}
s = pd.Series(class_code)
s

99       Physics
100    Chemistry
101      English
102      History
dtype: object

In [19]:
s[0]

KeyError: 0

In [20]:
s[99]

'Physics'

In [21]:
grades = pd.Series([90,80,70,60])

total = 0
for grade in grades:
    total+=grade
print(total / len(grades))

75.0


In [22]:
total = np.sum(grades)
print(total/len(grades))

75.0


In [24]:
numbers = pd.Series(np.random.randint(0,10000,10000))
numbers.head()

0    8363
1    3002
2    9348
3    1823
4    5896
dtype: int32

In [25]:
%%timeit -n 100
total = 0
for number in numbers:
    total+=number
total / len(numbers)

895 µs ± 10.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [27]:
%%timeit -n 100
total = np.sum(numbers)
total/len(numbers)

49.3 µs ± 2.17 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [28]:
numbers +=2
numbers.head()

0    8365
1    3004
2    9350
3    1825
4    5898
dtype: int32

In [34]:
for label,value in numbers.iteritems():
    numbers.at[2,label] = value+2
numbers.head()

0    8367
1    3006
2    3090
3    1827
4    5900
dtype: int32

In [37]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label,value in s.iteritems():
    s.loc[label] = value + 2

315 ms ± 1.37 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [36]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
s+=2

233 µs ± 22.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [38]:
s = pd.Series([1,2,3])
s.loc['History'] = 102
s

0            1
1            2
2            3
History    102
dtype: int64

In [42]:
studens_classes
studens_classes = pd.Series(studens_classes)
studens_classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [40]:
kelly_classes = pd.Series(['Philosophy','Arts','Math'],index=['Kelly','Kelly','Kelly'])
kelly_classes

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [43]:
all_students_classes = studens_classes.append(kelly_classes)
all_students_classes

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [44]:
all_students_classes.loc['Kelly']

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object