In [2]:
# Build a small Series from a dict (keys become the index labels) so that
# values can be fetched both by position and by label in the cells below.

import pandas as pd

classes = dict(Alice="Physics", Jack="Chemistry", Molly="English")
s = pd.Series(data=classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [3]:
# Fetching data by integer position: iloc is 0-based, so 1 is the second element

s.iloc[1]

'Chemistry'

In [4]:
# Fetching data by index label (key) with loc

s.loc["Molly"]

'English'

In [6]:
# iloc and loc are attributes (indexers), not methods, which is why they take square brackets instead of parentheses.
# On a Series whose index is not integer-based, s[2] used to fall back to a positional lookup,
# but that fallback is deprecated (pandas emits a FutureWarning), so ask for the position explicitly.

s.iloc[2]

  s[2]


'English'

In [7]:
# Indexing the Series directly with a label behaves like a loc lookup

s["Molly"]

'English'

In [13]:
# The bracket shorthand is ambiguous when the index labels are themselves integers

code = {99: "Physics", 100: "Chemistry", 101: "English", 102: "History"}
s = pd.Series(code)

# s[1]    # raises KeyError: with an integer index, s[...] looks up labels, and there is no label 1 (it is not treated as position 1)
s.iloc[1] # works, because iloc is always positional (not displayed: a cell only shows its last expression)
s[100]  # works too, because 100 is an index label

'Chemistry'

In [14]:
# Average the student grades by accumulating them one at a time in a plain Python loop

grades = pd.Series([90, 80, 70, 60])

total = 0
for grade in grades:
    total = total + grade

average = total / len(grades)
print(average)

75.0


In [15]:
# The loop above works but is slow. numpy's sum is vectorized: it reduces the whole array
# in one call instead of stepping through the elements in Python, which is much faster on large data.

import numpy as np

total = np.sum(grades)
print(total/len(grades))

75.0


In [16]:
# Build a larger Series (10,000 random integers in [0, 1000)) to check which approach runs faster.
# A seeded generator makes the values, and therefore every output derived from them,
# identical on each Restart & Run All.

rng = np.random.default_rng(42)
numbers = pd.Series(rng.integers(0, 1000, 10000))
numbers.head()  # head() shows the first five elements of the series

0     42
1    693
2    462
3    845
4    283
dtype: int64

In [17]:
# Confirm the Series holds all 10,000 values
len(numbers)

10000

In [21]:
%%timeit -n 100 # we'll be using the magic function called timeit to run the function few times, and see how long it takes
total = 0
for number in numbers:
    total += number
total/len(numbers)  # takes 420 microseconds + 33 microsecond per loop

420 µs ± 33 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [23]:
%%timeit -n 100
total = np.sum(grades)
total/len(grades)   # takes 8.33 microseconds + 681 nanoseconds per loop; massive difference from prev.

8.33 µs ± 681 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [24]:
# Broadcasting applies a single operation to every element of the series; first, look at the current values
numbers.head()

0     42
1    693
2    462
3    845
4    283
dtype: int64

In [25]:
numbers += 2  # broadcast: adds 2 to every element of the series in one operation
numbers.head() # all values get increased by 2

0     44
1    695
2    464
3    847
4    285
dtype: int64

In [30]:
# The same update can be done by iterating over every element, just far less efficiently.
# items() yields (label, value) pairs, so write back by label with .at;
# .iat is positional and only lines up with the labels when the index is the default 0..n-1.

for label, value in numbers.items():
    numbers.at[label] = value + 2

numbers.head()

0     46
1    697
2    466
3    849
4    287
dtype: int64

In [32]:
%%timeit -n 10 # speed check using generic changing and the items() function

s = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in s.items():
    s.loc[label] = value + 2 # 110 microseconds + 2.11 microseconds per loop

110 ms ± 2.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
%%timeit -n 10

s = pd.Series(np.random.randint(0, 1000, 10000))
s += 2 # measured: 88.3 µs ± 33.7 µs per loop (this timing includes building the Series); over a thousand times faster than the element-wise loop

88.3 µs ± 33.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [34]:
# Adding new data via the loc indexer: assigning to a label that does not exist yet appends a new entry

s = pd.Series([1, 2, 3])
s.loc["History"] = 102  # the index now mixes integer labels (0, 1, 2) with a string label
s

0            1
1            2
2            3
History    102
dtype: int64

In [38]:
# Index labels do not have to be unique. Start with one class per student.

classes = pd.Series(
    data=["Physics", "Chemistry", "English", "History"],
    index=["Alice", "Jack", "Molly", "Sam"],
)

classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [39]:
# One student taking several classes: the same label is repeated for every entry
kellyClasses = pd.Series(
    data=["Philosophy", "Arts", "Math"],
    index=["Kelly"] * 3,
)
kellyClasses

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [41]:
# Combine both Series. pd.concat returns a new Series holding the data of both inputs;
# neither classes nor kellyClasses is modified.
# (Series.append was removed in pandas 2.0 and _append is a private method, so use the public pd.concat.)

allClasses = pd.concat([classes, kellyClasses])
allClasses

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [42]:
# Looking up "Kelly" returns a Series rather than a single value, because that label appears more than once

allClasses["Kelly"]

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object