# pandas Series

## Setup

In [1]:
from datetime import date

import numpy as np
import pandas as pd

# Cap printed output at 10 columns / 10 rows; longer objects are elided in the middle.
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

### Create Series from ndarray

In [2]:
# No index is supplied, so the values get the default positional labels 0..4.
s1 = pd.Series(data=np.arange(5))
s1

0    0
1    1
2    2
3    3
4    4
dtype: int32

### Create Series with index

In [3]:
# Same values as before, this time labelled with explicit string keys.
s2 = pd.Series(data=np.arange(5), index=list('abcde'))
s2

a    0
b    1
c    2
d    3
e    4
dtype: int32

### Assign index to Existing Series

In [4]:
# Assigning to .index relabels the existing Series in place; the values stay put.
s2.index = list('ABCDE')
s2

A    0
B    1
C    2
D    3
E    4
dtype: int32

### The index and values Properties

In [5]:
# The labels are held in an Index object.
s2.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [6]:
# The data without its labels.
s2.values

array([0, 1, 2, 3, 4])

In [7]:
# .values hands back a plain NumPy array.
type(s2.values)

numpy.ndarray

### Create One-Item Series from Scalar

In [8]:
# A bare scalar becomes a one-element Series labelled 0.
s3 = pd.Series(data=5)
s3

0    5
dtype: int64

### Create Series from list

In [9]:
# A plain Python list works as input too; labels default to 0..4.
s4 = pd.Series(list(range(1, 6)))
s4

0    1
1    2
2    3
3    4
4    5
dtype: int64

### Create Series from dict

In [10]:
# Dict keys become the index labels and the values keep the dict's order.
# `date` comes from the import cell at the top of the notebook.
bdays = {
    'John': date(1940, 10, 9),
    'Paul': date(1942, 6, 18),
    'George': date(1943, 2, 25),
    'Ringo': date(1940, 7, 7),
}
s5 = pd.Series(bdays)
s5

John      1940-10-09
Paul      1942-06-18
George    1943-02-25
Ringo     1940-07-07
dtype: object

### An Array with Holes

In [11]:
# np.nan marks the two missing entries; the array is displayed as floats.
ar = np.array([1, 2, 3, np.nan, 5, 6, 7, np.nan, 9, 10])
ar

array([ 1.,  2.,  3., nan,  5.,  6.,  7., nan,  9., 10.])

In [12]:
# The ndarray mean does not skip NaN, so the result is nan.
ar.mean()

nan

In [13]:
# Wrap the same array in a Series; the NaN entries are carried over.
s6 = pd.Series(ar)
s6

0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     7.0
7     NaN
8     9.0
9    10.0
dtype: float64

In [14]:
# Series.mean() leaves the NaN entries out, so it equals the mean of the
# eight values that are present (computed by hand on the right).
s6.mean(), sum([1, 2, 3, 5, 6, 7, 9, 10])/8

(5.375, 5.375)

In [15]:
# With skipna=False the NaN entries count again and the mean is nan.
s6.mean(skipna=False)

nan

In [16]:
# Boolean mask: True where a value is missing.
s6.isna()

0    False
1    False
2    False
3     True
4    False
5    False
6    False
7     True
8    False
9    False
dtype: bool

In [17]:
# The inverse mask: True where a value is present.
s6.notna()

0     True
1     True
2     True
3    False
4     True
5     True
6     True
7    False
8     True
9     True
dtype: bool

### Check if a `Series` contains a NaN value

In [18]:
# Series.any() reduces the boolean mask directly; there is no need to drop
# down to the underlying NumPy array first.
s6.isna().any()

True

### loc[] and iloc[]

In [19]:
# Seeded generator, so the five random floats are the same on every run.
rng = np.random.default_rng(42)
s7 = pd.Series(rng.random(size=5), index=list('abcde'))
s7

a    0.773956
b    0.438878
c    0.858598
d    0.697368
e    0.094177
dtype: float64

In [20]:
# .loc looks up by label, .iloc by position; 'a' is the first label, so both
# return the same element.
s7.loc['a'], s7.iloc[0]

(0.7739560485559633, 0.7739560485559633)

In [21]:
# A label slice includes both end labels: 'b', 'c' and 'd'.
s7.loc['b':'d']

b    0.438878
c    0.858598
d    0.697368
dtype: float64

In [22]:
# A positional slice excludes the stop position: positions 1, 2 and 3.
s7.iloc[1:4]

b    0.438878
c    0.858598
d    0.697368
dtype: float64

In [23]:
# A list of labels selects exactly those entries.
indices = ['b', 'c', 'd']
s7.loc[indices]

b    0.438878
c    0.858598
d    0.697368
dtype: float64

In [24]:
# The listed labels do not have to be adjacent.
s7.loc[['a', 'c', 'e']]

a    0.773956
c    0.858598
e    0.094177
dtype: float64

In [25]:
# The same selection expressed as positions instead of labels.
s7.iloc[[0, 2, 4]]

a    0.773956
c    0.858598
e    0.094177
dtype: float64

### Alignment

In [26]:
# Two grade tallies with the same labels, stored in different orders.
grades1 = pd.Series({'A': 17, 'B': 44, 'C': 28, 'D': 8, 'F': 3})
grades2 = pd.Series({'D': 76, 'C': 122, 'B': 151, 'A': 21, 'F': 0})

In [27]:
# First tally, labels in order A through F.
grades1

A    17
B    44
C    28
D     8
F     3
dtype: int64

In [28]:
# Second tally: same labels, but stored in a different order.
grades2

D     76
C    122
B    151
A     21
F      0
dtype: int64

#### Adding `Series`

In [29]:
# Values are paired up by label, not by position: A is 17 + 21, D is 8 + 76.
grades_all = grades1 + grades2
grades_all

A     38
B    195
C    150
D     84
F      3
dtype: int64

In [30]:
# Same tallies as before, except that grades2 no longer has an 'F' entry.
grades1 = pd.Series({'A': 17, 'B': 44, 'C': 28, 'D': 8, 'F': 3})
grades2 = pd.Series({'D': 76, 'C': 122, 'B': 151, 'A': 21})
# A label found on only one side yields NaN, and the result becomes float.
grades_all = grades1 + grades2
grades_all

A     38.0
B    195.0
C    150.0
D     84.0
F      NaN
dtype: float64

#### `add()` with `fill_value`

In [31]:
# fill_value=0 stands in for a label that is missing on one side, so 'F'
# keeps its 3 instead of turning into NaN. The result is still float.
grades_all = grades1.add(grades2, fill_value=0)
grades_all

A     38.0
B    195.0
C    150.0
D     84.0
F      3.0
dtype: float64

### Comparing Series

In [32]:
# Two seasons of batting stats keyed by the same five categories.
mantle1968 = pd.Series({'2B': 14, '3B': 1, 'HR': 18, 'RBI': 54, 'SB': 6})
mantle1951 = pd.Series({'2B': 11, '3B': 5, 'HR': 13, 'RBI': 65, 'SB': 8})
# Comparison is element-wise and returns a boolean Series.
mantle1968 > mantle1951

2B      True
3B     False
HR      True
RBI    False
SB     False
dtype: bool

In [33]:
# Using the boolean Series as a mask keeps only the entries where it is True.
mantle1968[mantle1968 > mantle1951]

2B    14
HR    18
dtype: int64

### Element-wise Operations

In [34]:
# 100 reproducible integer grades from 60 up to and including 100
# (the upper bound passed to integers() is exclusive).
rng = np.random.default_rng(42)
exam_grades = pd.Series(rng.integers(low=60, high=101, size=100))
exam_grades

0     63
1     91
2     86
3     77
4     77
      ..
95    71
96    69
97    87
98    86
99    65
Length: 100, dtype: int64

In [35]:
# The scalar is applied to every element; the integer grades become floats.
curved_grades = exam_grades.multiply(1.05)
curved_grades

0     66.15
1     95.55
2     90.30
3     80.85
4     80.85
      ...  
95    74.55
96    72.45
97    91.35
98    90.30
99    68.25
Length: 100, dtype: float64

In [36]:
def convert_to_letter(grade):
    """Map a single numeric grade to its letter grade.

    The cutoffs are inclusive lower bounds: 90 and up is 'A', 80 is 'B',
    70 is 'C', 65 is 'D', and anything below 65 is 'F'.

    Each comparison is used as a plain truth value, so `grade` must be one
    number; passing a whole Series raises ValueError.
    """
    # Checked from the highest cutoff down; the first one reached wins.
    cutoffs = ((90, 'A'), (80, 'B'), (70, 'C'), (65, 'D'))
    for minimum, letter in cutoffs:
        if grade >= minimum:
            return letter
    return 'F'

In [37]:
# `if grade >= 90` needs one True/False, but on a Series the comparison gives
# one boolean per element, so calling the function on the whole Series fails.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    convert_to_letter(curved_grades)
except ValueError as err:
    print(f'{type(err).__name__}: {err}')

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [38]:
# apply() calls the function once per element, so each call sees one number.
letter_grades = curved_grades.apply(convert_to_letter)
letter_grades

0     D
1     A
2     A
3     B
4     B
     ..
95    C
96    C
97    A
98    A
99    D
Length: 100, dtype: object