<a href="https://colab.research.google.com/github/happycode-ch/pandas_colab/blob/main/2_udemy_pandas_course_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

### Lecture 1: What is a Series? (A sequence of values with associated labels)

In [2]:
students = ['Andrew', 'Kate', 'Diebold']

In [3]:
type(students)

list

In [4]:
pd.Series(students)

0     Andrew
1       Kate
2    Diebold
dtype: object

In [5]:
ages = [23, 45, 19]

In [6]:
pd.Series(ages)

0    23
1    45
2    19
dtype: int64

In [7]:
heights = [167.4, 173.2, 130.4]

In [8]:
pd.Series(heights)

0    167.4
1    173.2
2    130.4
dtype: float64

In [9]:
# Pandas identifies the dtype

In [10]:
mixed = [True, 'say', {'my_brain': 95}]

In [11]:
pd.Series(mixed)

0                True
1                 say
2    {'my_brain': 95}
dtype: object

In [12]:
students

['Andrew', 'Kate', 'Diebold']

In [13]:
pd.Series(students)

0     Andrew
1       Kate
2    Diebold
dtype: object

Parameters vs. Arguments

In [14]:
pd.Series(data=students) # parameter=argument: `data` is the parameter; `students` is the argument, i.e. the actual value passed for that parameter

0     Andrew
1       Kate
2    Diebold
dtype: object

In [15]:
def greeting(something) -> None:
  """Print `something` to standard output.

  Minimal example for the parameter/argument distinction: `something` is the
  parameter, and whatever the caller passes in is the argument.
  """
  print(something)

In [16]:
greeting('Good Morning')

Good Morning


What is the Data?

In [17]:
# Book titles reused throughout the notebook; double quotes avoid escaping the apostrophe.
books_list = [
    "The Escape Artist",
    "The Spy and the Traitor",
    "The Sceptic's Guide to the Universe",
]

In [18]:
books_list

['The Escape Artist',
 'The Spy and the Traitor',
 "The Sceptic's Guide to the Universe"]

In [19]:
pd.Series(books_list)

0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
dtype: object

In [20]:
list_s = pd.Series(books_list)

In [21]:
# python list is an ordered data structure

In [22]:
# The same titles keyed by integer position 0, 1, 2.
books_dict = {
    0: "The Escape Artist",
    1: "The Spy and the Traitor",
    2: "The Sceptic's Guide to the Universe",
}

In [23]:
dict_s = pd.Series(books_dict)

In [24]:
list_s.equals(dict_s)

True

In [25]:
pd.Series(714)

0    714
dtype: int64

In [26]:
pd.Series('Andy')

0    Andy
dtype: object

### The dtype Attribute

In [27]:
pd.Series(ages)

0    23
1    45
2    19
dtype: int64

In [28]:
pd.Series(ages, dtype='float')

0    23.0
1    45.0
2    19.0
dtype: float64

In [29]:
 # most cases, this is not necessary (dtype=)

In [30]:
name_series = pd.Series(students)

In [31]:
name_series.dtype

dtype('O')

In [32]:
# By default, a Series that contains strings has dtype object (unless a string dtype is requested explicitly)

#### What is the dtype Really?

In [33]:
heights

[167.4, 173.2, 130.4]

In [34]:
pd.Series(heights)

0    167.4
1    173.2
2    130.4
dtype: float64

In [35]:
heights2 = [167.4, '173.2', 130.4]

In [36]:
pd.Series(heights2) # contains string, now OBJECT

0    167.4
1    173.2
2    130.4
dtype: object

#### Index and RangeIndex

In [37]:
books_list

['The Escape Artist',
 'The Spy and the Traitor',
 "The Sceptic's Guide to the Universe"]

In [38]:
list_s

0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
dtype: object

In [39]:
pd.Series(data=books_list, index=['Harrowing', 'Thrilling', 'Interesting and Academic'])

Harrowing                                     The Escape Artist
Thrilling                               The Spy and the Traitor
Interesting and Academic    The Sceptic's Guide to the Universe
dtype: object

In [40]:
pd.Series(books_list, ['Harrowing', 'Thrilling', 'Interesting and Academic']) # positional arguments are matched to parameters in order: data first, then index

Harrowing                                     The Escape Artist
Thrilling                               The Spy and the Traitor
Interesting and Academic    The Sceptic's Guide to the Universe
dtype: object

In [41]:
# dtype='string' (pandas StringDtype) is available beginning in pandas 1.0.0

In [42]:
pd.Series(books_list, ['Harrowing', 'Thrilling', 'Interesting and Academic'], dtype='string')

Harrowing                                     The Escape Artist
Thrilling                               The Spy and the Traitor
Interesting and Academic    The Sceptic's Guide to the Universe
dtype: string

In [43]:
list_s.index

RangeIndex(start=0, stop=3, step=1)

In [44]:
type(list_s.index)

pandas.core.indexes.range.RangeIndex

In [45]:
pd.RangeIndex(start=4, stop=7, step=1)

RangeIndex(start=4, stop=7, step=1)

In [46]:
list(pd.RangeIndex(start=4, stop=7, step=1))

[4, 5, 6]

In [47]:
list(pd.RangeIndex(start=10, stop=-11, step=-1))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10]

In [48]:
# RangeIndex = immutable object

#### Series and Index Names

In [49]:
books_series = list_s

In [50]:
# intelligible: capable of being understood

In [51]:
books_series

0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
dtype: object

In [52]:
# attribute vs method

**Method:** a function bound to the object.

**Attribute:** a variable bound to the object.

In [53]:
books_series.size

3

In [54]:
list_s.equals(dict_s) # method

True

In [55]:
list_s.dtype # attribute

dtype('O')

In [56]:
books_series.name

In [57]:
books_series.name is None  # a Series has no name until one is assigned; `is None` is the idiomatic None test

True

In [58]:
books_series

0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
dtype: object

In [59]:
books_series.name = 'Recent Reads'

In [60]:
books_series

0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
Name: Recent Reads, dtype: object

In [61]:
# point of this name? when using dataframes, the name will become the column name

In [62]:
books_series.index.name is None  # the index is unnamed at this point; `is None` is the idiomatic None test

True

In [63]:
books_series.index.name = 'My Books'

In [64]:
books_series.index.name

'My Books'

In [65]:
books_series

My Books
0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
Name: Recent Reads, dtype: object

## Skill Challenge

1. Create a Python list of length 4 that contains the names of some favourite actors (strings) and assign it to a variable called `actor_names`.

2. Create another list, actor_ages with corresponding ages

3. Create a Series that stores the actors' ages and labels them using the actor names, with name='actors'.

4. Create the same Series from a dictionary: first by writing the dictionary out manually, then by building it dynamically from the two lists.

In [66]:
actor_names = ['Gary Oldman', 'Arnold Schwarzenegger', 'Al Paccino', 'Robert De Niro', 'Philip Seymour Hoffman']

In [67]:
pd.Series(actor_names)

0               Gary Oldman
1     Arnold Schwarzenegger
2                Al Paccino
3            Robert De Niro
4    Philip Seymour Hoffman
dtype: object

In [68]:
actor_ages = [65, 76, 83, 80, 46]

In [69]:
pd.Series(data=actor_ages, index=actor_names, name='actors')

Gary Oldman               65
Arnold Schwarzenegger     76
Al Paccino                83
Robert De Niro            80
Philip Seymour Hoffman    46
Name: actors, dtype: int64

In [70]:
pd.Series(actor_ages, actor_names, name='actors')

Gary Oldman               65
Arnold Schwarzenegger     76
Al Paccino                83
Robert De Niro            80
Philip Seymour Hoffman    46
Name: actors, dtype: int64

In [71]:
pd.Series({'Gary Oldman':65, 'Arnold Schwarzenegger':76 , 'Al Paccino':83 , 'Robert De Niro':80 , 'Philip Seymour Hoffman':46}, name='actors') # manually

Gary Oldman               65
Arnold Schwarzenegger     76
Al Paccino                83
Robert De Niro            80
Philip Seymour Hoffman    46
Name: actors, dtype: int64

In [72]:
actors_dict = pd.Series(dict(map(lambda i,j : (i,j) , actor_names, actor_ages)), name='actors_dict') # dynamically using map

In [73]:
actors_dict

Gary Oldman               65
Arnold Schwarzenegger     76
Al Paccino                83
Robert De Niro            80
Philip Seymour Hoffman    46
Name: actors_dict, dtype: int64

In [74]:
list(zip(actor_names, actor_ages))

[('Gary Oldman', 65),
 ('Arnold Schwarzenegger', 76),
 ('Al Paccino', 83),
 ('Robert De Niro', 80),
 ('Philip Seymour Hoffman', 46)]

In [75]:
dict(zip(actor_names, actor_ages))

{'Gary Oldman': 65,
 'Arnold Schwarzenegger': 76,
 'Al Paccino': 83,
 'Robert De Niro': 80,
 'Philip Seymour Hoffman': 46}

In [76]:
pd.Series(dict(zip(actor_names, actor_ages)), name='actors')

Gary Oldman               65
Arnold Schwarzenegger     76
Al Paccino                83
Robert De Niro            80
Philip Seymour Hoffman    46
Name: actors, dtype: int64

### Another Solution to Step 4 Dict Comprehension

In [77]:
pd.Series({name:age for name, age in zip(actor_names, actor_ages)}, name='actors') # dict comprehension

Gary Oldman               65
Arnold Schwarzenegger     76
Al Paccino                83
Robert De Niro            80
Philip Seymour Hoffman    46
Name: actors, dtype: int64

### The head() and tail() Methods

In [78]:
int_series = pd.Series([i for i in range(60)])

In [79]:
int_series

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
26    26
27    27
28    28
29    29
30    30
31    31
32    32
33    33
34    34
35    35
36    36
37    37
38    38
39    39
40    40
41    41
42    42
43    43
44    44
45    45
46    46
47    47
48    48
49    49
50    50
51    51
52    52
53    53
54    54
55    55
56    56
57    57
58    58
59    59
dtype: int64

In [80]:
pd.Series(range(60))

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
26    26
27    27
28    28
29    29
30    30
31    31
32    32
33    33
34    34
35    35
36    36
37    37
38    38
39    39
40    40
41    41
42    42
43    43
44    44
45    45
46    46
47    47
48    48
49    49
50    50
51    51
52    52
53    53
54    54
55    55
56    56
57    57
58    58
59    59
dtype: int64

In [81]:
int_series.size

60

In [82]:
len(int_series)

60

In [83]:
int_series.head(n=7)

0    0
1    1
2    2
3    3
4    4
5    5
6    6
dtype: int64

In [84]:
int_series.tail(n=10)

50    50
51    51
52    52
53    53
54    54
55    55
56    56
57    57
58    58
59    59
dtype: int64

In [85]:
pd.Series(range(100))

0      0
1      1
2      2
3      3
4      4
      ..
95    95
96    96
97    97
98    98
99    99
Length: 100, dtype: int64

In [86]:
pd.options.display.min_rows = 20

### Extracting By Index Position

In [87]:
from string import ascii_lowercase

In [88]:
ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [89]:
pd.Series(ascii_lowercase)

0    abcdefghijklmnopqrstuvwxyz
dtype: object

In [90]:
len(list(ascii_lowercase))

26

In [91]:
letters = list(ascii_lowercase)

In [92]:
alphabet = pd.Series(letters)

In [93]:
alphabet.head(6)

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

Note: `head()` returns another Series; it is not the right way to access individual Series elements.

In [94]:
# square bracket notation
alphabet[1]

'b'

In [95]:
alphabet[0]

'a'

In [96]:
# What is the first letter? (done above)
# What is the 11th letter?
# What are the first 3 letters?
# What are the sixth through tenth letters?
# What are the last six letters?

In [97]:
# 2
alphabet[10]

'k'

In [98]:
# 3
alphabet[0:3] # 0, 1, 2 excluding 3

0    a
1    b
2    c
dtype: object

In [99]:
# 4

alphabet[5:10]

5    f
6    g
7    h
8    i
9    j
dtype: object

In [100]:
alphabet[-6:] # go to end, back 6 and then to the end

20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

### Access Elements By Label

In [101]:
from string import ascii_uppercase

In [102]:
# Lowercase letters as values, labelled 'label_A' through 'label_Z'.
labled_alphabet = pd.Series(
    data=list(ascii_lowercase),
    index=['label_' + upper for upper in ascii_uppercase],
)

In [103]:
labled_alphabet.head(3)

label_A    a
label_B    b
label_C    c
dtype: object

In [104]:
# What is the first letter? (done above)
# What is the 11th letter?
# What are the first 3 letters?
# What are the sixth through tenth letters?
# What are the last six letters?

In [105]:
labled_alphabet.iloc[0]  # first element by position; a bare integer in [] on a string-labelled index relies on a deprecated positional fallback

'a'

In [106]:
labled_alphabet['label_A']

'a'

In [107]:
labled_alphabet.iloc[10]  # 11th element by position; a bare integer in [] on a string-labelled index relies on a deprecated positional fallback

'k'

In [108]:
labled_alphabet['label_K']

'k'

In [109]:
labled_alphabet[:3]

label_A    a
label_B    b
label_C    c
dtype: object

In [110]:
labled_alphabet[:'label_C'] # label c is included in label based indexing, vs. position based indexing

label_A    a
label_B    b
label_C    c
dtype: object

In [111]:
labled_alphabet[5:10]

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [112]:
labled_alphabet['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [113]:
labled_alphabet[-6:]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [114]:
labled_alphabet[:-6]

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
dtype: object

In [115]:
labled_alphabet['label_U':]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [116]:
labled_alphabet = pd.Series(data=list(ascii_lowercase), index=map(lambda x: 'label_' + x, (ascii_uppercase)))

In [117]:
alphabet.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

A different, more efficient way to achieve a similar result to the above code

In [118]:
 alphabet.add_prefix('label_')

label_0     a
label_1     b
label_2     c
label_3     d
label_4     e
label_5     f
label_6     g
label_7     h
label_8     i
label_9     j
label_10    k
label_11    l
label_12    m
label_13    n
label_14    o
label_15    p
label_16    q
label_17    r
label_18    s
label_19    t
label_20    u
label_21    v
label_22    w
label_23    x
label_24    y
label_25    z
dtype: object

In [119]:
alphabet = alphabet.add_suffix('_some_cool_ending')

### Using Dot Notation

In [120]:
labled_alphabet['label_V']

'v'

In [121]:
labeled_alphabet = labled_alphabet

In [122]:
labeled_alphabet.label_V

'v'

### Boolean Masks and the .loc Indexer

In [123]:
# loc
labeled_alphabet.loc['label_F':'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

Why are there two different ways to do the same thing? `.loc[]` is the prototypical way to do label-based extraction (and it supports Boolean masks), whereas `['':'']` is plain square-bracket slicing.

In [124]:
# boolean
books_series

My Books
0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
Name: Recent Reads, dtype: object

In [125]:
books_series.loc[[True, True, True]]

My Books
0                      The Escape Artist
1                The Spy and the Traitor
2    The Sceptic's Guide to the Universe
Name: Recent Reads, dtype: object

In [126]:
books_series.loc[[True, False, True]]

My Books
0                      The Escape Artist
2    The Sceptic's Guide to the Universe
Name: Recent Reads, dtype: object

In [127]:
# books_series.loc[[True, False]] index error

In [128]:
labeled_alphabet.size

26

In [129]:
labeled_alphabet.loc[[True] * labeled_alphabet.size]  # all-True mask sized from the Series itself instead of a hard-coded 26

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [130]:
labeled_alphabet.loc[[i % 2 == 0 for i in range(labeled_alphabet.size)]]  # mask is True at even positions, so every other element is kept starting with the first

label_A    a
label_C    c
label_E    e
label_G    g
label_I    i
label_K    k
label_M    m
label_O    o
label_Q    q
label_S    s
label_U    u
label_W    w
label_Y    y
dtype: object

### Extracting By Position With .iloc
1. iloc => integer location => indexing by position
1. loc => location => indexing by label

In [131]:
labeled_alphabet.iloc[0]

'a'

In [132]:
labeled_alphabet.iloc[1:3]

label_B    b
label_C    c
dtype: object

In [137]:
labeled_alphabet[1:3] # functionality the same / akin: similia to

label_B    b
label_C    c
dtype: object

In [134]:
labeled_alphabet.iloc[[1,4,9]]

label_B    b
label_E    e
label_J    j
dtype: object

# Bonus - Using Callables With .loc and .iloc

In [144]:
# A callable passed to .loc/.iloc is a single-argument function: it receives the Series as input and returns an indexer (a label, a list of labels, a list of booleans, a slice, etc.)
labeled_alphabet.loc['label_V']

'v'

In [145]:
labeled_alphabet.loc[lambda x: 'label_V']

'v'

In [147]:
labeled_alphabet.loc[lambda x: ['label_V', 'label_A']]

label_V    v
label_A    a
dtype: object

In [149]:
# labeled_alphabet.loc[lambda x: [True, True]] fails, Boolean index has wrong length: 2 instead of 26

In [151]:
labeled_alphabet.loc[lambda x: [True for i in range(x.size)]] # produces a True for all the instances in the series, lambda returns list (as many True values as the series itself)

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [155]:
def every_fifth(x):
  """Build a boolean mask that selects every fifth element of `x`.

  The mask has `x.size` entries and is True at the 5th, 10th, 15th, ...
  positions (counting from 1) and False everywhere else.
  """
  return [position % 5 == 0 for position in range(1, x.size + 1)]

In [156]:
labeled_alphabet.iloc[every_fifth] # .iloc calls every_fifth with the Series (labeled_alphabet) as its argument x and uses the returned boolean list as the indexer

label_E    e
label_J    j
label_O    o
label_T    t
label_Y    y
dtype: object

### Selecting with .get()

In [157]:
labeled_alphabet.get('label_I') # why use get (convienences)

'i'

In [158]:
labeled_alphabet.loc['label_I']

'i'

In [160]:
labeled_alphabet['label_I']

'i'

In [161]:
labeled_alphabet.get('label_Inexistent') # returning object 'none'

In [164]:
labeled_alphabet.get('label_Inexistent', default=None)

In [166]:
labeled_alphabet.get('label_Inexistent', default='Counld not find anything by that label, sorry')

'Counld not find anything by that label, sorry'

In [167]:
labeled_alphabet.get('label_Inexistent', default=19)

19

In [168]:
labeled_alphabet.get('label_Inexistent', default={19:'20'}) # get() gracefully responds/ continues,  when it does not find a label

{19: '20'}

In [169]:
 # labeled_alphabet['label_Inexistent'] fails, errors out

In [170]:
labeled_alphabet.get(8) # mixed capabilities (a chimera: mixed creature): get() accepts a position as well as a label here. NOTE(review): this depends on the positional fallback for integer keys on a label index, which newer pandas deprecates - confirm against the installed version

'i'

In [172]:
labeled_alphabet.iloc[8]

'i'

In [173]:
labeled_alphabet[8] # NOTE(review): an integer key in [] on a string-labelled index depends on a positional fallback that newer pandas deprecates - confirm against the installed version; .iloc[8] is the explicit form

'i'

### Section Recap

In [176]:
# Selection by Label
# [] = idx'ing series['label'] - slices, callables, boolean masks
# .loc[] = series.loc['label'] - slices, callables, boolean masks
# dot access = series.label - no slice or boolean mask support
# .get() = series.get('label') - no slice support, provides default, forgiving

In [175]:
# Selection by Position
# [] = idx'ing series[0] - slices, callables, boolean masks
# .iloc[] = series.iloc[0] - slices, callables, boolean masks
# dot access /NOPE NA, for Selection by Position
# .get() = series.get(0) - no slice support, provides default, forgiving

### Skill Challenge
1. Create a Series of length 100 containing the squares of the integers from 0 to 99. Assign it to the variable squares.

In [205]:
# Squares of 0..99: build the integers as a Series, then square them element-wise.
squares = pd.Series(range(100)) ** 2

In [208]:
squares

0        0
1        1
2        4
3        9
4       16
5       25
6       36
7       49
8       64
9       81
      ... 
90    8100
91    8281
92    8464
93    8649
94    8836
95    9025
96    9216
97    9409
98    9604
99    9801
Length: 100, dtype: int64

In [195]:
# 2. Extract the Last three items from the squares series using the square bracket indexing.
squares[-3:]

97    9409
98    9604
99    9801
dtype: int64

In [196]:
# 3. Extract the last three items from the squares series using the .tail() method.
squares.tail(n=3)

97    9409
98    9604
99    9801
dtype: int64

In [197]:
squares_indexing = squares[-3:]

In [198]:
squares_tail = squares.tail(n=3)

In [200]:
# 4. verify that the output from 2. and 3. are equal using the .equals() method.
squares_indexing.equals(squares_tail)

True

In [211]:
squares[-3:].equals(squares.tail(3)) # instead of assiging each methods results to a var

True