<a href="https://colab.research.google.com/github/jack-cao-623/python_learning/blob/main/series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Series
- *object* from Pandas library
- Series is 1D labelled array, or a single column of data

In [4]:
# libraries needed
import numpy as np
import pandas as pd

## Create a Pandas Series object from a Python list
- Series is an object; 1D labelled array
- Python list: can store elements in order; each element has a numeric position in line, i.e., the index
- Python dictionary: able to establish associations between keys and value, key:value pairs
- Series combines the best of a list and a dictionary in a single object; equivalent to a column of data in a spreadsheet; every element keeps a numeric position (like a list), and each element can also be given a custom label (like a dictionary key)
- Series also gives us methods

In [5]:
# list
ice_cream_flavors = ['Chocolate', 'Vanilla', 'Strawberry', 'Rum Raisin']

In [6]:
# create a Series object with ice_cream_flavors
pd.Series(data = ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [7]:
# pd.Series() is a class; blueprint for creating objects
# dtype means data type
  # tells us the type of data for elements in series; ice cream flavors is a str (string); referred to as an object
  # object covers more than strings

In [8]:
# pull out index
pd.Series(data = ice_cream_flavors).index

RangeIndex(start=0, stop=4, step=1)

In [9]:
# index defaults to 0, 1, 2, 3...ascending numeric
# this is like a list...don't get the dictionary benefit, see bullet point in title

In [10]:
# create another Series
lottery = [15, 3, 14, 6, 9, 10]
pd.Series(data = lottery)

0    15
1     3
2    14
3     6
4     9
5    10
dtype: int64

In [11]:
# dtype doesn't refer to index; it refers to the elements

In [12]:
registrations = [True, False, False, True, True]
pd.Series(registrations)

0     True
1    False
2    False
3     True
4     True
dtype: bool

In [13]:
# dtype doesn't refer to index; it refers to the elements

In [14]:
# convention is to include arguments, not data = ...; just put in the arguments; fewer keystrokes

In [15]:
# a Series is equivalent to 1 column of data; Dataframe is a bunch of Series glued together with common index

## Create a Pandas Series object with a dictionary
- key takeaway: index of a Series doesn't need to be numeric

In [16]:
# dictionary
sushi = {
    'salmon':'orange',
    'tuna':'red',
    'eel':'brown'
}
print(sushi)

{'salmon': 'orange', 'tuna': 'red', 'eel': 'brown'}


In [17]:
# create series
pd.Series(sushi)

salmon    orange
tuna         red
eel        brown
dtype: object

In [18]:
# remember dtype: object refers to data type of the values (orange, red, brown)

In [19]:
# a dictionary keeps insertion order (Python 3.7+) but only supports lookup by key;
# pd.Series(sushi) is ordered and labeled, so lookup by position (.iloc) and by label (.loc) both work
print(pd.Series(sushi).iloc[0])
print(pd.Series(sushi).loc['salmon'])

orange
orange


In [20]:
# Pandas Series lets you have duplicated labels for indices; can't do this in a dictionary

In [21]:
# create the same Series object but without a dictionary
pd.Series(
    data = ['orange', 'red', 'brown'],       # dictionary value equivalent
    index = ['salmon', 'tuna', 'eel']        # dictionary key equivalent
)

salmon    orange
tuna         red
eel        brown
dtype: object

## Intro to Methods
- method is a directive, command, behavior that we can have an object do for us

In [22]:
# example method: capitalize 'hello'
'hello'.upper()

'HELLO'

In [23]:
# upper() is an example method

In [24]:
# strings can't be mutated by methods, e.g.:
a = 'hello'
print(a.upper()) # 'HELLO'
print(a)         # 'hello'

HELLO
hello


In [25]:
# above, .upper() method doesn't mutate or change a

In [26]:
# but some methods do modify the object they're invoked on
b = [1, 2, 3]
b.append(4)   
print(b)           # [1, 2, 3, 4]

[1, 2, 3, 4]


In [27]:
# above, .append() method changes the list, b

In [28]:
# create a Pandas Series
prices = pd.Series([2.99, 4.45, 1.36])
prices

0    2.99
1    4.45
2    1.36
dtype: float64

In [29]:
# add up values
prices.sum()

8.8

In [30]:
# multiply the values
prices.product()

18.095480000000006

In [31]:
print(2.99 * 4.45 * 1.36)

18.095480000000006


In [32]:
# find mean of the three values
prices.mean()

2.9333333333333336

In [33]:
# another way of finding the average
prices.sum() / len(prices)

2.9333333333333336

In [34]:
# the above methods don't require parameters/arguments; other methods do require arguments

## Intro to attributes
- attribute is a detail/characteristic/fact/property that an object can tell us about itself
- method is a behavior or command, akin to a verb
- attribute is akin to an adjective

In [35]:
# create a Series
adjectives = pd.Series(['smart', 'handsome', 'charming', 'brilliant', 'humble', 'smart'])
print(adjectives)

0        smart
1     handsome
2     charming
3    brilliant
4       humble
5        smart
dtype: object


In [36]:
# how many elements are in Series
adjectives.size

6

In [37]:
# whether each element is unique, i.e., no duplicates
adjectives.is_unique

False

In [38]:
# methods have parentheses, e.g., series.method()
# attributes don't have parentheses, e.g., series.attribute
# when you do series. and wait, methods are indicated by box, attributes by wrench

In [39]:
# pull out values; Series consists of index and values; this is an attribute
adjectives.values

array(['smart', 'handsome', 'charming', 'brilliant', 'humble', 'smart'],
      dtype=object)

In [40]:
# above, values is a numpy array; pandas is built on top of numpy
type(adjectives.values)

numpy.ndarray

In [41]:
# pull out index
adjectives.index

RangeIndex(start=0, stop=6, step=1)

In [42]:
type(adjectives.index)
# index is from pandas

pandas.core.indexes.range.RangeIndex

In [43]:
# data type
adjectives.dtype
  # 'O' for object

dtype('O')

In [44]:
prices.dtype

dtype('float64')

## Parameters and arguments
- Parameter is the generic name, i.e., left side of equal sign inside a .method()
- Argument is the specific/concrete value, i.e., right side of equal sign inside a .method()

E.g., difficulty is a parameter; easy, medium, hard are potential arguments for the difficulty parameter

In [45]:
# create two lists
fruits = ['apple', 'banana', 'orange', 'blueberry', 'raspberry']
day_of_week = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri']

In [46]:
# create series where fruits is the value and day_of_week is index
pd.Series(
    data = fruits,
    index = day_of_week
)

Mon        apple
Tue       banana
Wed       orange
Thu    blueberry
Fri    raspberry
dtype: object

In [47]:
# above, data and index are parameters; fruits and day_of_week are arguments

In [48]:
# equivalent to the above
pd.Series(fruits, day_of_week)
  # first parameter is data; second parameter is index; decided by pandas development team

Mon        apple
Tue       banana
Wed       orange
Thu    blueberry
Fri    raspberry
dtype: object

In [49]:
# can you switch it around?
pd.Series(index = day_of_week, data = fruits)
  # yes, just like R

Mon        apple
Tue       banana
Wed       orange
Thu    blueberry
Fri    raspberry
dtype: object

In [50]:
# but convention is just to type the arguments, not the parameters and equal signs
# i.e., just right side and rely on position/sequence
# ORDER MATTERS!

In [51]:
pd.Series(day_of_week, fruits) # weekday is the data, fruits is the index
# ORDER MATTERS!

apple        Mon
banana       Tue
orange       Wed
blueberry    Thu
raspberry    Fri
dtype: object

In [52]:
# but when you're explicit about parameters, order doesn't matter
pd.Series(
    data = fruits,
    index = day_of_week
)

Mon        apple
Tue       banana
Wed       orange
Thu    blueberry
Fri    raspberry
dtype: object

In [53]:
# same as above
pd.Series(
    index = day_of_week,
    data = fruits
)

Mon        apple
Tue       banana
Wed       orange
Thu    blueberry
Fri    raspberry
dtype: object

In [54]:
# being explicit (naming the parameter) is known as using keyword parameters or keyword arguments

In [55]:
# can also mix and match; include some keywords but not others
pd.Series(fruits, index = day_of_week)

Mon        apple
Tue       banana
Wed       orange
Thu    blueberry
Fri    raspberry
dtype: object

In [56]:
# pandas Series can have duplicate indices, unlike Python dictionary
pd.Series(
    data = [1, 2, 3, 3],
    index = ['a', 'b', 'c', 'c']
)

a    1
b    2
c    3
c    3
dtype: int64

In [57]:
# data and index arguments must be of the same length

## Import Pandas Series with pd.read_csv() function

In [58]:
pd.read_csv('https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/pokemon.csv')

Unnamed: 0,Pokemon,Type
0,Bulbasaur,Grass
1,Ivysaur,Grass
2,Venusaur,Grass
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
716,Yveltal,Dark
717,Zygarde,Dragon
718,Diancie,Rock
719,Hoopa,Psychic


In [59]:
# above is not a Series; it's a DataFrame

In [60]:
pokemon = (
    pd.read_csv(
      'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/pokemon.csv',
      usecols = ['Pokemon']  # only import these columns; must be list-like
    )                           # this is a dataframe with one column
    .squeeze(axis = 'columns')  # convert the single column to a series; naming the axis
                                # keeps the result a Series even for a one-row frame
                                # (a bare .squeeze() would collapse that to a scalar)
)

pokemon

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [61]:
# ellipsis ... indicates a gap in the displayed data; the rows are still there, just not shown; otherwise, the entire screen would be taken up

In [62]:
# another example
google = (
    pd.read_csv(
        'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/google_stock_price.csv',
        usecols = ['Stock Price']    # only read in this column
    )
    .squeeze(axis = 'columns')  # convert to Series
)

google

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

## .head() and .tail() method on a Series
- purpose is to preview first or last n entries
- returns a view
- copies vs. views becomes an issue when cleaning/manipulating/wrangling data

In [63]:
google.head(n = 5)     # view first entries in google

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [64]:
google.tail(n = 5)     # view last entries in google

3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

In [65]:
# view last 10 rows of pokemon
pokemon.tail(n = 10)

711     Bergmite
712      Avalugg
713       Noibat
714      Noivern
715      Xerneas
716      Yveltal
717      Zygarde
718      Diancie
719        Hoopa
720    Volcanion
Name: Pokemon, dtype: object

In [66]:
# above is equivalent to:
pokemon[-10:]

711     Bergmite
712      Avalugg
713       Noibat
714      Noivern
715      Xerneas
716      Yveltal
717      Zygarde
718      Diancie
719        Hoopa
720    Volcanion
Name: Pokemon, dtype: object

## Passing Series to Python built-in functions

In [67]:
# number of elements in series, null and non-null
len(pokemon)

721

In [68]:
len(pd.Series([1, np.nan, 2])) # length of 3

3

In [69]:
type(pokemon) # pokemon is a series

pandas.core.series.Series

In [70]:
dir(pokemon)[:5]  # returns a list of methods and attributes available for pokemon
# equivalent to pokemon.[shift + tab]

['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_REVERSED', '_AXIS_TO_AXIS_NUMBER']

In [71]:
sorted(pokemon)[:5]  # sort pokemon in alphabetical order; returns a list

['Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aegislash']

In [72]:
type(sorted(pokemon)) # sorted() returns a plain (flat) Python list, not a Series

list

In [73]:
# sort google
sorted(google)[:5] # first 5 elements

[49.95, 50.07, 50.12, 50.7, 50.74]

In [74]:
# convert pokemon series to a list
list(pokemon)[:5]

['Bulbasaur', 'Ivysaur', 'Venusaur', 'Charmander', 'Charmeleon']

In [75]:
# convert pokemon series to a dictionary where keys are the index labels and values are the series values
dict(pokemon) ## output is super long (one entry per element); avoid re-running

{0: 'Bulbasaur',
 1: 'Ivysaur',
 2: 'Venusaur',
 3: 'Charmander',
 4: 'Charmeleon',
 5: 'Charizard',
 6: 'Squirtle',
 7: 'Wartortle',
 8: 'Blastoise',
 9: 'Caterpie',
 10: 'Metapod',
 11: 'Butterfree',
 12: 'Weedle',
 13: 'Kakuna',
 14: 'Beedrill',
 15: 'Pidgey',
 16: 'Pidgeotto',
 17: 'Pidgeot',
 18: 'Rattata',
 19: 'Raticate',
 20: 'Spearow',
 21: 'Fearow',
 22: 'Ekans',
 23: 'Arbok',
 24: 'Pikachu',
 25: 'Raichu',
 26: 'Sandshrew',
 27: 'Sandslash',
 28: 'Nidoran',
 29: 'Nidorina',
 30: 'Nidoqueen',
 31: 'Nidoran♂',
 32: 'Nidorino',
 33: 'Nidoking',
 34: 'Clefairy',
 35: 'Clefable',
 36: 'Vulpix',
 37: 'Ninetales',
 38: 'Jigglypuff',
 39: 'Wigglytuff',
 40: 'Zubat',
 41: 'Golbat',
 42: 'Oddish',
 43: 'Gloom',
 44: 'Vileplume',
 45: 'Paras',
 46: 'Parasect',
 47: 'Venonat',
 48: 'Venomoth',
 49: 'Diglett',
 50: 'Dugtrio',
 51: 'Meowth',
 52: 'Persian',
 53: 'Psyduck',
 54: 'Golduck',
 55: 'Mankey',
 56: 'Primeape',
 57: 'Growlithe',
 58: 'Arcanine',
 59: 'Poliwag',
 60: 'Poliwhirl',


In [76]:
# return highest and lowest stock values
print(google.max())
print(google.min())

782.22
49.95


In [77]:
# above is equivalent to:
print(max(google))
print(min(google))

782.22
49.95


## The .sort_values() method

In [78]:
# sort pokemon alphabetically
pokemon.sort_values()

459    Abomasnow
62          Abra
358        Absol
616     Accelgor
680    Aegislash
         ...    
570      Zoroark
569        Zorua
40         Zubat
633     Zweilous
717      Zygarde
Name: Pokemon, Length: 721, dtype: object

In [79]:
# method chaining
(
    pokemon
      .sort_values()   # sort alphabetically
      .head()          # view first entries
)

459    Abomasnow
62          Abra
358        Absol
616     Accelgor
680    Aegislash
Name: Pokemon, dtype: object

In [80]:
# sort in descending order
(
    pokemon
      .sort_values(ascending = False)    # reverse alphabetical order
      .head(n = 5)
)

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
570     Zoroark
Name: Pokemon, dtype: object

In [81]:
# sort google stock prices
(
    google
      .sort_values(ascending = True)
)

11       49.95
9        50.07
0        50.12
10       50.70
12       50.74
         ...  
3010    771.61
3007    772.88
3009    773.18
2859    776.60
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [82]:
# note that index moves with the sort
# 11 was in position 11

In [83]:
# from highest to lowest stock price
(
    google
      .sort_values(ascending = False)
)

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
         ...  
12       50.74
10       50.70
0        50.12
9        50.07
11       49.95
Name: Stock Price, Length: 3012, dtype: float64

## The sort_index() method

In [84]:
# pokemon series, but pokemon is index and type is entry
pokemon2 = (
    pd.read_csv(
        'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/pokemon.csv',
        index_col = 'Pokemon'
    )
      .squeeze(axis = 'columns')  # convert DataFrame to Series
)

pokemon2

Pokemon
Bulbasaur       Grass
Ivysaur         Grass
Venusaur        Grass
Charmander       Fire
Charmeleon       Fire
               ...   
Yveltal          Dark
Zygarde        Dragon
Diancie          Rock
Hoopa         Psychic
Volcanion        Fire
Name: Type, Length: 721, dtype: object

In [85]:
# sort index
pokemon2.sort_index(ascending = True)

Pokemon
Abomasnow      Grass
Abra         Psychic
Absol           Dark
Accelgor         Bug
Aegislash      Steel
              ...   
Zoroark         Dark
Zorua           Dark
Zubat         Poison
Zweilous        Dark
Zygarde       Dragon
Name: Type, Length: 721, dtype: object

In [86]:
# sort reverse alphabetical order
(
    pokemon2
      .sort_index(ascending = False)
)

Pokemon
Zygarde       Dragon
Zweilous        Dark
Zubat         Poison
Zorua           Dark
Zoroark         Dark
              ...   
Aegislash      Steel
Accelgor         Bug
Absol           Dark
Abra         Psychic
Abomasnow      Grass
Name: Type, Length: 721, dtype: object

## Check for Inclusion with Python's in keyword

In [87]:
pokemon # we'll work this series

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [88]:
# is 'car' in 'racecar'
'car' in 'racecar'

True

In [89]:
# is 2 in [1, 2, 3]
2 in [1, 2, 3]

True

In [90]:
# by default, in checks in series index, not series values
'Bulbasaur' in pokemon
  # False because 'Bulbasaur' isn't in the index

False

In [91]:
100 in pokemon
  # True because 100 is an index in pokemon Series

True

In [92]:
# above is equivalent to:
100 in pokemon.index

True

In [93]:
# check if 'Bulbasaur' is in pokemon values
'Bulbasaur' in pokemon.values

True

In [94]:
# check if 'Mewtwo' is in pokemon
'Mewtwo' in pokemon.values

True

In [95]:
# check if 'Darth Vader' is in pokemon
'Darth Vader' in pokemon.values

False

## Extract Series Value by Index Position

In [96]:
# extract the first value; plain [] looks up by index label, and with the default
# 0, 1, 2... index the label 0 is also position 0
pokemon[0]

'Bulbasaur'

In [97]:
# above is equivalent to
pokemon.iloc[0]

'Bulbasaur'

In [98]:
# slices
pokemon[0:10] # 0 thru 9

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
8     Blastoise
9      Caterpie
Name: Pokemon, dtype: object

In [99]:
# above is equivalent to:
pokemon[:10]

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
8     Blastoise
9      Caterpie
Name: Pokemon, dtype: object

In [100]:
# get 51 thru 60
pokemon[51:61]

51       Meowth
52      Persian
53      Psyduck
54      Golduck
55       Mankey
56     Primeape
57    Growlithe
58     Arcanine
59      Poliwag
60    Poliwhirl
Name: Pokemon, dtype: object

In [101]:
# pull out values at index labels 100, 200, and 151 (same as positions here, because the index is 0, 1, 2...)
pokemon[[100, 200, 151]]

100    Electrode
200        Unown
151    Chikorita
Name: Pokemon, dtype: object

In [102]:
# above is equivalent to
pokemon.iloc[[100, 200, 151]]

100    Electrode
200        Unown
151    Chikorita
Name: Pokemon, dtype: object

In [103]:
# more slicing
pokemon[17:87]

17       Pidgeot
18       Rattata
19      Raticate
20       Spearow
21        Fearow
         ...    
82    Farfetch'd
83         Doduo
84        Dodrio
85          Seel
86       Dewgong
Name: Pokemon, Length: 70, dtype: object

In [104]:
# last 13 values
pokemon[-13:]

708    Trevenant
709    Pumpkaboo
710    Gourgeist
711     Bergmite
712      Avalugg
713       Noibat
714      Noivern
715      Xerneas
716      Yveltal
717      Zygarde
718      Diancie
719        Hoopa
720    Volcanion
Name: Pokemon, dtype: object

In [105]:
# above is equivalent to
pokemon.tail(n = 13)

708    Trevenant
709    Pumpkaboo
710    Gourgeist
711     Bergmite
712      Avalugg
713       Noibat
714      Noivern
715      Xerneas
716      Yveltal
717      Zygarde
718      Diancie
719        Hoopa
720    Volcanion
Name: Pokemon, dtype: object

In [106]:
# last element of pokemon
pokemon.iloc[-1]

'Volcanion'

In [107]:
# slice counterpart of the above; returns a one-element Series rather than the bare value
pokemon[-1:]

720    Volcanion
Name: Pokemon, dtype: object

In [108]:
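#pokemon[-1] # won't work: on an integer index, [] looks up the label -1, which doesn't exist (KeyError); use pokemon.iloc[-1]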

## Extract Series value by Index Label

In [109]:
pokemon2

Pokemon
Bulbasaur       Grass
Ivysaur         Grass
Venusaur        Grass
Charmander       Fire
Charmeleon       Fire
               ...   
Yveltal          Dark
Zygarde        Dragon
Diancie          Rock
Hoopa         Psychic
Volcanion        Fire
Name: Type, Length: 721, dtype: object

In [110]:
# pokemon type for 'Bulbasaur'
pokemon2['Bulbasaur']

'Grass'

In [111]:
# above is equivalent to:
pokemon2.loc['Bulbasaur']

'Grass'

In [112]:
# can also look up by integer position; use .iloc for that, because plain [] with an
# integer on a label (string) index relies on a positional fallback that newer pandas
# deprecates (integer keys will be treated as labels)
pokemon2.iloc[0]

'Grass'

In [113]:
# above, we don't see 0 or other numbers for index, but they're "there"

In [114]:
# pull out multiple parts from pokemon2
pokemon2[['Mewtwo', 'Mew', 'Charizard']]

Pokemon
Mewtwo       Psychic
Mew          Psychic
Charizard       Fire
Name: Type, dtype: object

In [115]:
# every index has a label and a numeric position; can use label or numeric position

## The get method
- similar to .iloc[] and .loc[]
- advantage is there's a default parameter that outputs argument when index isn't in series

In [116]:
pokemon2.get(key = 'Bulbasaur', default = 'Not in Series')

'Grass'

In [117]:
# NOTE(review): integer keys on a string index fall back to positions here; newer pandas
# deprecates that fallback (presumably .get would then return the default) - prefer .iloc, see next cell
pokemon2.get(key = [0, 5, 10])  # in index 0, 5, and 10

Pokemon
Bulbasaur    Grass
Charizard     Fire
Metapod        Bug
Name: Type, dtype: object

In [118]:
# above is equivalent to:
pokemon2.iloc[[0, 5, 10]]

Pokemon
Bulbasaur    Grass
Charizard     Fire
Metapod        Bug
Name: Type, dtype: object

In [119]:
pokemon2.get(key = ['Charizard', 'Mew'], default = 'Does not exist')

Pokemon
Charizard       Fire
Mew          Psychic
Name: Type, dtype: object

In [120]:
# above is equivalent to:
pokemon2.loc[['Charizard', 'Mew']]

Pokemon
Charizard       Fire
Mew          Psychic
Name: Type, dtype: object

In [121]:
# some indices are in series; others aren't
pokemon2.get(key = ['Charizard', 'Mew', 'Darth Vader'], default = 'Does not exist')

'Does not exist'

In [122]:
# above, Charizard and Mew are indices in the series, but Darth Vader isn't

## Override a Series value

In [123]:
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [124]:
# lowercase the first entry in place: 'Bulbasaur' -> 'bulbasaur'
pokemon.iloc[0] = pokemon.iloc[0].lower()

In [125]:
pokemon.head()

0     bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [126]:
# change Charmander, Charmeleon, and Charizard to charmander, charmeleon,and charizard
pokemon.iloc[3:6] = ['charmander', 'charmeleon', 'charizard']

In [127]:
print(pokemon.iloc[3:6])

3    charmander
4    charmeleon
5     charizard
Name: Pokemon, dtype: object


In [128]:
pokemon.head(n = 10)

0     bulbasaur
1       Ivysaur
2      Venusaur
3    charmander
4    charmeleon
5     charizard
6      Squirtle
7     Wartortle
8     Blastoise
9      Caterpie
Name: Pokemon, dtype: object

In [129]:
# change values for string indices

In [130]:
pokemon2.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [131]:
# Change Bulbasaur - Grass to - awesome
pokemon2['Bulbasaur'] = 'awesome'

In [132]:
pokemon2.head()

Pokemon
Bulbasaur     awesome
Ivysaur         Grass
Venusaur        Grass
Charmander       Fire
Charmeleon       Fire
Name: Type, dtype: object

In [133]:
# add a new entry to pokemon
pokemon[1500] = 'Darth Vader'

In [134]:
pokemon.tail()

717         Zygarde
718         Diancie
719           Hoopa
720       Volcanion
1500    Darth Vader
Name: Pokemon, dtype: object

## The .copy() method

In [135]:
# import pokemon as a dataframe
pokemon_df = (
    pd.read_csv(
        'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/pokemon.csv',
        usecols = ['Pokemon']
    )
)

In [136]:
pokemon_df.head()

Unnamed: 0,Pokemon
0,Bulbasaur
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon


In [137]:
# view of pokemon_df as a series; a view, not a copy, in other words, another perspective on the same data
# NOTE(review): whether .squeeze() hands back a view depends on the pandas version/settings
# (e.g. Copy-on-Write) - confirm against the version in use
pokemon_series = pokemon_df.squeeze(axis = 'columns')

In [138]:
pokemon_series.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [139]:
# in pokemon_series, change 'Bulbasaur' to 'Whatever'
pokemon_series[0] = 'Whatever'
pokemon_series.head(n = 1)

0    Whatever
Name: Pokemon, dtype: object

In [140]:
# Bulbasaur has changed to Whatever in pokemon_df
pokemon_df.head()

Unnamed: 0,Pokemon
0,Whatever
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon


In [141]:
# reason for this behavior is pokemon_series is a view on pokemon_df
# pokemon_df is the house; pokemon_series is a door to the house; view is just super close to it so you don't see the full house
# change the door, change the house
# changing pokemon_series introduces side effects; mutated pokemon_df

In [142]:
# here's how you only change pokemon_series

# reimport pokemon_df
pokemon_df = (
    pd.read_csv(
        'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/pokemon.csv',
        usecols = ['Pokemon']
    )
)

# make pokemon_series, but this time a copy using .copy() method
pokemon_series = pokemon_df.squeeze(axis = 'columns').copy()

In [143]:
# make change to pokemon_series
pokemon_series[0] = 'Whatever'
pokemon_series.get(key = 0)

'Whatever'

In [144]:
# change didn't impact pokemon_df
pokemon_df.head(n = 1)

Unnamed: 0,Pokemon
0,Bulbasaur


## The inplace parameter
- In a method call, there's a parameter called inplace; if set to True, it modifies the underlying object
- Equivalent to reassigning the result to the variable --> reassignment is the encouraged way
- inplace = True will be deprecated; discouraged from using this

In [145]:
# see here:
# https://github.com/jack-cao-623/python_learning/blob/main/why_you_should_never_use_inplace_true.ipynb

In [149]:
# series
fb_stock = pd.Series(
    data = [100, 120, 115, 120, 150, 90]
)

print(fb_stock)


0    100
1    120
2    115
3    120
4    150
5     90
dtype: int64


In [150]:
# sort values
fb_stock = fb_stock.sort_values()
print(fb_stock)

5     90
0    100
2    115
1    120
3    120
4    150
dtype: int64


In [151]:
# above is equivalent to this, but the reassignment above is preferred;
# inplace = True is discouraged; see other notebook linked to above
fb_stock = pd.Series(
    data = [100, 120, 115, 120, 150, 90]
)

fb_stock.sort_values(inplace = True)

print(fb_stock)

5     90
0    100
2    115
1    120
3    120
4    150
dtype: int64


In [153]:
# create dataframe
google_df = pd.DataFrame(
    {
        'stock': [0, 3, 2, 1]
    }
)

print(google_df)

   stock
0      0
1      3
2      2
3      1


In [156]:
# this results in an error (ValueError in the recorded run), presumably because the
# squeezed Series is a view on google_df and can't be sorted in place;
# .sort_values(inplace = True) also returns None, so it can't be chained any further.
# The error is caught and printed so Restart & Run All continues past this cell.
try:
    (
        google_df
          .squeeze(axis = 'columns')    # view as a series
          .sort_values(inplace = True)  # in-place sort on the squeezed view
    )
except ValueError as err:
    print(f'ValueError: {err}')

ValueError: ignored

## Math Methods on Series objects

In [157]:
# series
google

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [159]:
# .count() method counts the number of non-null/non-missing values
google.count()

3012

In [160]:
# .sum() method adds up series values
google.sum()

1006942.0

In [161]:
# .mean() method tells you average of series values
google.mean()

334.3100929614874

In [162]:
# above is equivalent to:
google.sum() / google.count()

334.3100929614874

In [163]:
# .product() multiplies series values together
# (the product of all 3012 prices is too large for float64, hence the inf below)
google.product()

inf

In [164]:
# .std() method calculates standard deviation
google.std()

173.18720477113115

In [165]:
# .min() method gives you smallest values in series
google.min()

49.95

In [166]:
# .max() method gives you largest value in series
google.max()

782.22

In [167]:
# .median() method gives you median value in series
google.median()

283.315

In [168]:
# .mode() method gives you the mode
google.mode()

0    291.21
dtype: float64

In [173]:
# gives the same most-frequent value as above; note .mode() returns a Series
# (it would list every tied value), whereas this returns only the single top entry
google.value_counts().index[0]

291.21

In [174]:
# get a bunch of summary statistics
google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64