# Series

### Pandas libraries

In [1]:
import pandas as pd
from pandas import DataFrame, Series

### 1. Create a Series object from a Python List

In [7]:
# one dimension array 
# more powerfull version of a Python list
# one column information and store sequence data
# Series = List + Dict
# dtype : object -> String

ice_cream = ['chocolate', 'vanilla', 'strawberry', 'rum raisin']


pd.Series(ice_cream)

0     chocolate
1       vanilla
2    strawberry
3    rum raisin
dtype: object

#### Using integers

In [9]:
# index is auto assigned as numeric starting at zero (left column)
lottery = [4, 8, 15, 16, 23, 42]

pd.Series(lottery)

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

#### Using booleans

In [11]:
registrations = [True, False, False, False, True]

pd.Series(registrations)

0     True
1    False
2    False
3    False
4     True
dtype: bool

### 2. Create a Series object from a Python Dictionary

In [18]:
# dictionary keys are used as index for the Series

person = {
    "name": "Alex",
    "age": 40,
    "city": "Suwanee",
    "state": "GA"    
}

pd.Series(person)

name        Alex
age           40
city     Suwanee
state         GA
dtype: object

### 3. Attributes

In [25]:
names = ['Alex', 'John', 'Max', 'Todd', 'Matthew']

s = pd.Series(names)


In [27]:
# Series values
s.values

array(['Alex', 'John', 'Max', 'Todd', 'Matthew'], dtype=object)

In [28]:
# Series index
s.index

RangeIndex(start=0, stop=5, step=1)

In [30]:
# Series type
s.dtype

dtype('O')

### 4. Methods

In [31]:
prices = [2.99, 4.45, 1.36]

s = pd.Series(prices)

In [33]:
# sum method
s.sum()

8.8

In [34]:
# product method
s.product()

18.095480000000006

In [36]:
# min / max methods
s.min()
s.max()

1.36

### 5. Parameters and arguments

In [38]:
# the size of the list of data and the list of index must be the same
fruits = ['Apple', 'Orange', 'Plum', 'Grape', 'Blueberry']

weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

pd.Series(data=fruits, index=weekdays)

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

In [39]:
# index value can be duplicated
# note that with duplicate indexes, some operations might not work

fruits = ['Apple', 'Orange', 'Plum', 'Grape', 'Blueberry', 'Watermelon']

weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday']

pd.Series(data=fruits, index=weekdays)

Monday            Apple
Tuesday          Orange
Wednesday          Plum
Thursday          Grape
Friday        Blueberry
Monday       Watermelon
dtype: object

### 6. Import series with read_csv method

In [92]:
# read_csv method returns a dataframe by default. To force the return to be a Series object, use squeeze = True

pokemon = pd.read_csv('../resources/pokemon.csv',usecols=['Pokemon'], squeeze = True)

google = pd.read_csv('../resources/google_stock_price.csv', squeeze = True)

In [54]:
# name of the series - use the header of the csv file
pokemon.name

'Pokemon'

### 7. Head and Tail methods

In [57]:
pokemon.head(5)

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [58]:
reduced_pokemon = pokemon.head(10)

In [60]:
last_line = google.tail(1)

last_line

3011    782.22
Name: Stock Price, dtype: float64

In [61]:
type(last_line)

pandas.core.series.Series

### 8. Python Built-In Functions

In [66]:
# len 
# same as 
# pokemon.size
len(pokemon)

721

In [68]:
# sort
# see also :
# pokemon.sort_values, pokemon.sort_index

sorted(pokemon)

['Abomasnow',
 'Abra',
 'Absol',
 'Accelgor',
 'Aegislash',
 'Aerodactyl',
 'Aggron',
 'Aipom',
 'Alakazam',
 'Alomomola',
 'Altaria',
 'Amaura',
 'Ambipom',
 'Amoonguss',
 'Ampharos',
 'Anorith',
 'Arbok',
 'Arcanine',
 'Arceus',
 'Archen',
 'Archeops',
 'Ariados',
 'Armaldo',
 'Aromatisse',
 'Aron',
 'Articuno',
 'Audino',
 'Aurorus',
 'Avalugg',
 'Axew',
 'Azelf',
 'Azumarill',
 'Azurill',
 'Bagon',
 'Baltoy',
 'Banette',
 'Barbaracle',
 'Barboach',
 'Basculin',
 'Bastiodon',
 'Bayleef',
 'Beartic',
 'Beautifly',
 'Beedrill',
 'Beheeyem',
 'Beldum',
 'Bellossom',
 'Bellsprout',
 'Bergmite',
 'Bibarel',
 'Bidoof',
 'Binacle',
 'Bisharp',
 'Blastoise',
 'Blaziken',
 'Blissey',
 'Blitzle',
 'Boldore',
 'Bonsly',
 'Bouffalant',
 'Braixen',
 'Braviary',
 'Breloom',
 'Bronzong',
 'Bronzor',
 'Budew',
 'Buizel',
 'Bulbasaur',
 'Buneary',
 'Bunnelby',
 'Burmy',
 'Butterfree',
 'Cacnea',
 'Cacturne',
 'Camerupt',
 'Carbink',
 'Carnivine',
 'Carracosta',
 'Carvanha',
 'Cascoon',
 'Castform',


In [69]:
# convert to a list or to a dict
list(pokemon)
dict(pokemon)

{0: 'Bulbasaur',
 1: 'Ivysaur',
 2: 'Venusaur',
 3: 'Charmander',
 4: 'Charmeleon',
 5: 'Charizard',
 6: 'Squirtle',
 7: 'Wartortle',
 8: 'Blastoise',
 9: 'Caterpie',
 10: 'Metapod',
 11: 'Butterfree',
 12: 'Weedle',
 13: 'Kakuna',
 14: 'Beedrill',
 15: 'Pidgey',
 16: 'Pidgeotto',
 17: 'Pidgeot',
 18: 'Rattata',
 19: 'Raticate',
 20: 'Spearow',
 21: 'Fearow',
 22: 'Ekans',
 23: 'Arbok',
 24: 'Pikachu',
 25: 'Raichu',
 26: 'Sandshrew',
 27: 'Sandslash',
 28: 'Nidoran',
 29: 'Nidorina',
 30: 'Nidoqueen',
 31: 'Nidoran♂',
 32: 'Nidorino',
 33: 'Nidoking',
 34: 'Clefairy',
 35: 'Clefable',
 36: 'Vulpix',
 37: 'Ninetales',
 38: 'Jigglypuff',
 39: 'Wigglytuff',
 40: 'Zubat',
 41: 'Golbat',
 42: 'Oddish',
 43: 'Gloom',
 44: 'Vileplume',
 45: 'Paras',
 46: 'Parasect',
 47: 'Venonat',
 48: 'Venomoth',
 49: 'Diglett',
 50: 'Dugtrio',
 51: 'Meowth',
 52: 'Persian',
 53: 'Psyduck',
 54: 'Golduck',
 55: 'Mankey',
 56: 'Primeape',
 57: 'Growlithe',
 58: 'Arcanine',
 59: 'Poliwag',
 60: 'Poliwhirl',


In [71]:
# max and min functions
max(pokemon)

'Zygarde'

In [73]:
# min function

min(google)

49.95

### 9. Additional Attributes

In [74]:
# check for unique values

pokemon.is_unique

True

In [75]:
google.is_unique

False

In [76]:
# number of dimensions
pokemon.ndim

1

In [78]:
# shape (rows x columns)

pokemon.shape

(721,)

In [79]:
# size - counts nulls values

google.size

3012

### 10. Sort values

In [87]:
# sort by pokemon name

pokemon.sort_values(ascending = True).head(5)

459    Abomasnow
62          Abra
358        Absol
616     Accelgor
680    Aegislash
Name: Pokemon, dtype: object

### 11. Inplace operations

In [91]:
google.sort_values(ascending = False, inplace = True)

google.head(5)

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
Name: Stock Price, dtype: float64

### 12. Sort Index

In [99]:
pokemon.sort_values(ascending = False, inplace = True)
pokemon.head(5)

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
570     Zoroark
Name: Pokemon, dtype: object

In [101]:
# sort by index put the series in the original shape

pokemon.sort_index(ascending = True, inplace=True)
pokemon.head(5)

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

### 13. in Keyword

In [103]:
pokemon.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [106]:
# by default, Pandas look at the index and not for the values

"Bulbasaur" in pokemon

False

In [108]:
2 in pokemon
2 in pokemon.index

True

In [109]:
# in order to solve it, you need to use Series.values method

"Bulbasaur" in pokemon.values

True

### 14. Extract values by Index Position

In [111]:
# using bracket 

pokemon[10]

'Metapod'

In [112]:
# using a list

pokemon [[100, 200, 300]]

100    Electrode
200        Unown
300     Delcatty
Name: Pokemon, dtype: object

In [113]:
# slice

pokemon [50:55]

50    Dugtrio
51     Meowth
52    Persian
53    Psyduck
54    Golduck
Name: Pokemon, dtype: object

### 15. Extract values by Index Label

In [115]:
# reloading pokemon series to use a different index first

pokemon = pd.read_csv('../resources/pokemon.csv', index_col='Pokemon', squeeze = True)

pokemon.head(5)

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [116]:
# even using string index, you still can use the index numeric to extract values

pokemon[0]

'Grass'

In [117]:
# using the string value

pokemon['Venusaur']

'Grass'

In [119]:
# using a list to extract values

pokemon [['Charmeleon', 'Ivysaur']]

Pokemon
Charmeleon     Fire
Ivysaur       Grass
Name: Type, dtype: object

In [120]:
# using a list to extract values that does not exists
# single values returns an error


pokemon [['Charmeleon', 'Ivysaur', 'DOES_NOT_EXISTS']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]


Pokemon
Charmeleon          Fire
Ivysaur            Grass
DOES_NOT_EXISTS      NaN
Name: Type, dtype: object

### 16. Extract values by get method

In [122]:
# always try to keep the index sorted in order to help with performance
pokemon.sort_index(inplace=True)
pokemon.head(5)

Pokemon
Abomasnow      Grass
Abra         Psychic
Absol           Dark
Accelgor         Bug
Aegislash      Steel
Name: Type, dtype: object

In [123]:
# Using get method
pokemon.get('Abomasnow')

'Grass'

In [125]:
# Using a list
pokemon.get(['Abomasnow', 'Aegislash'])

Pokemon
Abomasnow    Grass
Aegislash    Steel
Name: Type, dtype: object

In [129]:
# Passing a not not valid key
# default not found only works for single arguments

pokemon.get('NOT_FOUND', default='This is not a pokemon')

'This is not a pokemon'

### 17. Math methods on Series Objects

In [131]:
google.head(5)

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [133]:
# count
# count exclude NaN values / len does not exclude

google.count()

3012

In [134]:
# sum

google.sum()

1006942.0

In [136]:
# mean

google.mean()

334.31009296148744

In [138]:
# standard deviation

google.std()

173.18720477113106

In [139]:
# max

google.max()

782.22

In [140]:
# min

google.min()

49.95

In [141]:
# median

google.median()

283.315

In [142]:
# mode

google.mode()

0    291.21
dtype: float64

In [143]:
# describe - stats summary

google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

### 18. idxmax and idxmin Methods

In [147]:
s_max = google.max()
s_min = google.min()

print (f"Max : {s_max} \t Min : {s_min}")

Max : 782.22 	 Min : 49.95


In [148]:
# return the index position that is storing the max/min vlaues

i_max = google.idxmax()
i_min = google.idxmin()

print (f"Max : {i_max} \t Min : {i_min}")

Max : 3011 	 Min : 11


In [150]:
s_max_value = google[3011]
s_min_value = google[11]

print (f"Max : {s_max_value} \t Min : {s_min_value}")

Max : 782.22 	 Min : 49.95


In [154]:
# just in one line

print (f"Max : {google[google.idxmax()]} \t Min : {google[google.idxmin()]}")

Max : 782.22 	 Min : 49.95


### 19. The .value_counts() Method

In [157]:
pokemon.head(5)

Pokemon
Abomasnow      Grass
Abra         Psychic
Absol           Dark
Accelgor         Bug
Aegislash      Steel
Name: Type, dtype: object

In [156]:
# count of the unique values (distinct count)

pokemon.value_counts()

Water       105
Normal       93
Grass        66
Bug          63
Fire         47
Psychic      47
Rock         41
Electric     36
Ground       30
Dark         28
Poison       28
Fighting     25
Dragon       24
Ghost        23
Ice          23
Steel        22
Fairy        17
Flying        3
Name: Type, dtype: int64

In [160]:
# Reversing the presentation order

pokemon.value_counts(ascending = True)

Flying        3
Fairy        17
Steel        22
Ice          23
Ghost        23
Dragon       24
Fighting     25
Poison       28
Dark         28
Ground       30
Electric     36
Rock         41
Psychic      47
Fire         47
Bug          63
Grass        66
Normal       93
Water       105
Name: Type, dtype: int64

### ***20. The .apply() Method***

In [161]:
google.head(6)

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
5    53.90
Name: Stock Price, dtype: float64

In [168]:
# create a custom function

def classify_performance(num):
    if num < 300:
        return "OK"
    
    elif num >= 300 and num < 650:
        return "Satisfactory"
    
    else:
        return "Amazing"



In [169]:
# call the apply method to call the custom function

google.apply(classify_performance).value_counts()

OK              1721
Satisfactory    1075
Amazing          216
Name: Stock Price, dtype: int64

In [171]:
# using lambda expression

google.apply(lambda stock_price : stock_price * -1).head(10)

0   -50.12
1   -54.10
2   -54.65
3   -52.38
4   -52.95
5   -53.90
6   -53.02
7   -50.95
8   -51.13
9   -50.07
Name: Stock Price, dtype: float64

## 21. The .map() Method

In [175]:
pokemon_names = pd.read_csv('../resources/pokemon.csv',usecols=['Pokemon'], squeeze = True)
pokemon_names.head(5)

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [177]:
pokemon_types = pd.read_csv('../resources/pokemon.csv',index_col='Pokemon', squeeze = True)
pokemon_types.head(5)

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [180]:
# simular to vlookup method in Excel or a join in SQL
# 
# use to join 2 datasets
#
# on pokemon_names series
#    get the first value = Bulbasaur
# look at pokemon_types series
#    return the value for the index name = Bulbasaur     

pokemon_names.map(pokemon_types).head(5)

0    Grass
1    Grass
2    Grass
3     Fire
4     Fire
Name: Pokemon, dtype: object

In [182]:
pokemon_names.map(pokemon_types).value_counts()

Water       105
Normal       93
Grass        66
Bug          63
Fire         47
Psychic      47
Rock         41
Electric     36
Ground       30
Dark         28
Poison       28
Fighting     25
Dragon       24
Ice          23
Ghost        23
Steel        22
Fairy        17
Flying        3
Name: Pokemon, dtype: int64