# Series

In [1]:
%config Completer.use_jedi = False
%config IPCompleter.greedy=True


In [2]:
import pandas as pd
import numpy as np

## Criando um objeto `pd.Series` a partir de uma lista Python

In [3]:
# Build a Series from a plain Python list of strings; pandas supplies a
# default integer index starting at 0.
countries = [
    "Brazil", "India", "USA", "Italy", "Germany", "France",
    "Argentina", "Canada", "Norway", "Portugal", "Spain",
]

pd.Series(data=countries)

0        Brazil
1         India
2           USA
3         Italy
4       Germany
5        France
6     Argentina
7        Canada
8        Norway
9      Portugal
10        Spain
dtype: object

In [4]:
# A list of integers: the Series dtype is inferred from the elements.
lottery = [
    1, 76, 54,
    3, 89, 30,
]

pd.Series(data=lottery)

0     1
1    76
2    54
3     3
4    89
5    30
dtype: int64

In [5]:
# A list of booleans produces a boolean Series.
valid = [
    True,
    False,
    False,
    True,
    False,
]

pd.Series(data=valid)

0     True
1    False
2    False
3     True
4    False
dtype: bool

## Criando um objeto `Series` a partir de um dicionário Python

In [6]:
# A dict maps directly onto a Series: the keys become the index labels and
# the values become the data (insertion order is kept).
webster = dict(
    Oi="Normal",
    Oiii="Feliz",
    Oiiiiii="Doidao",
)

pd.Series(data=webster)

Oi         Normal
Oiii        Feliz
Oiiiiii    Doidao
dtype: object

### Atributos de um `Series`

In [7]:
# Sample Series of strings, inspected by the attribute cells that follow
# (`.values`, `.index`, `.dtype`).
about_me = [
    "Leonardo",
    "24 anos",
    "Palmeirense",
    "Analista de Dados",
    "Birigui-SP",
]

myseries = pd.Series(data=about_me)

In [8]:
myseries.values

array(['Leonardo', '24 anos', 'Palmeirense', 'Analista de Dados',
       'Birigui-SP'], dtype=object)

In [9]:
myseries.index # retorna RangeIndex com inicio, fim e passo

RangeIndex(start=0, stop=5, step=1)

In [10]:
myseries.dtype # retorna o tipo de dados dos elementos da série

dtype('O')

### Métodos embutidos em um `pd.Series`

In [11]:
# Numeric sample data; `s` is reused by the aggregation cell below.
prices = [
    1.22,
    5.88,
    3.99,
    1.59,
    10.99,
]
s = pd.Series(data=prices)
s

0     1.22
1     5.88
2     3.99
3     1.59
4    10.99
dtype: float64

In [12]:
# Alguns métodos que funcionam com séries numericas
s.sum(), s.mean(), s.median(), s.product()

(23.67, 4.734, 3.99, 500.1552930024)

### Parâmetros e argumentos de um `pd.Series`

In [13]:
# Parâmetro - Argumento
# Volume - 1 a 10
# Canal - 1 a 99

In [14]:
# Two parallel lists of equal length: either one can serve as the data while
# the other provides the index labels.
fruits = [
    "Apple",
    "Orange",
    "Grape",
    "Blackberry",
    "Strawberry",
]
dow = [
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
]

In [15]:
pd.Series(data=fruits, index=dow) #Escolhendo o que usar como index de uma Série

Mon         Apple
Tue        Orange
Wed         Grape
Thu    Blackberry
Fri    Strawberry
dtype: object

In [16]:
# Roles swapped: the weekdays are now the data and the fruits are the index.
# The two calls are equivalent (`data` passed by keyword vs. positionally);
# only the last expression of a cell is displayed.
pd.Series(data=dow, index=fruits)
pd.Series(dow, index=fruits)

Apple         Mon
Orange        Tue
Grape         Wed
Blackberry    Thu
Strawberry    Fri
dtype: object

### Importando um `pd.Series` a partir de um arquivo CSV

In [17]:
# `read_csv` always returns a DataFrame; `.squeeze("columns")` collapses a
# single-column DataFrame into a Series. The `squeeze=True` keyword of
# `read_csv` was deprecated in pandas 1.4 and removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv', usecols=["Stock Price"]).squeeze("columns")

### Os métodos `.head()` e `.tail()`

In [18]:
top = pokemon.head(9)
top

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
8     Blastoise
Name: Pokemon, dtype: object

In [19]:
last = google.tail(3)
last

3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

### Funções `built-in` do Python e um `pd.Series`

In [20]:
# Reload both Series. `.squeeze("columns")` replaces the `squeeze=True` keyword
# of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [21]:
len(pokemon), len(google)

(721, 3012)

In [22]:
type(pokemon)

pandas.core.series.Series

In [23]:
dir(pokemon)

['T',
 '_AXIS_LEN',
 '_AXIS_NAMES',
 '_AXIS_NUMBERS',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__

In [24]:
sorted(google)

[49.95,
 50.07,
 50.12,
 50.7,
 50.74,
 50.95,
 51.1,
 51.1,
 51.13,
 52.38,
 52.61,
 52.95,
 53.02,
 53.7,
 53.9,
 54.1,
 54.65,
 55.69,
 55.94,
 56.93,
 58.69,
 58.86,
 59.07,
 59.13,
 59.62,
 59.86,
 60.35,
 63.37,
 64.74,
 65.47,
 66.22,
 67.46,
 67.56,
 68.47,
 68.63,
 68.8,
 69.12,
 69.36,
 70.17,
 70.38,
 70.93,
 71.98,
 73.9,
 74.51,
 74.62,
 82.47,
 83.68,
 83.69,
 83.85,
 84.27,
 84.59,
 84.62,
 84.91,
 85.14,
 85.63,
 85.74,
 86.13,
 86.16,
 86.19,
 86.19,
 86.63,
 87.29,
 87.41,
 87.71,
 88.06,
 88.15,
 88.47,
 88.81,
 89.21,
 89.22,
 89.26,
 89.4,
 89.54,
 89.56,
 89.61,
 89.61,
 89.7,
 89.8,
 89.89,
 89.9,
 89.93,
 89.93,
 89.95,
 90.11,
 90.13,
 90.16,
 90.27,
 90.35,
 90.43,
 90.58,
 90.62,
 90.81,
 90.9,
 90.91,
 91.42,
 91.78,
 92.26,
 92.34,
 92.41,
 92.42,
 92.5,
 92.51,
 92.55,
 92.84,
 92.86,
 92.89,
 92.94,
 93.06,
 93.39,
 93.41,
 93.61,
 93.61,
 93.86,
 93.9,
 93.9,
 93.95,
 94.05,
 94.18,
 94.19,
 94.31,
 94.35,
 94.52,
 94.53,
 95.07,
 95.22,
 95.59,
 95.6,
 

In [25]:
list(google)

[50.12,
 54.1,
 54.65,
 52.38,
 52.95,
 53.9,
 53.02,
 50.95,
 51.13,
 50.07,
 50.7,
 49.95,
 50.74,
 51.1,
 51.1,
 52.61,
 53.7,
 55.69,
 55.94,
 56.93,
 58.69,
 59.62,
 58.86,
 59.13,
 60.35,
 59.86,
 59.07,
 63.37,
 65.47,
 64.74,
 66.22,
 67.46,
 69.12,
 68.47,
 69.36,
 68.8,
 67.56,
 68.63,
 70.38,
 70.93,
 71.98,
 74.51,
 73.9,
 70.17,
 74.62,
 86.13,
 93.61,
 90.81,
 92.89,
 96.55,
 95.22,
 97.92,
 97.34,
 95.74,
 92.26,
 84.59,
 86.19,
 84.27,
 83.85,
 91.42,
 90.91,
 92.34,
 86.19,
 86.16,
 83.69,
 84.62,
 82.47,
 83.68,
 87.29,
 89.61,
 90.43,
 90.9,
 89.89,
 89.61,
 90.11,
 88.06,
 85.63,
 84.91,
 86.63,
 85.74,
 85.14,
 89.26,
 89.8,
 88.15,
 89.95,
 92.42,
 91.78,
 93.06,
 93.86,
 95.86,
 96.28,
 96.35,
 98.7,
 96.3,
 101.25,
 97.15,
 96.66,
 94.18,
 96.83,
 97.43,
 96.67,
 97.59,
 97.57,
 99.89,
 101.85,
 98.55,
 96.86,
 94.05,
 90.27,
 88.47,
 94.53,
 93.95,
 95.07,
 97.71,
 95.85,
 102.88,
 105.32,
 102.08,
 97.92,
 99.22,
 95.69,
 93.9,
 93.61,
 96.4,
 97.52,
 99.11,
 

In [26]:
dict(pokemon)

{0: 'Bulbasaur',
 1: 'Ivysaur',
 2: 'Venusaur',
 3: 'Charmander',
 4: 'Charmeleon',
 5: 'Charizard',
 6: 'Squirtle',
 7: 'Wartortle',
 8: 'Blastoise',
 9: 'Caterpie',
 10: 'Metapod',
 11: 'Butterfree',
 12: 'Weedle',
 13: 'Kakuna',
 14: 'Beedrill',
 15: 'Pidgey',
 16: 'Pidgeotto',
 17: 'Pidgeot',
 18: 'Rattata',
 19: 'Raticate',
 20: 'Spearow',
 21: 'Fearow',
 22: 'Ekans',
 23: 'Arbok',
 24: 'Pikachu',
 25: 'Raichu',
 26: 'Sandshrew',
 27: 'Sandslash',
 28: 'Nidoran',
 29: 'Nidorina',
 30: 'Nidoqueen',
 31: 'Nidoran♂',
 32: 'Nidorino',
 33: 'Nidoking',
 34: 'Clefairy',
 35: 'Clefable',
 36: 'Vulpix',
 37: 'Ninetales',
 38: 'Jigglypuff',
 39: 'Wigglytuff',
 40: 'Zubat',
 41: 'Golbat',
 42: 'Oddish',
 43: 'Gloom',
 44: 'Vileplume',
 45: 'Paras',
 46: 'Parasect',
 47: 'Venonat',
 48: 'Venomoth',
 49: 'Diglett',
 50: 'Dugtrio',
 51: 'Meowth',
 52: 'Persian',
 53: 'Psyduck',
 54: 'Golduck',
 55: 'Mankey',
 56: 'Primeape',
 57: 'Growlithe',
 58: 'Arcanine',
 59: 'Poliwag',
 60: 'Poliwhirl',


In [27]:
max(pokemon), min(pokemon), max(google), min(google)

('Zygarde', 'Abomasnow', 782.22, 49.95)

### Os atributos de um `pd.Series`

In [28]:
# Reload both Series. `.squeeze("columns")` replaces the `squeeze=True` keyword
# of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [29]:
# verificando se os elementos da série são unicos
google.is_unique, pokemon.is_unique

(False, True)

In [30]:
# atributos relacionados a formato e tamanho, numero de dimensões
google.ndim, google.shape, pokemon.shape

(1, (3012,), (721,))

In [31]:
pokemon.size, google.size

(721, 3012)

### O método `.sort_values()` de um `pd.Series`

In [32]:
# Reload both Series. `.squeeze("columns")` replaces the `squeeze=True` keyword
# of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [33]:
# ordenando de forma crescente
pokemon.sort_values().head(6)

459     Abomasnow
62           Abra
358         Absol
616      Accelgor
680     Aegislash
141    Aerodactyl
Name: Pokemon, dtype: object

In [34]:
# ordenando de forma decrescente
pokemon.sort_values(ascending=False).head(4)

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
Name: Pokemon, dtype: object

### O parâmetro `inplace` e o método `.sort_index()`

In [35]:
# Reload both Series so the in-place sorts below start from the original
# order. `.squeeze("columns")` replaces the `squeeze=True` keyword of
# `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [36]:
pokemon.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [37]:
# substituindo o objeto com o parâmetro inplace
pokemon.sort_values(ascending=False, inplace=True)
pokemon.head(3)

717     Zygarde
633    Zweilous
40        Zubat
Name: Pokemon, dtype: object

In [38]:
google.head(3)

0    50.12
1    54.10
2    54.65
Name: Stock Price, dtype: float64

In [39]:
google.sort_values(ascending=False, inplace=True)
google.head(3)

3011    782.22
2859    776.60
3009    773.18
Name: Stock Price, dtype: float64

In [40]:
google.sort_index(inplace=True)
google.head(3)

0    50.12
1    54.10
2    54.65
Name: Stock Price, dtype: float64

### A keyword `in` do Python

In [41]:
# Reload both Series. `.squeeze("columns")` replaces the `squeeze=True` keyword
# of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [42]:
pokemon.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [43]:
# como (não) usar o operador `in` do Python em uma Series do pandas: `in` testa o index, não os valores
"Pikachu" in pokemon, 100 in pokemon.index, "Pikachu" in pokemon.values

(False, True, True)

In [44]:
"Ivy" in pokemon.values

False

### Extraindo valores pelo índice

In [45]:
# Reload both Series. `.squeeze("columns")` replaces the `squeeze=True` keyword
# of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [46]:
pokemon[200]

'Unown'

In [47]:
google[100]

96.67

In [48]:
pokemon[[100,500,300]]

100    Electrode
500     Oshawott
300     Delcatty
Name: Pokemon, dtype: object

In [49]:
pokemon[1:20]

1        Ivysaur
2       Venusaur
3     Charmander
4     Charmeleon
5      Charizard
6       Squirtle
7      Wartortle
8      Blastoise
9       Caterpie
10       Metapod
11    Butterfree
12        Weedle
13        Kakuna
14      Beedrill
15        Pidgey
16     Pidgeotto
17       Pidgeot
18       Rattata
19      Raticate
Name: Pokemon, dtype: object

### Extraindo valores pelo rótulo do índice

In [50]:
# Use the "Pokemon" column as the index; the single remaining column is then
# collapsed into a Series with `.squeeze("columns")` (the `squeeze=True`
# keyword of `read_csv` was removed in pandas 2.0).
pokemon = pd.read_csv('../Dados/pokemon.csv', index_col=["Pokemon"]).squeeze("columns")
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [51]:
# The first element can be reached by position (`.iloc`) or by label (`.loc`).
# Plain `pokemon[0]` on a string-labelled Series relied on the positional
# fallback of `[]`, which pandas deprecated in 2.1.
pokemon.iloc[0] == pokemon.loc['Bulbasaur']

True

In [52]:
pokemon["Pikachu"], pokemon["Ditto"]

('Electric', 'Normal')

In [53]:
pokemon[["Charizard", "Jolteon"]]

Pokemon
Charizard        Fire
Jolteon      Electric
Name: Type, dtype: object

In [54]:
pokemon["Charizard":"Slowbro"]

Pokemon
Charizard     Fire
Squirtle     Water
Wartortle    Water
Blastoise    Water
Caterpie       Bug
             ...  
Golem         Rock
Ponyta        Fire
Rapidash      Fire
Slowpoke     Water
Slowbro      Water
Name: Type, Length: 75, dtype: object

### O Método `.get()` de um `pd.Series`

In [55]:
# Series indexed by name. `.squeeze("columns")` replaces the `squeeze=True`
# keyword of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', index_col=["Pokemon"]).squeeze("columns")


In [56]:
pokemon.sort_index(inplace=True)
pokemon.head()

Pokemon
Abomasnow      Grass
Abra         Psychic
Absol           Dark
Accelgor         Bug
Aegislash      Steel
Name: Type, dtype: object

In [57]:
pokemon.get('Absol')

'Dark'

In [58]:
# `.get()` looks up index *labels*. Passing integers to a string-labelled
# Series relied on the positional fallback of `[]` (deprecated in pandas 2.1),
# so translate positions 0, 5 and 7 into their labels first.
pokemon.get(pokemon.index[[0, 5, 7]])

Pokemon
Abomasnow      Grass
Aerodactyl      Rock
Aipom         Normal
Name: Type, dtype: object

In [59]:
pokemon.get(key='Pikachu', default='Index not on Series')

'Electric'

In [60]:
pokemon.get(key='Pikachuu', default='Index not on Series')

'Index not on Series'

### Métodos matemáticos e estatísticos em um `pd.Series`

In [61]:
# Load the stock prices as a Series. `.squeeze("columns")` replaces the
# `squeeze=True` keyword of `read_csv`, which was removed in pandas 2.0.
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")
google.head(4)

0    50.12
1    54.10
2    54.65
3    52.38
Name: Stock Price, dtype: float64

In [62]:
# MÉTODOS MATEMÁTICOS
google.count(), google.sum(), google.mean(), google.sum()/google.count()

(3012, 1006942.0, 334.3100929614874, 334.3100929614874)

In [63]:
# MÉTODOS ESTATÍSTICOS
google.max(), google.min(), google.median(), google.mode()

(782.22,
 49.95,
 283.315,
 0    291.21
 dtype: float64)

In [64]:
# .mode() retorna varios valores, se houver
pd.Series([1,2,3,1,3,4,1,3,5]).mode()

0    1
1    3
dtype: int64

In [65]:
#todos os métodos matemáticos de uma vez, pode usar o .get('key') para pegar a medida requerida
google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

### Os métodos `.idxmax()` e `.idxmin()`

In [66]:
# Load the stock prices as a Series. `.squeeze("columns")` replaces the
# `squeeze=True` keyword of `read_csv`, which was removed in pandas 2.0.
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")

In [67]:
google.idxmax(), google[google.idxmax()], google.max()

(3011, 782.22, 782.22)

In [68]:
google.idxmin(), google[google.idxmin()], google.min()

(11, 49.95, 49.95)

### O método `.value_counts()`

In [69]:
# Series indexed by name. `.squeeze("columns")` replaces the `squeeze=True`
# keyword of `read_csv`, which was removed in pandas 2.0.
pokemon = pd.read_csv('../Dados/pokemon.csv', index_col=["Pokemon"]).squeeze("columns")


In [70]:
# retorna os valores distintos e a quantidade de ocorrências de cada um
pokemon.value_counts()

Water       105
Normal       93
Grass        66
Bug          63
Psychic      47
Fire         47
Rock         41
Electric     36
Ground       30
Poison       28
Dark         28
Fighting     25
Dragon       24
Ghost        23
Ice          23
Steel        22
Fairy        17
Flying        3
Name: Type, dtype: int64

In [71]:
# podemos aninhar métodos a um .value_counts()
pokemon.value_counts().sum() == pokemon.count()

True

### O método `.apply()`

In [72]:
# Load the stock prices as a Series. `.squeeze("columns")` replaces the
# `squeeze=True` keyword of `read_csv`, which was removed in pandas 2.0.
google = pd.read_csv('../Dados/google_stock_price.csv').squeeze("columns")
google.head(5)

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [73]:
# funcao customizada para aplicar a serie
def classify(number):
    """Bucket a number into a qualitative label.

    Returns "OK" for values below 300, "Satisfactory" for values from 300 up
    to (but not including) 600, and "Fantastic" for everything else.
    """
    if number < 300:
        return "OK"
    # Reaching this point means `number < 300` was false, so only the upper
    # bound still needs checking.
    if number < 600:
        return "Satisfactory"
    return "Fantastic"

In [74]:
# aplica uma função a todos os dados da serie
google.apply(classify)

0              OK
1              OK
2              OK
3              OK
4              OK
          ...    
3007    Fantastic
3008    Fantastic
3009    Fantastic
3010    Fantastic
3011    Fantastic
Name: Stock Price, Length: 3012, dtype: object

In [75]:
google.apply(classify).value_counts()

OK              1721
Satisfactory    1011
Fantastic        280
Name: Stock Price, dtype: int64

In [76]:
# utilizando funções anonimas com lambda
google.apply(lambda number: number+1).head()

0    51.12
1    55.10
2    55.65
3    53.38
4    53.95
Name: Stock Price, dtype: float64

### O método `.map()`

In [77]:
# `pokemon_names`: Series of names with the default integer index.
# `pokemon_types`: Series indexed by name (used as the lookup table for `.map()`).
# `.squeeze("columns")` replaces the `squeeze=True` keyword of `read_csv`,
# which was removed in pandas 2.0.
pokemon_names = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
pokemon_types = pd.read_csv('../Dados/pokemon.csv', index_col=["Pokemon"]).squeeze("columns")

In [78]:
# mapeando cada nome ao seu tipo; o resultado mantém o index numérico original de `pokemon_names`
pokemon_names.map(pokemon_types).head()

0    Grass
1    Grass
2    Grass
3     Fire
4     Fire
Name: Pokemon, dtype: object

In [79]:
# Same lookup as before, but this time the name -> type table is a plain dict.
# `.squeeze("columns")` replaces the `squeeze=True` keyword of `read_csv`,
# which was removed in pandas 2.0.
pokemon_names = pd.read_csv('../Dados/pokemon.csv', usecols=["Pokemon"]).squeeze("columns")
pokemon_types = pd.read_csv('../Dados/pokemon.csv', index_col=["Pokemon"]).squeeze("columns").to_dict()

In [80]:
pokemon_types

{'Bulbasaur': 'Grass',
 'Ivysaur': 'Grass',
 'Venusaur': 'Grass',
 'Charmander': 'Fire',
 'Charmeleon': 'Fire',
 'Charizard': 'Fire',
 'Squirtle': 'Water',
 'Wartortle': 'Water',
 'Blastoise': 'Water',
 'Caterpie': 'Bug',
 'Metapod': 'Bug',
 'Butterfree': 'Bug',
 'Weedle': 'Bug',
 'Kakuna': 'Bug',
 'Beedrill': 'Bug',
 'Pidgey': 'Normal',
 'Pidgeotto': 'Normal',
 'Pidgeot': 'Normal',
 'Rattata': 'Normal',
 'Raticate': 'Normal',
 'Spearow': 'Normal',
 'Fearow': 'Normal',
 'Ekans': 'Poison',
 'Arbok': 'Poison',
 'Pikachu': 'Electric',
 'Raichu': 'Electric',
 'Sandshrew': 'Ground',
 'Sandslash': 'Ground',
 'Nidoran': 'Poison',
 'Nidorina': 'Poison',
 'Nidoqueen': 'Poison',
 'Nidoran♂': 'Poison',
 'Nidorino': 'Poison',
 'Nidoking': 'Poison',
 'Clefairy': 'Fairy',
 'Clefable': 'Fairy',
 'Vulpix': 'Fire',
 'Ninetales': 'Fire',
 'Jigglypuff': 'Normal',
 'Wigglytuff': 'Normal',
 'Zubat': 'Poison',
 'Golbat': 'Poison',
 'Oddish': 'Grass',
 'Gloom': 'Grass',
 'Vileplume': 'Grass',
 'Paras': 'Bug'

In [81]:
pokemon_names.map(pokemon_types).head(10)

0    Grass
1    Grass
2    Grass
3     Fire
4     Fire
5     Fire
6    Water
7    Water
8    Water
9      Bug
Name: Pokemon, dtype: object