In [69]:
import pandas as pd
import numpy as np

# Series

In [2]:
a = pd.Series([1,2,3,4])

In [3]:
a.values

array([1, 2, 3, 4], dtype=int64)

In [4]:
a.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [5]:
a = pd.Series([1,2,3,4], index=['a','b','c','d'])

In [6]:
a

a    1
b    2
c    3
d    4
dtype: int64

In [7]:
a['a'] = 10 # assign through the index label 'a' (mutates the Series in place)
a

a    10
b     2
c     3
d     4
dtype: int64

In [9]:
a[a > 3] # filtering with boolean array

a    10
d     4
dtype: int64

In [10]:
a * 2 # scalar multiplication

a    20
b     4
c     6
d     8
dtype: int64

In [12]:
'b' in a # membership is tested against the index -> equivalent to 'b' in a.index

True

In [13]:
a = pd.Series({'oi':1, 'tim':2, 'vivo':3}) # dict

In [14]:
a

oi      1
tim     2
vivo    3
dtype: int64

In [18]:
a.name = 'operadoras totais'
a.index.name = 'operadoras'
a

operadoras
oi            1
tim           2
vivo          3
Name: operadoras totais, dtype: int64

## Automatic alignment

In [15]:
b = pd.Series({'oi':2, 'tim':3})

In [16]:
a + b

oi       3
tim      5
vivo   NaN
dtype: float64

# DataFrame

In [8]:
a = pd.DataFrame({'nome':['ab', 'ju', 'ele'], 'idade':[25, 27, 22]})

In [9]:
a['idade'] # column

0    25
1    27
2    22
Name: idade, dtype: int64

In [10]:
a.idade

0    25
1    27
2    22
Name: idade, dtype: int64

In [12]:
# Select one row as a Series. `.ix` has been removed from pandas; on this
# integer-labelled index it resolved by label, so `.loc` is the direct replacement.
a.loc[0] # row

idade    25
nome     ab
Name: 0, dtype: object

In [13]:
a['idade'] = range(3) # a sequence is assigned element by element: one value per row

In [14]:
a

Unnamed: 0,idade,nome
0,0,ab
1,1,ju
2,2,ele


### nova coluna

In [15]:
a['altura'] = [1.6, 1.5, 1.8]

In [16]:
a

Unnamed: 0,idade,nome,altura
0,0,ab,1.6
1,1,ju,1.5
2,2,ele,1.8


### removendo coluna

In [18]:
del a['altura']

## DataFrame from dict of dicts

In [19]:
# Nested dicts: outer keys become the columns, inner keys become the row labels.
a = pd.DataFrame(
    {
        'idade': {'ab': 25, 'ju': 27, 'ele': 28},
        'altura': {'ab': 1.7, 'ju': 1.5, 'ele': 1.8},
    }
)

In [20]:
a

Unnamed: 0,altura,idade
ab,1.7,25
ele,1.8,28
ju,1.5,27


### transpose

In [21]:
a.T

Unnamed: 0,ab,ele,ju
altura,1.7,1.8,1.5
idade,25.0,28.0,27.0


### renomeando colunas e index

In [22]:
a.columns.name = "atributos"
a.index.name = "nome"
a

atributos,altura,idade
nome,Unnamed: 1_level_1,Unnamed: 2_level_1
ab,1.7,25
ele,1.8,28
ju,1.5,27


### extraindo valores

In [23]:
a.values

array([[  1.7,  25. ],
       [  1.8,  28. ],
       [  1.5,  27. ]])

## Index

In [24]:
# An Index is immutable so that it can be shared safely between data structures.
# Item assignment therefore raises TypeError; it is caught and printed here so
# the demonstration does not abort a full "Run All" of the notebook.
try:
    a.index[0] = 'oi'
except TypeError as err:
    print("TypeError: %s" % err)

TypeError: Indexes does not support mutable operations

* Index
* Int64Index
* MultiIndex
* DatetimeIndex
* PeriodIndex

\# pesquisar!

In [27]:
'ab' in a.index

True

In [28]:
b = pd.DataFrame({'idade':{'ab':25, 'jão':27}, 'altura':{'ab':1.7, 'jão': 1.5}})

In [29]:
a,b

(atributos  altura  idade
 nome                    
 ab            1.7     25
 ele           1.8     28
 ju            1.5     27,      altura  idade
 ab      1.7     25
 jão     1.5     27)

In [31]:
a.index.difference(b.index)

Index([u'ele', u'ju'], dtype='object')

In [32]:
a.index.append(b.index)

Index([u'ab', u'ele', u'ju', u'ab', u'jão'], dtype='object')

In [34]:
a.index.intersection(b.index)

Index([u'ab'], dtype='object')

In [35]:
a.index.union(b.index)

Index([u'ab', u'ele', u'ju', u'jão'], dtype='object')

In [36]:
a.index.isin(b.index)

array([ True, False, False], dtype=bool)

In [37]:
a.index.delete(0)

Index([u'ele', u'ju'], dtype='object')

In [39]:
a.index.drop('ab')

Index([u'ele', u'ju'], dtype='object')

In [42]:
a.index.insert(0, 'eh') # arguments: position, then the new index element; returns a new Index

Index([u'eh', u'ab', u'ele', u'ju'], dtype='object')

In [44]:
a.index.is_unique

True

In [45]:
a.index.unique()

array(['ab', 'ele', 'ju'], dtype=object)

In [48]:
a.index = [0, 2, 1]

In [49]:
a

atributos,altura,idade
0,1.7,25
2,1.8,28
1,1.5,27


In [50]:
# `is_monotonic` was a deprecated alias that recent pandas no longer provides;
# `is_monotonic_increasing` is the explicit spelling of the same check.
a.index.is_monotonic_increasing

False

## Reindexing

In [51]:
a

atributos,altura,idade
0,1.7,25
2,1.8,28
1,1.5,27


In [52]:
a.reindex([0,1,2])

atributos,altura,idade
0,1.7,25
1,1.5,27
2,1.8,28


In [53]:
a.reindex([0, 1, 2, 3], fill_value=0)

atributos,altura,idade
0,1.7,25
1,1.5,27
2,1.8,28
3,0.0,0


In [60]:
a = pd.Series(['blue', 'purple', 'yellow'], index=[0,2,4])

In [61]:
a.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [62]:
a.reindex(range(6), method='bfill')

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

In [63]:
a = pd.Series(['blue', 'purple', 'yellow'], index=[0,4,6])

In [64]:
a.reindex(range(7), method='ffill', limit=1)

0      blue
1      blue
2       NaN
3       NaN
4    purple
5    purple
6    yellow
dtype: object

\# testar argumentos level, copy

## Dropping entries

In [65]:
a

0      blue
4    purple
6    yellow
dtype: object

In [66]:
a.drop(0)

4    purple
6    yellow
dtype: object

In [68]:
a.drop([0, 4])

6    yellow
dtype: object

In [82]:
# 4x4 frame holding 0..15 in row-major order, with default integer row/column labels.
a = pd.DataFrame(np.arange(16).reshape(4, 4))

In [71]:
a

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [72]:
a.drop(0)

Unnamed: 0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [73]:
a.drop(0, axis=1)

Unnamed: 0,1,2,3
0,1,2,3
1,5,6,7
2,9,10,11
3,13,14,15


In [74]:
a.drop([0, 1], axis=1)

Unnamed: 0,2,3
0,2,3
1,6,7
2,10,11
3,14,15


## Indexing, selection and filtering

In [75]:
a[0:2] # integer slice selects rows; the end position 2 is exclusive

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7


In [83]:
# Replace the integer row labels with their string form ('0', '1', ...).
a.index = list(map(str, a.index))

In [84]:
a['0':'2'] # label slice selects rows; the end label '2' is inclusive

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [85]:
# Pick rows '0' and '2' and columns 1, 0, 3 (in that order) by label.
# `.ix` has been removed from pandas; both axes are addressed by label here,
# so `.loc` is the direct replacement.
a.loc[['0', '2'], [1, 0, 3]]

Unnamed: 0,1,0,3
0,1,0,3
2,9,8,11


## Arithmetic and data alignment

In [86]:
# Adding two Series aligns them on index labels: only 'b' exists in both
# operands, so 'a' and 'c' have no counterpart and come out as NaN.
a = pd.Series(data=[1, 2], index=list('ab'))
b = pd.Series(data=[2, 3], index=list('bc'))
a + b

a   NaN
b     4
c   NaN
dtype: float64

In [87]:
# The same alignment applies to DataFrames: rows are matched by label, and a
# row present in only one frame ('a', 'c') yields NaN in every column.
a = pd.DataFrame(np.arange(4).reshape(2, 2), index=list('ab'))
b = pd.DataFrame(np.arange(4).reshape(2, 2), index=list('bc'))
a + b

Unnamed: 0,0,1
a,,
b,2.0,4.0
c,,


In [88]:
a.add(b, fill_value=0)

Unnamed: 0,0,1
a,0,1
b,2,4
c,2,3


* add
* sub
* div
* mul