## Data frames

In [66]:
import pandas as pd

In [67]:
items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants'])}

print(type(items))

<class 'dict'>


In [68]:
# Os labels são formados pela união dos índices
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [69]:
# Dessa vez sem passar os índices
data = {'Bob' : pd.Series([245, 25, 55]),
        'Alice' : pd.Series([40, 110, 500, 45])}

df = pd.DataFrame(data)
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [70]:
shopping_carts.index

Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

In [71]:
shopping_carts.columns

Index(['Bob', 'Alice'], dtype='object')

In [26]:
len(shopping_carts.columns)

2

In [17]:
shopping_carts.values

array([[245., 500.],
       [ nan,  40.],
       [ nan, 110.],
       [ 25.,  45.],
       [ 55.,  nan]])

In [18]:
shopping_carts.shape

(5, 2)

In [22]:
shopping_carts.ndim

2

In [27]:
shopping_carts.size

10

## Subsetting

In [28]:
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [31]:
alice_shopping_cart = pd.DataFrame(items, columns = ['Bob'])
alice_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [32]:
sel_shopping_cart = pd.DataFrame(items, index = ['pants', 'book'])
sel_shopping_cart

Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


In [33]:
alice_sel_shopping_cart = pd.DataFrame(items, index = ['glasses', 'bike'], columns = ['Alice'])
alice_sel_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500


### Adicionando índices

In [36]:
data = {'Integers' : [1,2,3],
        'Floats' : [4.5, 8.2, 9.6]}
df = pd.DataFrame(data)
df

Unnamed: 0,Integers,Floats
0,1,4.5
1,2,8.2
2,3,9.6


In [37]:
# indíces novos
data = {'Integers' : [1,2,3],
        'Floats' : [4.5, 8.2, 9.6]}
df = pd.DataFrame(data, index = ['label 1', 'label 2', 'label 3'])
df

Unnamed: 0,Integers,Floats
label 1,1,4.5
label 2,2,8.2
label 3,3,9.6


In [108]:
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35}, 
          {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5}]

store_items = pd.DataFrame(items2)
store_items

Unnamed: 0,bikes,glasses,pants,watches
0,20,,30,35
1,15,50.0,5,10


In [109]:
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35}, 
          {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5}]
store_items = pd.DataFrame(items2, index = ['store 1', 'store 2'])
store_items

Unnamed: 0,bikes,glasses,pants,watches
store 1,20,,30,35
store 2,15,50.0,5,10


In [110]:
store_items[['bikes']]

Unnamed: 0,bikes
store 1,20
store 2,15


In [111]:
store_items[['bikes', 'pants']]

Unnamed: 0,bikes,pants
store 1,20,30
store 2,15,5


###  Como acessar e alterar linhas e células diretamente

Mais uma vez escolhas de design bem estranhas.

In [112]:
store_items.loc[['store 1']]

Unnamed: 0,bikes,glasses,pants,watches
store 1,20,,30,35


In [113]:
# a coluna sempre vem antes da linha ao acessar uma célula desse jeito(WTF????)
store_items['bikes']['store 2']

15

In [114]:
# nova coluna
store_items['shirts'] = [15,2]
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts
store 1,20,,30,35,15
store 2,15,50.0,5,10,2


In [115]:
# nova coluna com operação aritmética
store_items['suits'] = store_items['shirts'] + store_items['pants']
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts,suits
store 1,20,,30,35,15,45
store 2,15,50.0,5,10,2,7


# IMPORTANTE

### Inserindo novos valores no data frame

In [116]:
new_items = [{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4}]
new_store = pd.DataFrame(new_items, index = ['store 3'])
new_store

Unnamed: 0,bikes,glasses,pants,watches
store 3,20,4,30,35


In [117]:
store_items = store_items.append(new_store)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,watches
store 1,20,,30,15.0,45.0,35
store 2,15,50.0,5,2.0,7.0,10
store 3,20,4.0,30,,,35


#### Usando valores que já existem no data frame para criar uma nova coluna

In [118]:
store_items['new watches'] = store_items['watches'][1:]
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits,watches,new watches
store 1,20,,30,15.0,45.0,35,
store 2,15,50.0,5,2.0,7.0,10,10.0
store 3,20,4.0,30,,,35,35.0


In [119]:
# escolhe a posição da nova coluna, o label do índice e os valores
# não permite duplicatas

try:
    store_items.insert(4, 'shoes', [8,5,0])
except ValueError:
    print("Não é possível inserir duplicatas")
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches,new watches
store 1,20,,30,15.0,8,45.0,35,
store 2,15,50.0,5,2.0,5,7.0,10,10.0
store 3,20,4.0,30,,0,,35,35.0


In [120]:
# remove uma coluna inteira

try:
    store_items.pop('new watches')
except KeyError:
    print("A coluna não existe na tabela")
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,0,,35


In [121]:
# axis = 1 remove colunas, axis = 0 remove linhas (meu deus do céu hahahaha)
try:
    store_items = store_items.drop(['watches', 'shoes'], axis = 1)
except KeyError:
    print("Elementos não encontrados no eixo")
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store 1,20,,30,15.0,45.0
store 2,15,50.0,5,2.0,7.0
store 3,20,4.0,30,,


In [122]:
try:
    store_items = store_items.drop(['store 1', 'store 2'], axis = 0)
except KeyError:
    print("Elementos não encontrados no eixo")
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store 3,20,4.0,30,,


### Renomeando colunas e linhas

O rename() aceita como argumento índices ou colunas na forma de um dicionário. A **chave** é o nome antigo e o **valor** é o novo nome.

In [123]:
store_items = store_items.rename(columns = {'bikes': 'hats'})
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
store 3,20,4.0,30,,


In [124]:
store_items = store_items.rename(index = {'store 3': 'last store'})
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
last store,20,4.0,30,,


### Mudando o indice para uma das colunas existente

Deve existir um bom motivo para fazer isso.

In [125]:
store_items = store_items.set_index('pants')
store_items

Unnamed: 0_level_0,hats,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,4.0,,
