Pandas trabalha com dataframes, pode-se dizer que é uma tabela.
Possui duas classes principais:
- Series
    - Unidimensional
- Dataframe
    - Bidimensional

In [1]:
import pandas as pd

In [2]:
data = pd.Series([0.25,0.5,0.75,1.0])

In [5]:
data #Lembra muito o numpy, pois pandas utiliza o numpy

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [6]:
type(data)

pandas.core.series.Series

In [11]:
data.values #Array do numpy

array([0.25, 0.5 , 0.75, 1.  ])

In [8]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [9]:
data[0]

0.25

In [10]:
data[1]

0.5

In [12]:
data[1:3]

1    0.50
2    0.75
dtype: float64

In [14]:
data = pd.Series([0.25,0.5,0.75,1.0], index=['a','b','c','d'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [15]:
data['a']

0.25

In [16]:
notas = pd.Series([10,9,8,7], index=['pedro','bia','charles','davi'])
notas

pedro      10
bia         9
charles     8
davi        7
dtype: int64

In [19]:
population_dict = {'California': 38_332_521,
                   'Texas': 26_448_193,
                   'New York': 19_651_127,
                   'Florida': 19_552_860,
                   'Illinois': 12_882_135}

In [20]:
population = pd.Series(population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

## DataFrame
Pode ser descrito como uma sequência de Series

In [31]:
area_dict = {'California': 423967, 'Texas': 695662,
             'New York': 141297, 'Florida': 170312,
             'Illinois': 149995, 'Paraíba':56585}

In [32]:
area = pd.Series(area_dict)

In [33]:
states = pd.DataFrame({'population': population,
                      'area': area})

In [34]:
states

Unnamed: 0,population,area
California,38332521.0,423967
Florida,19552860.0,170312
Illinois,12882135.0,149995
New York,19651127.0,141297
Paraíba,,56585
Texas,26448193.0,695662


Os dataframes possuem valores e indices também

In [35]:
states.index

Index(['California', 'Florida', 'Illinois', 'New York', 'Paraíba', 'Texas'], dtype='object')

In [36]:
states.columns

Index(['population', 'area'], dtype='object')

In [39]:
states['area'] #Retorna coluna

California    423967
Florida       170312
Illinois      149995
New York      141297
Paraíba        56585
Texas         695662
Name: area, dtype: int64

In [40]:
states['California'] #Não retorna linha

KeyError: 'California'

In [41]:
states['area']['California']

423967

In [42]:
import numpy as np

In [47]:
data = np.random.random((2,2))
data

array([[0.4116155 , 0.60857705],
       [0.11725528, 0.1453827 ]])

In [48]:
pd.DataFrame(data)

Unnamed: 0,0,1
0,0.411615,0.608577
1,0.117255,0.145383


In [50]:
df = pd.DataFrame(data, columns=["a", "b"], index=["x", "y"])
df

Unnamed: 0,a,b
x,0.411615,0.608577
y,0.117255,0.145383


## Indexação

In [51]:
data = pd.Series([0.25,0.5,0.75,1.0], index=['a','b','c','d'])

In [52]:
data['a']

0.25

In [58]:
data > 0.5 # O pandas também utiliza as mascaras de formatação como o numpy

a    False
b    False
c     True
d     True
dtype: bool

In [54]:
data[data > 0.5]

c    0.75
d    1.00
dtype: float64

In [57]:
data[['a', 'b']]

a    0.25
b    0.50
dtype: float64

In [59]:
data[['b', 'a']]

b    0.50
a    0.25
dtype: float64

In [60]:
data['x'] = 99
data

a     0.25
b     0.50
c     0.75
d     1.00
x    99.00
dtype: float64

In [61]:
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [65]:
data = pd.Series([0.25,0.5,0.75,1.0], index=['d','c','b','a'])

In [66]:
data

d    0.25
c    0.50
b    0.75
a    1.00
dtype: float64

In [67]:
data['a':'c']

Series([], dtype: float64)

In [68]:
data['d':'b']

d    0.25
c    0.50
b    0.75
dtype: float64

In [69]:
data

d    0.25
c    0.50
b    0.75
a    1.00
dtype: float64

In [70]:
data[1:3]

c    0.50
b    0.75
dtype: float64

Quando se trabalha com o índice, ele inclui o último valor, quando se trabalha com a posição dos índices, ele não inclui.

In [71]:
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=[0,1,2,3])

In [72]:
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [73]:
data[1:3]

1    0.50
2    0.75
dtype: float64

In [74]:
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=[2, 3, 4, 5])

In [75]:
data

2    0.25
3    0.50
4    0.75
5    1.00
dtype: float64

In [76]:
data[1:3]

3    0.50
4    0.75
dtype: float64

In [77]:
data[4:5]

Series([], dtype: float64)

In [79]:
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=[42, 3, 15, 1])
data

42    0.25
3     0.50
15    0.75
1     1.00
dtype: float64

In [80]:
data[1:3]

3     0.50
15    0.75
dtype: float64

In [81]:
data[42:3]

Series([], dtype: float64)

In [83]:
data[1]

1.0

In [84]:
data.loc[1] #índice de 'nome' 1

1.0

In [85]:
data.iloc[1] #indice real de posição

0.5

In [86]:
data

42    0.25
3     0.50
15    0.75
1     1.00
dtype: float64

In [87]:
data.loc[42:3]

42    0.25
3     0.50
dtype: float64

In [88]:
data.iloc[1:3]

3     0.50
15    0.75
dtype: float64

In [89]:
data.iloc[0:4:2]

42    0.25
15    0.75
dtype: float64

In [90]:
states['population']

California    38332521.0
Florida       19552860.0
Illinois      12882135.0
New York      19651127.0
Paraíba              NaN
Texas         26448193.0
Name: population, dtype: float64

In [91]:
states.area

California    423967
Florida       170312
Illinois      149995
New York      141297
Paraíba        56585
Texas         695662
Name: area, dtype: int64

In [92]:
states['population'] / states['area']

California     90.413926
Florida       114.806121
Illinois       85.883763
New York      139.076746
Paraíba              NaN
Texas          38.018740
dtype: float64

In [93]:
states['density'] = states['population'] / states['area']

In [94]:
states

Unnamed: 0,population,area,density
California,38332521.0,423967,90.413926
Florida,19552860.0,170312,114.806121
Illinois,12882135.0,149995,85.883763
New York,19651127.0,141297,139.076746
Paraíba,,56585,
Texas,26448193.0,695662,38.01874


In [95]:
states['population'] + 100

California    38332621.0
Florida       19552960.0
Illinois      12882235.0
New York      19651227.0
Paraíba              NaN
Texas         26448293.0
Name: population, dtype: float64

In [98]:
states = states.T
states

Unnamed: 0,population,area,density
California,38332521.0,423967.0,90.413926
Florida,19552860.0,170312.0,114.806121
Illinois,12882135.0,149995.0,85.883763
New York,19651127.0,141297.0,139.076746
Paraíba,,56585.0,
Texas,26448193.0,695662.0,38.01874


In [99]:
states = states.T
states

Unnamed: 0,California,Florida,Illinois,New York,Paraíba,Texas
population,38332520.0,19552860.0,12882140.0,19651130.0,,26448190.0
area,423967.0,170312.0,149995.0,141297.0,56585.0,695662.0
density,90.41393,114.8061,85.88376,139.0767,,38.01874


In [100]:
states = states.T
states

Unnamed: 0,population,area,density
California,38332521.0,423967.0,90.413926
Florida,19552860.0,170312.0,114.806121
Illinois,12882135.0,149995.0,85.883763
New York,19651127.0,141297.0,139.076746
Paraíba,,56585.0,
Texas,26448193.0,695662.0,38.01874


In [101]:
states.loc['California']

population    3.833252e+07
area          4.239670e+05
density       9.041393e+01
Name: California, dtype: float64

In [102]:
California = states.loc['California']

In [103]:
California

population    3.833252e+07
area          4.239670e+05
density       9.041393e+01
Name: California, dtype: float64

In [104]:
states.loc['California', 'area']

423967.0

In [106]:
states.loc['Florida':'New York', 'population':'area']

Unnamed: 0,population,area
Florida,19552860.0,170312.0
Illinois,12882135.0,149995.0
New York,19651127.0,141297.0


In [107]:
states.iloc[0,1]

423967.0

In [108]:
states.iloc[1:4, 1:3]

Unnamed: 0,area,density
Florida,170312.0,114.806121
Illinois,149995.0,85.883763
New York,141297.0,139.076746


In [109]:
states.loc[['New York', 'California']]

Unnamed: 0,population,area,density
New York,19651127.0,141297.0,139.076746
California,38332521.0,423967.0,90.413926


In [110]:
states.loc[['New York', 'California'], ['population', 'density']]

Unnamed: 0,population,density
New York,19651127.0,139.076746
California,38332521.0,90.413926


In [111]:
states.loc[states['density'] > 100]

Unnamed: 0,population,area,density
Florida,19552860.0,170312.0,114.806121
New York,19651127.0,141297.0,139.076746


In [113]:
states[states['density'] > states.loc['Florida', 'density']]

Unnamed: 0,population,area,density
New York,19651127.0,141297.0,139.076746
