## Operações no Pandas

In [2]:
import pandas as pd

# Small demo frame: two integer columns and one string column.
# col2 contains a repeated value (444), which the uniqueness cells below rely on.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4],
        "col2": [444, 555, 666, 444],
        "col3": ["abc", "def", "ghi", "xyz"],
    }
)
df.head()

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   col1    4 non-null      int64 
 1   col2    4 non-null      int64 
 2   col3    4 non-null      object
dtypes: int64(2), object(1)
memory usage: 228.0+ bytes


In [4]:
df.memory_usage()

Index    132
col1      32
col2      32
col3      32
dtype: int64

## Informações sobre valores exclusivos

In [6]:
df["col2"].unique()

array([444, 555, 666])

In [7]:
df["col2"].nunique()

3

In [9]:
df["col2"].value_counts()

col2
444    2
555    1
666    1
Name: count, dtype: int64

In [11]:
df["col2"].value_counts().index

Index([444, 555, 666], dtype='int64', name='col2')

## Aplicando funções

In [13]:
def comp(x):
    """Return the square of ``x`` plus 3.

    Used with ``Series.apply`` in the cells below, where it is called
    once per element of the column.
    """
    squared = x ** 2
    return squared + 3

In [16]:
df["col1"].apply(comp)

0     4
1     7
2    12
3    19
Name: col1, dtype: int64

In [17]:
# Store the result of comp() for each col1 value in a new column, col1_calc.
df["col1_calc"] = df["col1"].apply(comp)

In [18]:
df

Unnamed: 0,col1,col2,col3,col1_calc
0,1,444,abc,4
1,2,555,def,7
2,3,666,ghi,12
3,4,444,xyz,19


In [19]:
# Same computation as comp(), written inline as an anonymous function.
df["col1"].apply(lambda x: x **2 + 3)

0     4
1     7
2    12
3    19
Name: col1, dtype: int64

In [20]:
df["col1"].sum()

10

In [21]:
df["col1"].mean()

2.5

In [22]:
df["col1"].product()

24

In [23]:
# Sample standard deviation (divides by n - 1): sqrt(5 / 3) for [1, 2, 3, 4].
df["col1"].std()

1.2909944487358056

In [24]:
df["col1"].max()

4

In [25]:
df["col1"].min()

1

In [28]:
# idxmax() gives the index label of the largest value (row 3 holds col1 == 4),
# not the value itself.
df["col1"].idxmax()

3

In [29]:
df["col1"].sum()

10

In [30]:
# Boolean-mask filter: keep only the rows where col2 equals 444.
df[df["col2"] == 444]

Unnamed: 0,col1,col2,col3,col1_calc
0,1,444,abc,4
3,4,444,xyz,19


In [33]:
# Sum col1 over the rows where col2 == 444. Rows and column are selected in a
# single .loc lookup rather than by chaining two [] lookups.
df.loc[df["col2"] == 444, "col1"].sum()

5

In [35]:
df.sort_values(by="col2")

Unnamed: 0,col1,col2,col3,col1_calc
0,1,444,abc,4
3,4,444,xyz,19
1,2,555,def,7
2,3,666,ghi,12


In [36]:
# Second demo frame: three string columns (A, B, C) and one integer column (D).
data = {
    "A": ["foo", "foo", "foo", "bar", "bar", "bar"],
    "B": ["one", "one", "two", "two", "one", "one"],
    "C": ["x", "y", "x", "y", "x", "y"],
    "D": [1, 3, 2, 5, 4, 1],
}

# Note: this rebinds df, replacing the frame used by the earlier cells.
df = pd.DataFrame(data)

In [37]:
df

Unnamed: 0,A,B,C,D
0,foo,one,x,1
1,foo,one,y,3
2,foo,two,x,2
3,bar,two,y,5
4,bar,one,x,4
5,bar,one,y,1


In [39]:
# Lookup table used with Series.map below. The replacement values are strings,
# so the mapped column comes out with object dtype rather than a numeric one.
dict_map = dict(one="1", two="2")

In [43]:
df["B"].map(dict_map)

0    1
1    1
2    2
3    2
4    1
5    1
Name: B, dtype: object

In [44]:
# Add column E holding the dict_map translation of each value in column B.
df["E"] = df["B"].map(dict_map)

In [45]:
df

Unnamed: 0,A,B,C,D,E
0,foo,one,x,1,1
1,foo,one,y,3,1
2,foo,two,x,2,2
3,bar,two,y,5,2
4,bar,one,x,4,1
5,bar,one,y,1,1


In [48]:
# Average of D for every (A, B) combination: A values become the rows and
# B values become the columns. aggfunc="mean" is the pandas default, spelled
# out here so the aggregation is visible to the reader.
df.pivot_table(
    index="A",
    columns="B",
    values="D",
    aggfunc="mean",
)

B,one,two
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,2.5,5.0
foo,2.0,2.0
