In [3]:
import pandas as pd

In [8]:
# Syntax

df = pd.DataFrame(
    {
        'a': [4, 5, 6], 
        'b': [7, 8, 9],
        'c': [10, 12, 48]
    },
    index=[1,2,3],
    columns=['a', 'b', 'c']
)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,12
3,6,9,48


In [11]:
df = pd.DataFrame(
    [
        [1, 2, 3], 
        [4, 5, 6], 
        [7, 8, 9]
    ],
    index=[1, 2, 3], 
    columns=['A', 'B', 'C']
)
df

Unnamed: 0,A,B,C
1,1,2,3
2,4,5,6
3,7,8,9


In [134]:
df = pd.DataFrame(
    {
        'a': [4, 5, 6], 
        'b': [7, 8, 9],
        'c': [10, 12, 48]
    },
    index=pd.MultiIndex.from_tuples(
        [
            ('d', 1), ('d', 2), ('e', 2)
        ], 
        names=['n', 'v']
    )
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,12
e,2,6,9,48


In [61]:
# Method Chaining
df = (pd.melt(df).rename(columns={'variable':'var', 'value': 'val'}).query('val >= 6'))
df

Unnamed: 0,var,val
2,a,6
3,b,7
4,b,8
5,b,9
6,c,10
7,c,12
8,c,48


In [71]:
# Reshaping Data 

df1 = pd.DataFrame({
        'a': [4, 5, 6], 
        'b': [7, 8, 9],
        'c': [10, 12, 48]
    },
    index=[1,2,3]
)
df1

df2 = pd.DataFrame({
        'a': [1, 2, 3], 
        'b': [11, 13, 14],
        'c': [20, 22, 44]
    },
    index=[4,5,6]
)

In [72]:
# reunir colunas em linhas
pd.melt(df1)

Unnamed: 0,variable,value
0,a,4
1,a,5
2,a,6
3,b,7
4,b,8
5,b,9
6,c,10
7,c,12
8,c,48


In [74]:
# concatena dfs
pd.concat([df1, df2])

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,12
3,6,9,48
4,1,11,20
5,2,13,22
6,3,14,44


In [78]:
# espalhar linhas em colunas
df.pivot(columns='a', values='c')

a,4,5,6
1,10.0,,
2,,12.0,
3,,,48.0


In [85]:
# anexar colunas de dataframes
pd.concat([df1, df2], axis=1)

Unnamed: 0,a,b,c,a.1,b.1,c.1
1,4.0,7.0,10.0,,,
2,5.0,8.0,12.0,,,
3,6.0,9.0,48.0,,,
4,,,,1.0,11.0,20.0
5,,,,2.0,13.0,22.0
6,,,,3.0,14.0,44.0


In [87]:
# Ordenando
df1.sort_values('a')

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,12
3,6,9,48


In [92]:
# ordenando
df1.sort_values('a', ascending=False)

Unnamed: 0,a,b,c
3,6,9,48
2,5,8,12
1,4,7,10


In [96]:
# renomeando coluna
df1 = df1.rename(columns={'a':'valores'})
df1

Unnamed: 0,valores,b,c
1,4,7,10
2,5,8,12
3,6,9,48


In [115]:
# Organiza os index
df1.sort_index()

Unnamed: 0,valores,b,c
1,4,7,10
2,5,8,12
3,6,9,48


In [118]:
# reseta o index
df1.reset_index()

Unnamed: 0,index,valores,b,c
0,1,4,7,10
1,2,5,8,12
2,3,6,9,48


In [120]:
# Remover coluna
df1.drop(columns='b')

Unnamed: 0,valores,c
1,4,10
2,5,12
3,6,48


In [123]:
# subset observations(ROWS)
df[df.a > 5]

Unnamed: 0,a,b,c
3,6,9,48


In [136]:
df = df.append({'a' : 4 , 'b' : 7, 'c' : 10} , ignore_index=True)
df

Unnamed: 0,a,b,c
0,4,7,10
1,5,8,12
2,6,9,48
3,4,7,10
4,4,7,10


In [138]:
# removendo linhas duplicadas
df.drop_duplicates()

Unnamed: 0,a,b,c
0,4,7,10
1,5,8,12
2,6,9,48


In [141]:
df.head(2)

Unnamed: 0,a,b,c
0,4,7,10
1,5,8,12


In [143]:
df.tail(2)

Unnamed: 0,a,b,c
3,4,7,10
4,4,7,10


In [177]:
# selecionar aleatoriamente a fração de linhas
df.sample(frac=0.4)

Unnamed: 0,a,b,c
3,4,7,10
4,4,7,10


In [185]:
# selecionar aleatoriamente uma quantidade de colunas
df.sample(n=2)

Unnamed: 0,a,b,c
3,4,7,10
0,4,7,10
