In [37]:
import pandas as pd
import numpy as np
import seaborn as sns

In [38]:
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Melt

In [39]:
pd.melt(df, id_vars=['sepal_length'], value_vars=['species']).head()

Unnamed: 0,sepal_length,variable,value
0,5.1,species,setosa
1,4.9,species,setosa
2,4.7,species,setosa
3,4.6,species,setosa
4,5.0,species,setosa


<strong> method chaining </strong>

In [40]:
pd.melt(df, id_vars=['sepal_length'], value_vars=['species']).rename(columns={
    'variable': 'category',
    'value': 'species'}).head()

Unnamed: 0,sepal_length,category,species
0,5.1,species,setosa
1,4.9,species,setosa
2,4.7,species,setosa
3,4.6,species,setosa
4,5.0,species,setosa


In [41]:
pd.melt(df, value_vars=['sepal_length', 'species']).describe()

Unnamed: 0,variable,value
count,300,300
unique,2,38
top,sepal_length,setosa
freq,150,50


## Concatenation

In [42]:
s1 = pd.Series(['a', 'b'])
s2 = pd.Series(['c', 'd'])
pd.concat([s1,s2], ignore_index=True)

0    a
1    b
2    c
3    d
dtype: object

In [43]:
pd.concat([s1,s2], keys=['s1','s2'])

s1  0    a
    1    b
s2  0    c
    1    d
dtype: object

In [44]:
pd.concat([s1, s2], keys=['s1', 's2'],names=['Series name', 'Row ID'])

Series name  Row ID
s1           0         a
             1         b
s2           0         c
             1         d
dtype: object

In [45]:
df1 = pd.DataFrame([['a', 1], ['b', 2]],columns=['letter', 'number'])
df2 = pd.DataFrame([['c', 3], ['d', 4]],columns=['letter', 'number'])
pd.concat([df1,df2])

Unnamed: 0,letter,number
0,a,1
1,b,2
0,c,3
1,d,4


In [46]:
df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],columns=['letter', 'number', 'animal'])
pd.concat([df1,df3])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  


Unnamed: 0,animal,letter,number
0,,a,1
1,,b,2
0,cat,c,3
1,dog,d,4


In [47]:
pd.concat([df1,df3], join='inner')

Unnamed: 0,letter,number
0,a,1
1,b,2
0,c,3
1,d,4


In [48]:
df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],columns=['animal', 'name'])
pd.concat([df1,df4], axis=1)

Unnamed: 0,letter,number,animal,name
0,a,1,bird,polly
1,b,2,monkey,george


In [49]:
df5 = pd.DataFrame([1], index=['a'])
df6 = pd.DataFrame([2], index=['a'])
pd.concat([df5, df6], verify_integrity=False)

Unnamed: 0,0
a,1
a,2


## Pivot

In [50]:
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two','two'],
                   'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                    'baz': [1, 2, 3, 4, 5, 6]})
df

Unnamed: 0,foo,bar,baz
0,one,A,1
1,one,B,2
2,one,C,3
3,two,A,4
4,two,B,5
5,two,C,6


In [51]:
df.pivot(index='foo', columns='bar', values='baz')

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


<strong> method chaining </strong>

In [52]:
df = df.pivot(index='foo', columns='bar', values='baz').reset_index()

In [53]:
df.melt(id_vars=['foo'], value_vars=['A','B','C']).sort_values(
    ['foo','bar']).rename(columns= {'value':'baz'})

Unnamed: 0,foo,bar,baz
0,one,A,1
2,one,B,2
4,one,C,3
1,two,A,4
3,two,B,5
5,two,C,6


## Others

<strong> sort_values, rename, sort_index, reset_index, drop </strong>

In [58]:
df = sns.load_dataset('iris')

<strong> default: low to high </strong>

In [59]:
df.sort_values('sepal_length').head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
42,4.4,3.2,1.3,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa


<strong> high to low </strong>

In [60]:
df.sort_values('sepal_length', ascending=False).head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
131,7.9,3.8,6.4,2.0,virginica
135,7.7,3.0,6.1,2.3,virginica
122,7.7,2.8,6.7,2.0,virginica
117,7.7,3.8,6.7,2.2,virginica
118,7.7,2.6,6.9,2.3,virginica


In [61]:
df.rename(columns = {'sepal_length': 'sepal_length_renamed'}).head()

Unnamed: 0,sepal_length_renamed,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [62]:
df.sort_values('sepal_length', ascending=False).sort_index().head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [63]:
df.sort_values('sepal_length', ascending=False).reset_index().head()

Unnamed: 0,index,sepal_length,sepal_width,petal_length,petal_width,species
0,131,7.9,3.8,6.4,2.0,virginica
1,135,7.7,3.0,6.1,2.3,virginica
2,122,7.7,2.8,6.7,2.0,virginica
3,117,7.7,3.8,6.7,2.2,virginica
4,118,7.7,2.6,6.9,2.3,virginica


In [64]:
df.drop(columns=['sepal_length', 'sepal_width']).head()

Unnamed: 0,petal_length,petal_width,species
0,1.4,0.2,setosa
1,1.4,0.2,setosa
2,1.3,0.2,setosa
3,1.5,0.2,setosa
4,1.4,0.2,setosa
