In [1]:
import pandas as pd
# First demo frame: two integer columns on the default 0..2 row index.
df1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    }
)
df1

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [2]:
# Second demo frame with the same columns and row labels as df1,
# so the concatenation examples produce overlapping labels.
df2 = pd.DataFrame(
    {
        'A': [4, 5, 6],
        'B': [7, 8, 9],
    }
)
df2

Unnamed: 0,A,B
0,4,7
1,5,8
2,6,9


In [3]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to stack frames row-wise.
# Each frame keeps its own row labels, so the result index repeats 0..2.
pd.concat([df1, df2])

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6
0,4,7
1,5,8
2,6,9


In [4]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with ignore_index=True is the equivalent call.
# ignore_index=True discards the original row labels and renumbers 0..5.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6
3,4,7
4,5,8
5,6,9


In [5]:
# Row-wise stacking (axis=0 is the default); original row labels are kept.
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6
0,4,7
1,5,8
2,6,9


In [6]:
# Side-by-side concatenation: rows are aligned on the index and the
# column labels A, B appear once per input frame.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,A.1,B.1
0,1,4,4,7
1,2,5,5,8
2,3,6,6,9


In [7]:
# Passing a mapping labels each frame with its dict key, producing an
# outer index level ('day1' / 'day2') above the original row labels.
pd.concat({'day1': df1, 'day2': df2})

Unnamed: 0,Unnamed: 1,A,B
day1,0,1,4
day1,1,2,5
day1,2,3,6
day2,0,4,7
day2,1,5,8
day2,2,6,9


In [8]:
# Keep the keyed concatenation so one day's rows can be pulled back out
# by selecting on the outer index level.
df3 = pd.concat(
    objs=[df1, df2],
    keys=['day1', 'day2'],
)
df3.loc["day2"]


Unnamed: 0,A,B
0,4,7
1,5,8
2,6,9


In [9]:
import numpy as np
# Left-hand table for the merge examples: unique keys A-D with values 0..3.
# NOTE: this rebinds df1, replacing the frame used in the concat examples.
df1 = pd.DataFrame(
    {
        'key': ['A', 'B', 'C', 'D'],
        'value': range(4),
    }
)
df1


Unnamed: 0,key,value
0,A,0
1,B,1
2,C,2
3,D,3


In [10]:
# Right-hand table for the merge examples: key D is duplicated and key E
# has no match in df1, so each join type gives a different result.
df2 = pd.DataFrame(
    {
        'key': ['B', 'D', 'D', 'E'],
        'value': range(10, 14),
    }
)
df2


Unnamed: 0,key,value
0,B,10
1,D,11
2,D,12
3,E,13


In [11]:
# Inner join (the default): only keys present in both tables survive.
# The clashing 'value' columns are suffixed _x (left) and _y (right).
df1.merge(right=df2, how='inner', on='key')

Unnamed: 0,key,value_x,value_y
0,B,1,10
1,D,3,11
2,D,3,12


In [12]:
# Left join: every df1 key is kept; keys with no match in df2 get NaN in
# value_y, which is why that column is shown as float.
df1.merge(right=df2, how='left', on='key')

Unnamed: 0,key,value_x,value_y
0,A,0,
1,B,1,10.0
2,C,2,
3,D,3,11.0
4,D,3,12.0


In [13]:
# Right join: every df2 row is kept; key E has no match in df1, so its
# value_x is NaN.
df1.merge(right=df2, how='right', on='key')

Unnamed: 0,key,value_x,value_y
0,B,1.0,10
1,D,3.0,11
2,D,3.0,12
3,E,,13


In [14]:
# Outer join: union of the keys from both tables, with NaN wherever one
# side has no matching row.
df1.merge(right=df2, how='outer', on='key')

Unnamed: 0,key,value_x,value_y
0,A,0.0,
1,B,1.0,10.0
2,C,2.0,
3,D,3.0,11.0
4,D,3.0,12.0
5,E,,13.0


## Join types for the `how` argument of `merge`
- `'inner'`: use only the key combinations observed in both tables (the default)
- `'left'`: use all key combinations found in the left table
- `'right'`: use all key combinations found in the right table
- `'outer'`: use all key combinations observed in either table (the union of both)

In [15]:
# 3x4 integer grid holding 0..11 in row-major order.
arr = np.arange(12).reshape(3, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [16]:
# Join the array to itself along the column axis: 3x4 + 3x4 -> 3x8.
np.concatenate((arr, arr), axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [17]:
# Three integer Series whose indexes do not overlap (a-b, c-e, f-g).
s1 = pd.Series({'a': 0, 'b': 1})
s2 = pd.Series({'c': 2, 'd': 3, 'e': 4})
s3 = pd.Series({'f': 5, 'g': 6})

In [18]:
# Default axis=0: the three Series are glued end to end into one Series.
pd.concat([s1, s2, s3], axis=0)

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [19]:
# Column-wise: each Series becomes a column (0, 1, 2) over the union of
# the indexes; labels missing from a Series are filled with NaN.
pd.concat([s1, s2, s3], axis='columns', sort=False)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [20]:
# s4 shares labels a, b with s1 and adds f, g from s3, giving a partial
# index overlap for the join examples.
s4 = pd.concat([s1, s3], axis=0)
s4

a    0
b    1
f    5
g    6
dtype: int64

In [21]:
# Default outer join on the index: rows f, g exist only in s4, so the s1
# column is NaN there.
pd.concat([s1, s4], axis='columns', sort=False)

Unnamed: 0,0,1
a,0.0,0
b,1.0,1
f,,5
g,,6


In [22]:
# join='inner' keeps only the labels present in both Series (a, b).
pd.concat([s1, s4], join='inner', axis='columns')

Unnamed: 0,0,1
a,0,0
b,1,1


In [23]:
# keys= tags each input Series with an outer index label, producing a
# two-level (hierarchical) index on the stacked result.
result = pd.concat(
    [s1, s1, s3],
    keys=['one', 'two', 'three'],
)
result

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [24]:
# Pivot the innermost index level (a, b, f, g) into columns; the default
# level is the last one.
result.unstack(level=-1)

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


In [25]:
# With axis='columns', the keys become the column headers instead of an
# outer index level.
pd.concat(
    [s1, s2, s3],
    keys=['one', 'two', 'three'],
    axis='columns',
    sort=False,
)

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [26]:
# Two labelled frames with different columns; df2 has no row 'b', so
# column-wise concatenation has to align on the index.
# NOTE: this rebinds df1 and df2 from the earlier merge examples.
df1 = pd.DataFrame(
    np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['one', 'two'],
)
df2 = pd.DataFrame(
    np.arange(5, 9).reshape(2, 2),
    index=['a', 'c'],
    columns=['three', 'four'],
)

In [27]:
# Show the 3x2 frame (rows a-c, columns one/two) before concatenating.
df1

Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


In [28]:
# Show the 2x2 frame (rows a and c only, columns three/four).
df2

Unnamed: 0,three,four
a,5,6
c,7,8


In [29]:
# Column-wise concat of frames with keys: the keys form an outer column
# level above each frame's own columns. Row 'b' is missing from df2, so
# its level2 cells are NaN.
pd.concat(
    [df1, df2],
    keys=['level1', 'level2'],
    axis='columns',
    sort=True,
)

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [30]:
# Sample frame for the groupby examples: two categorical key columns and
# two columns of standard-normal noise.
# A seeded local generator makes the random columns (and every groupby
# result derived from them) identical on each Restart & Run All; the
# original unseeded np.random.randn produced different numbers every run.
rng = np.random.default_rng(42)
df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': rng.standard_normal(5),
    'data2': rng.standard_normal(5),
})

In [31]:
# Show the sample frame: key1/key2 are group labels, data1/data2 are floats.
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,1.396303,0.14643
1,a,two,-2.084753,1.072328
2,b,one,-1.060582,0.201722
3,b,two,0.123625,-0.292732
4,a,one,0.816477,0.026474


In [32]:
# Group the data1 column by the key1 labels. Nothing is computed yet:
# the result is a SeriesGroupBy object waiting for an aggregation.
grouped = df.groupby('key1')['data1']
grouped

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7fcaeaaae7f0>

In [33]:
# Aggregate each key1 group to its mean; the result is a Series indexed
# by the distinct key1 values.
grouped.mean()

key1
a    0.042676
b   -0.468478
Name: data1, dtype: float64

In [34]:
# Mean of data1 for every (key1, key2) pair; the result is a Series with
# a two-level index.
means = df.groupby(['key1', 'key2'])['data1'].mean()

In [35]:
# Show the per-(key1, key2) means as a Series with a hierarchical index.
means

key1  key2
a     one     1.106390
      two    -2.084753
b     one    -1.060582
      two     0.123625
Name: data1, dtype: float64

In [36]:
# Pivot the inner index level (key2) into columns, giving a key1 x key2
# table of means.
means.unstack(level='key2')

key2,one,two
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,1.10639,-2.084753
b,-1.060582,0.123625
