In [1]:
# We look here at merging data from different data sources

import numpy as np
import pandas as pd

In [28]:
def make_df(cols, ind):
    
    """Quickly make a DataFrame"""
    data = {c: [str(c) + str(i) for i in ind]
            for c in cols}
    return pd.DataFrame(data, ind)

# example DataFrame
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


class display(object):
    """Display HTML representation of multiple objects"""
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""
    def __init__(self, *args):
        self.args = args
        
    def _repr_html_(self):
        return '\n'.join(self.template.format(a, eval(a)._repr_html_())
                         for a in self.args)
    
    def __repr__(self):
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)

In [9]:
x = [1, 2, 3]
y = [4, 5, 6]
z = [7, 8, 9]
np.concatenate([x, y, z])

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
x = [
    [1, 2],
    [3, 4]
]

np.concatenate([x, x], axis = 1)

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

In [24]:
ser1 = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
ser2 = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])
print("Ser-1:\n", ser1, "\n\nSer-2: \n", ser2)

Ser-1:
 1    A
2    B
3    C
dtype: object 

Ser-2: 
 4    D
5    E
6    F
dtype: object


In [25]:
pd.concat([ser1, ser2], axis=0)

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [29]:
df1 = make_df('AB', [1, 2])
df2 = make_df('AB', [3, 4])

In [30]:
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [31]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [33]:
pd.concat([df1, df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [34]:
df3 = make_df('AB', [0, 1])
df4 = make_df('CD', [0, 1])

In [35]:
df3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [36]:
df4

Unnamed: 0,C,D
0,C0,D0
1,C1,D1


In [37]:
pd.concat([df3, df4], axis=1)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


In [38]:
x = make_df('AB', [0, 1])
y = make_df('AB', [2, 3])
y.index = x.index

In [41]:
pd.concat([x, y], axis=0)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A2,B2
1,A3,B3


In [42]:
pd.concat(
    [x, y],
    ignore_index=True
)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [43]:
pd.concat(
    [x, y],
    keys=['a', 'b']
)

Unnamed: 0,Unnamed: 1,A,B
a,0,A0,B0
a,1,A1,B1
b,0,A2,B2
b,1,A3,B3


In [52]:
df5 = make_df('ABC', [1, 2])
df6 = make_df('BCD', [3, 5])

In [53]:
pd.concat([df5, df6], axis = 0)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
5,,B5,C5,D5


In [58]:
pd.concat([df5, df6], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
5,B5,C5


In [59]:
pd.concat([df5, df6], join_axes=[df5.columns])

  """Entry point for launching an IPython kernel.


Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
5,,B5,C5


In [60]:
df1.append(df2)

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4
