# CONCATENACIÓN DE MATRICES Y DF

In [49]:
import pandas as pd
import numpy as np

pd.options.display.float_format = '{:_.2f}'.format
np.set_printoptions(precision=1)

## Concatenar con NUMPY

In [50]:
x1 = np.random.rand(2,5)*10
x2 = np.random.rand(2,5)*-1

In [51]:
np.concatenate([x1,x2])

array([[ 5.9,  7.3,  5.1,  6.1,  2.8],
       [ 1.4,  8.4,  9.4,  5.1,  2.6],
       [-0.9, -0.3, -0.8, -0.9, -0.3],
       [-0.1, -0.5, -0.6, -0.8, -0.2]])

In [52]:
np.concatenate([x1,x2],axis=1)

array([[ 5.9,  7.3,  5.1,  6.1,  2.8, -0.9, -0.3, -0.8, -0.9, -0.3],
       [ 1.4,  8.4,  9.4,  5.1,  2.6, -0.1, -0.5, -0.6, -0.8, -0.2]])

## Concatenar con PANDAS

In [53]:
s1 = pd.Series(x1[0], index=['a','b','c','d','e'])
s2 = pd.Series(x2[0], index=['c','d','e','f','g'])

In [54]:
pd.concat([s1,s2])

a    5.92
b    7.26
c    5.12
d    6.07
e    2.83
c   -0.89
d   -0.27
e   -0.76
f   -0.85
g   -0.33
dtype: float64

In [55]:
pd.concat([s1,s2], axis=1)
# Se concatenan respetando los índices

Unnamed: 0,0,1
a,5.92,
b,7.26,
c,5.12,-0.89
d,6.07,-0.27
e,2.83,-0.76
f,,-0.85
g,,-0.33


In [56]:
print(type(s1))
print(type(s1.reset_index()))
print(type(s1.reset_index(drop=True)))

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [57]:
pd.concat([s1.reset_index(drop=True),s2.reset_index(drop=True)], axis=1)
# ya no se respeta el índice creado previamente, si no el nuevo reseteado

Unnamed: 0,0,1
0,5.92,-0.89
1,7.26,-0.27
2,5.12,-0.76
3,6.07,-0.85
4,2.83,-0.33


### DataFrames

In [58]:
df1 = pd.DataFrame(np.random.rand(3,2)*10,columns=['a','b'])
df2 = pd.DataFrame(np.random.rand(3,2)*-1,columns=['a','b'], index = [2,3,4])

In [59]:
pd.concat([df1,df2])
# Se repite el índice 2

Unnamed: 0,a,b
0,9.54,9.03
1,6.27,2.44
2,5.49,1.97
2,-0.5,-0.88
3,-0.7,-0.48
4,-0.25,-0.13


In [60]:
pd.concat([df1,df2], axis=1)
# Se comparte el índice 2

Unnamed: 0,a,b,a.1,b.1
0,9.54,9.03,,
1,6.27,2.44,,
2,5.49,1.97,-0.5,-0.88
3,,,-0.7,-0.48
4,,,-0.25,-0.13


Para

In [61]:
pd.concat([df1,df2], axis=1, join='inner')

Unnamed: 0,a,b,a.1,b.1
2,5.49,1.97,-0.5,-0.88


In [62]:
pd.concat([
           df1.reset_index(drop=True),
           df2.reset_index(drop=True)
           ], axis=1)
# ya no se respeta el índice creado previamente, si no el nuevo reseteado

Unnamed: 0,a,b,a.1,b.1
0,9.54,9.03,-0.5,-0.88
1,6.27,2.44,-0.7,-0.48
2,5.49,1.97,-0.25,-0.13


Usando .append()

In [63]:
df1.append(df2)

Unnamed: 0,a,b
0,9.54,9.03
1,6.27,2.44
2,5.49,1.97
2,-0.5,-0.88
3,-0.7,-0.48
4,-0.25,-0.13


In [64]:
df1.T.append(df2.T).T

Unnamed: 0,a,b,a.1,b.1
0,9.54,9.03,,
1,6.27,2.44,,
2,5.49,1.97,-0.5,-0.88
3,,,-0.7,-0.48
4,,,-0.25,-0.13


## Update

actualiza los valores de df1 tomando los de df2 , pero sólo de las columnas que tienen en común.

In [65]:
print(df1.update(df2))

None


# MERGE

Es una concatenación cuando los dos DF contienen una columna en común

In [66]:
df_left = pd.DataFrame(
                      {'X': ['x0', 'x1', 'x2', 'x3'],
                        'W': ['w0', 'w1', 'w2', 'w3'],
                        'Y': ['y0', 'y1', 'y2', 'y3'],
                        'Mix': ['y2','y3','a2','a3']},
                       index=[0,1,2,3])
df_left

Unnamed: 0,X,W,Y,Mix
0,x0,w0,y0,y2
1,x1,w1,y1,y3
2,x2,w2,y2,a2
3,x3,w3,y3,a3


In [67]:
df_right = pd.DataFrame(
                       {'Z': ['z2', 'z3', 'z4', 'z5'],
                         'A': ['a2', 'a3', 'a4', 'a5'], 
                         'Y': ['y2', 'y3', 'y4', 'y5']},
                        index=[2,3,4,5])
df_right

Unnamed: 0,Z,A,Y
2,z2,a2,y2
3,z3,a3,y3
4,z4,a4,y4
5,z5,a5,y5


La columna común es 'Y'

In [69]:
# pd.merge(df_left, df_right)
# internamente se realiza con estos parámetros
pd.merge(df_left, df_right, how='inner', on='Y')

Unnamed: 0,X,W,Y,Mix,Z,A
0,x2,w2,y2,a2,z2,a2
1,x3,w3,y3,a3,z3,a3


In [72]:
pd.merge(df_left, df_right, how='inner', left_on='Mix', right_on='Y')

Unnamed: 0,X,W,Y_x,Mix,Z,A,Y_y
0,x0,w0,y0,y2,z2,a2,y2
1,x1,w1,y1,y3,z3,a3,y3


In [73]:
pd.merge(df_left, df_right, how='inner', left_on='Mix', right_on='A')

Unnamed: 0,X,W,Y_x,Mix,Z,A,Y_y
0,x2,w2,y2,a2,z2,a2,y2
1,x3,w3,y3,a3,z3,a3,y3


![](http://www.datasciencemadesimple.com/wp-content/uploads/2017/09/join-or-merge-in-python-pandas-1.png)

In [74]:
pd.merge(df_left, df_right, how='left', on='Y')

Unnamed: 0,X,W,Y,Mix,Z,A
0,x0,w0,y0,y2,,
1,x1,w1,y1,y3,,
2,x2,w2,y2,a2,z2,a2
3,x3,w3,y3,a3,z3,a3
