# データの連結

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Build a 3x3 integer matrix holding 0..8 in row-major order.
arr1 = np.arange(9).reshape(3, 3)

In [3]:
# Echo the array to confirm the 3x3 layout of the values 0-8.
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

## 列方向に連結

In [4]:
# axis=1 places the second copy to the right of the first (3 rows x 6 columns).
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

## 行方向に連結

In [5]:
# axis=0 stacks the second copy underneath the first (6 rows x 3 columns).
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [6]:
# Three integers labelled T, U, V.
ser1 = Series([0, 1, 2], index=list('TUV'))

In [7]:
# Two integers labelled X, Y -- no labels in common with ser1.
ser2 = Series([3, 4], index=list('XY'))

In [8]:
# Show ser1: three integers labelled T, U, V.
ser1

T    0
U    1
V    2
dtype: int64

In [9]:
# Show ser2: two integers labelled X, Y.
ser2

X    3
Y    4
dtype: int64

In [10]:
# Default direction (axis=0): ser2's entries are appended below ser1's.
pd.concat([ser1, ser2], axis=0)

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [12]:
# Side-by-side concatenation: each Series becomes a column, rows are the union
# of both indexes, and labels missing from a Series are filled with NaN.
pd.concat([ser1, ser2], axis='columns')

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [13]:
# keys= tags each input with an outer index level, giving a two-level index.
pd.concat([ser1, ser2], axis=0, keys=['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [14]:
# With axis='columns' the keys become the column names of the resulting frame.
pd.concat([ser1, ser2], axis='columns', keys=['cat1', 'cat2'])

Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [15]:
# 4x3 frame of standard-normal values with columns X, Y, Z.
# The sample is drawn from a fixed-seed generator so that Restart & Run All
# reproduces exactly the same frame instead of new values on every run.
df1 = DataFrame(np.random.RandomState(0).randn(4, 3), columns=['X', 'Y', 'Z'])

In [16]:
# Display the 4x3 frame of random values with columns X, Y, Z.
df1

Unnamed: 0,X,Y,Z
0,1.242353,-0.309249,0.289885
1,1.063451,0.128782,0.042441
2,-0.523881,-2.594776,-0.64701
3,-0.998205,-1.452273,-0.476235


In [17]:
# 3x3 frame of standard-normal values with columns Y, Q, X: it shares X and Y
# with a frame that has columns X, Y, Z, in a different order, and adds Q.
# The sample is drawn from a fixed-seed generator so that Restart & Run All
# reproduces exactly the same frame instead of new values on every run.
df2 = DataFrame(np.random.RandomState(1).randn(3, 3), columns=['Y', 'Q', 'X'])

In [18]:
# Display the 3x3 frame of random values with columns Y, Q, X.
df2

Unnamed: 0,Y,Q,X
0,-0.037129,0.059768,-1.649101
1,-2.482105,-1.802293,1.830953
2,-0.711873,0.158742,-0.147931


In [19]:
# Stack df2 below df1. Columns are the union of both frames; cells for a column
# a frame does not have are NaN, and the original row labels are kept (so
# 0, 1, 2 appear twice).
# sort=True pins the alphabetical column order (Q, X, Y, Z); without it the
# order depends on the pandas version, because sorting of the
# non-concatenation axis is no longer the default.
pd.concat([df1, df2], sort=True)

Unnamed: 0,Q,X,Y,Z
0,,1.242353,-0.309249,0.289885
1,,1.063451,0.128782,0.042441
2,,-0.523881,-2.594776,-0.64701
3,,-0.998205,-1.452273,-0.476235
0,0.059768,-1.649101,-0.037129,
1,-1.802293,1.830953,-2.482105,
2,0.158742,-0.147931,-0.711873,


In [20]:
# Same stacking, but ignore_index=True discards the original row labels and
# renumbers the rows 0..6, avoiding duplicated index values.
# sort=True pins the alphabetical column order (Q, X, Y, Z); without it the
# order depends on the pandas version, because sorting of the
# non-concatenation axis is no longer the default.
pd.concat([df1, df2], ignore_index=True, sort=True)

Unnamed: 0,Q,X,Y,Z
0,,1.242353,-0.309249,0.289885
1,,1.063451,0.128782,0.042441
2,,-0.523881,-2.594776,-0.64701
3,,-0.998205,-1.452273,-0.476235
4,0.059768,-1.649101,-0.037129,
5,-1.802293,1.830953,-2.482105,
6,0.158742,-0.147931,-0.711873,


## データを組み合わせる

In [22]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [23]:
# Six entries labelled Q..V where every second value is missing.
ser1 = Series(
    [2, np.nan, 4, np.nan, 6, np.nan],
    index=list('QRSTUV'),
)

In [24]:
# Show ser1: every second entry (R, T, V) is NaN.
ser1

Q    2.0
R    NaN
S    4.0
T    NaN
U    6.0
V    NaN
dtype: float64

In [26]:
# A fully populated float Series 0.0, 1.0, ... with the same length and the
# same labels as ser1, used as the source of replacement values.
ser2 = Series(np.arange(len(ser1), dtype=np.float64), index=ser1.index)

In [27]:
# Show ser2: floats 0.0-5.0 on the labels Q..V, with no missing values.
ser2

Q    0.0
R    1.0
S    2.0
T    3.0
U    4.0
V    5.0
dtype: float64

In [28]:
# With a single argument np.where returns the integer positions where the
# mask is True, i.e. the positions of the NaN entries in ser1.
np.where(ser1.isnull())

(array([1, 3, 5]),)

## ser1がNaNの場合はser2の値を使い、それ以外はser1の値を使う

In [29]:
# Element-wise choice: take ser2 where ser1 is NaN, otherwise keep ser1.
# np.where returns a plain ndarray, so the new Series gets a default
# 0..n-1 index rather than the Q..V labels.
Series(np.where(ser1.isnull(), ser2, ser1))

0    2.0
1    1.0
2    4.0
3    3.0
4    6.0
5    5.0
dtype: float64

In [30]:
# Fill the NaN entries of ser1 with the value at the same label in ser2.
# The labels Q..V are kept in the result.
ser1.combine_first(ser2)

Q    2.0
R    1.0
S    4.0
T    3.0
U    6.0
V    5.0
dtype: float64

In [31]:
# Four-row frame with columns X, Y, Z; each row is missing either X or Y/Z.
df_odds = DataFrame(dict(
    X=[1, np.nan, 3, np.nan],
    Y=[np.nan, 5, np.nan, 9],
    Z=[np.nan, 9, np.nan, 11],
))

In [32]:
# Show df_odds: four rows, columns X, Y, Z, with NaN gaps in every row.
df_odds

Unnamed: 0,X,Y,Z
0,1.0,,
1,,5.0,9.0
2,3.0,,
3,,9.0,11.0


In [33]:
# Five-row frame with columns X and Y only (one more row than a four-row
# frame, and no Z column); it has one NaN in each column.
df_evens = DataFrame(dict(
    X=[2, 4, np.nan, 6, 8],
    Y=[np.nan, 10, 12, 14, 16],
))

In [34]:
# Show df_evens: five rows, columns X and Y.
df_evens

Unnamed: 0,X,Y
0,2.0,
1,4.0,10.0
2,,12.0
3,6.0,14.0
4,8.0,16.0


In [35]:
# Fill each NaN cell of df_odds with the value at the same row/column label in
# df_evens. The result covers the union of both frames' rows and columns, so
# row 4 (present only in df_evens) is added and cells that are missing in both
# frames stay NaN.
df_odds.combine_first(df_evens)

Unnamed: 0,X,Y,Z
0,1.0,,
1,4.0,5.0,9.0
2,3.0,12.0,
3,6.0,9.0,11.0
4,8.0,16.0,
