## 데이터프레임의 결합
1. 단순한 행, 열 결합

In [1]:
import pandas as pd

In [4]:
# Sample frame with two columns, S1 and S2, holding four integers each.
df1 = pd.DataFrame(data={'S1': [1, 2, 3, 4], 'S2': [5, 6, 7, 8]})
df1

Unnamed: 0,S1,S2
0,1,5
1,2,6
2,3,7
3,4,8


In [6]:
# Single unnamed column (pandas labels it 0) with two rows.
df2 = pd.DataFrame(data=[1, 2])
df2

Unnamed: 0,0
0,1
1,2


## concat()
1. 데이터프레임을 결합하는 함수
2. 매개변수 axis -> 행을 추가할지 열을 추가할지 지정
3. 단순하게 데이터프레임을 결합
4. 매개변수 ignore_index -> 기본값은 False(기존 인덱스 유지), True로 지정하면 기존 인덱스를 무시하고 0부터 새 인덱스를 부여

In [7]:
# Plain row-wise append: df2 is stacked beneath df1, and each frame keeps
# its own index labels (so labels 0 and 1 appear twice in the result).
pd.concat(objs=[df1, df2], axis='index')

Unnamed: 0,S1,S2,0
0,1.0,5.0,
1,2.0,6.0,
2,3.0,7.0,
3,4.0,8.0,
0,,,1.0
1,,,2.0


In [8]:
# Case 1 for re-indexing: stack the rows first, then drop the old labels
# so the result gets a fresh 0..n-1 index.
(
    pd.concat(objs=[df1, df2], axis='index')
    .reset_index(drop=True)
)

Unnamed: 0,S1,S2,0
0,1.0,5.0,
1,2.0,6.0,
2,3.0,7.0,
3,4.0,8.0,
4,,,1.0
5,,,2.0


In [9]:
# Case 2 for re-indexing: let concat itself discard the old labels and
# number the stacked rows from 0.
pd.concat(objs=[df1, df2], axis='index', ignore_index=True)

Unnamed: 0,S1,S2,0
0,1.0,5.0,
1,2.0,6.0,
2,3.0,7.0,
3,4.0,8.0,
4,,,1.0
5,,,2.0


In [10]:
# Plain column-wise append: df2's column is placed to the right of df1,
# aligned on the index; rows df2 does not have are filled with NaN.
pd.concat(objs=[df1, df2], axis='columns')

Unnamed: 0,S1,S2,0
0,1,5,1.0
1,2,6,2.0
2,3,7,
3,4,8,


In [11]:
# Two-row frame that only has the S1 column (no S2).
df3 = pd.DataFrame(data={'S1': [1, 2]})
df3

Unnamed: 0,S1
0,1
1,2


In [14]:
# Row-wise stack of frames that share only S1: the rows coming from df3
# get NaN in S2 because df3 has no such column.
pd.concat(objs=[df1, df3], axis='index')

Unnamed: 0,S1,S2
0,1,5.0
1,2,6.0
2,3,7.0
3,4,8.0
0,1,
1,2,


## merge()
1. 데이터프레임을 결합하는 함수
2. 특정 조건에 맞춰서 열을 추가
3. 매개변수 on ->  조건
4. 매개변수 how -> 결합의 기준이 되는 데이터프레임을 지정 (left, right, inner, outer)

In [16]:
# Stock table: one row per listed company.
# - Stock codes stay as strings so their leading zeros are preserved.
# - Prices are plain integers. The original SK하이닉스 price was the string
#   ' 101500' (with a leading space), which turned the whole 현재가 column
#   into object dtype; it is now the integer 101500.
# - ' LG화학' carried a stray leading space that would break exact-match
#   lookups on 종목명; it is now 'LG화학'.
data1 = [
    ['전기전자', '005930', '삼성전자', 74400],
    ['화학', '051910', 'LG화학', 896000],
    ['전기전자', '000660', 'SK하이닉스', 101500],
]
column1 = ['업종', '종목코드', '종목명', '현재가']

df1 = pd.DataFrame(data=data1, columns=column1)
df1

Unnamed: 0,업종,종목코드,종목명,현재가
0,전기전자,5930,삼성전자,74400
1,화학,51910,LG화학,896000
2,전기전자,660,SK하이닉스,101500


In [19]:
# Per-sector table: one row per 업종 value with its 등락률 figure.
column2 = ['업종', '등락률']
data2 = [
    ['은행', 2.92],
    ['보험', 0.37],
    ['화학', 0.06],
    ['전기전자', -2.43],
]

df2 = pd.DataFrame(data2, columns=column2)
df2

Unnamed: 0,업종,등락률
0,은행,2.92
1,보험,0.37
2,화학,0.06
3,전기전자,-2.43


In [20]:
# Left join on 업종: every df1 row is kept and the matching 등락률 from df2
# is attached to it.
df1.merge(df2, on='업종', how='left')

Unnamed: 0,업종,종목코드,종목명,현재가,등락률
0,전기전자,5930,삼성전자,74400,-2.43
1,화학,51910,LG화학,896000,0.06
2,전기전자,660,SK하이닉스,101500,-2.43


In [25]:
# Right join on 업종: every df2 row is kept; sectors with no match in df1
# get NaN in the columns that come from df1.
df1.merge(df2, on='업종', how='right')

Unnamed: 0,업종,종목코드,종목명,현재가,등락률
0,은행,,,,2.92
1,보험,,,,0.37
2,화학,51910.0,LG화학,896000.0,0.06
3,전기전자,5930.0,삼성전자,74400.0,-2.43
4,전기전자,660.0,SK하이닉스,101500.0,-2.43


In [31]:
# One extra stock row with the same four columns as the stock table.
# The stock code is a string so its leading zero is preserved.
df3 = pd.DataFrame(
    data=[['서비스업', '035720', '카카오', 121500]],
    columns=['업종', '종목코드', '종목명', '현재가'],
)
df3

Unnamed: 0,업종,종목코드,종목명,현재가
0,서비스업,35720,카카오,121500


In [33]:
# Append the df3 row beneath df1 and renumber the index from 0.
df4 = pd.concat(objs=[df1, df3], axis='index', ignore_index=True)
df4

Unnamed: 0,업종,종목코드,종목명,현재가
0,전기전자,5930,삼성전자,74400
1,화학,51910,LG화학,896000
2,전기전자,660,SK하이닉스,101500
3,서비스업,35720,카카오,121500


In [34]:
# Left join on 업종: all df4 rows are kept; a sector absent from df2
# ends up with NaN in 등락률.
df4.merge(df2, on='업종', how='left')

Unnamed: 0,업종,종목코드,종목명,현재가,등락률
0,전기전자,5930,삼성전자,74400,-2.43
1,화학,51910,LG화학,896000,0.06
2,전기전자,660,SK하이닉스,101500,-2.43
3,서비스업,35720,카카오,121500,


In [35]:
# Right join on 업종: all df2 rows are kept; df4 rows whose sector is not
# in df2 are dropped from the result.
df4.merge(df2, on='업종', how='right')

Unnamed: 0,업종,종목코드,종목명,현재가,등락률
0,은행,,,,2.92
1,보험,,,,0.37
2,화학,51910.0,LG화학,896000.0,0.06
3,전기전자,5930.0,삼성전자,74400.0,-2.43
4,전기전자,660.0,SK하이닉스,101500.0,-2.43


In [36]:
# Inner join on 업종: only sectors present in both df4 and df2 remain.
df4.merge(df2, on='업종', how='inner')

Unnamed: 0,업종,종목코드,종목명,현재가,등락률
0,전기전자,5930,삼성전자,74400,-2.43
1,전기전자,660,SK하이닉스,101500,-2.43
2,화학,51910,LG화학,896000,0.06


In [37]:
# Outer join on 업종: rows from both frames are kept, with NaN wherever
# one side has no matching sector.
df4.merge(df2, on='업종', how='outer')

Unnamed: 0,업종,종목코드,종목명,현재가,등락률
0,전기전자,5930.0,삼성전자,74400.0,-2.43
1,전기전자,660.0,SK하이닉스,101500.0,-2.43
2,화학,51910.0,LG화학,896000.0,0.06
3,서비스업,35720.0,카카오,121500.0,
4,은행,,,,2.92
5,보험,,,,0.37
