- [1주차 솔루션](https://goo.gl/aBHxyq)
- [2주차 설명+요약본](https://goo.gl/r2vbMH)
- [transaction dataset](https://goo.gl/bLyBgS)
- [과제](https://goo.gl/k8bnUr)

In [1]:
import pandas as pd

## 1. Load Data
### 1-1. Csv

In [25]:
# Load the transaction CSV, keeping only the columns we need.
transaction = pd.read_csv(
    "../data/transaction/transaction.csv",
    usecols=["Name", "date", "amount"],  # columns to keep
    index_col="Name",  # column used as the row index
    parse_dates=["date"],  # parsed as datetime; a frequently used option
)
transaction

In [26]:
# Show the dtype pandas assigned to each column of `transaction`.
transaction.dtypes

### 1-2. Tsv

In [27]:
# A TSV file is read with read_csv by setting the separator to a tab.
pd.read_csv("../data/transaction/transaction.tsv", sep="\t")

### 1-3. Excel

In [28]:
# Read the transaction data from an Excel (.xls) workbook.
pd.read_excel('../data/transaction/transaction.xls')

### 1-4. HDF 

In [13]:
# Read the transaction data stored in HDF5 format. No `key` is passed, so
# this presumably relies on the file holding a single object -- TODO confirm.
pd.read_hdf('../data/transaction/transaction.h5')

### 1-5. Url 

In [14]:
# read_csv is given a URL here instead of a local file path.
transaction_url = "https://goo.gl/WhzcFa"
pd.read_csv(transaction_url)

# 2. 데이터베이스에서 읽어오기
- 문법
- 속도
- - -
[ORM - sql alchemy] 
- https://www.sqlalchemy.org/
- http://docs.sqlalchemy.org/en/latest/orm/examples.html
- sql alchemy가 지원이 되게! (27:50)

In [19]:
import sqlite3

# One shared connection to the SQLite file. Later cells reuse `connection`,
# so it is intentionally left open here.
connection = sqlite3.connect("../data/transaction/pandas-transaction.db")

# `transaction` is an SQL keyword, so the table name must be quoted. Standard
# SQL quotes identifiers with double quotes; single quotes denote a string
# literal and are only tolerated by SQLite as a compatibility quirk.
query = 'select * from "transaction"'

# Run the query and return the whole table as a DataFrame.
pd.read_sql(query, connection)

In [29]:
# Select only the `date` and `amount` columns. The keyword table name is
# quoted as an identifier (double quotes) rather than as a string literal.
query = 'select date, amount from "transaction"'

pd.read_sql(query, connection)

In [30]:
# Fetch only the first three rows. The keyword table name is quoted as an
# identifier (double quotes) rather than as a string literal.
query = 'select * from "transaction" limit 3'

pd.read_sql(query, connection)

# 3. 여러 개의 데이터를 하나로 합치기
- 인덱스가 일치할 경우 : concat
- pd.concat([df1, df2], axis=n)
    - axis = 0 -> 위 아래로 붙이기 (행 방향, 기본값)
    - axis = 1 -> 옆으로 붙이기 (열 방향)

In [33]:
# Load each year's transactions from its own CSV file.
transaction2017 = pd.read_csv("../data/transaction/transaction.csv")
transaction2016 = pd.read_csv("../data/transaction/transaction2016.csv")

# concat is short for "concatenate"; axis=0 (the default) stacks the rows of
# the second frame below the first.
yearly_frames = [transaction2017, transaction2016]
transaction = pd.concat(yearly_frames, axis=0)
transaction

In [34]:
# Reload the 2016 data with the "Name" column as the row index, so rows are
# labelled by customer name instead of by position.
transaction2016 = pd.read_csv("../data/transaction/transaction2016.csv", index_col="Name")
transaction2016

Unnamed: 0_level_0,amount,date,result
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lee,400,2016-01-01,confirmed
Young,300,2016-01-05,confirmed
Kim,700,2016-01-06,confirmed
Choi,800,2016-01-07,canceled
Yoon,500,2016-01-11,confirmed
Jang,200,2016-01-13,confirmed
Park,300,2016-01-20,canceled


In [36]:
# Payment method and installment plan for three of the 2016 customers,
# built column by column and indexed by customer name.
how2016 = pd.DataFrame(
    {
        'how': ["무통장 입금", "카드 결제", "카드 결제"],
        'installment': ["없음", "무이자", "3개월"],
    },
    index=["Park", "Kim", "Choi"],
)
how2016

Unnamed: 0,how,installment
Park,무통장 입금,없음
Kim,카드 결제,무이자
Choi,카드 결제,3개월


#### pd.concat([df1, df2], axis)
- axis = 0 -> 위 아래로 붙이기 (행 방향, 기본값)
- axis = 1 -> 옆으로 붙이기 (열 방향)

In [37]:
# axis=1 aligns the two frames on their index (customer name) and places the
# columns side by side; customers missing from `how2016` get empty values.
pd.concat([transaction2016, how2016], axis=1)

Unnamed: 0,amount,date,result,how,installment
Choi,800,2016-01-07,canceled,카드 결제,3개월
Jang,200,2016-01-13,confirmed,,
Kim,700,2016-01-06,confirmed,카드 결제,무이자
Lee,400,2016-01-01,confirmed,,
Park,300,2016-01-20,canceled,무통장 입금,없음
Yoon,500,2016-01-11,confirmed,,
Young,300,2016-01-05,confirmed,,


## 4. 서로 다른 형태의 데이터를 하나로 합치기
- 인덱스가 일치하지 않는 경우 : 공통 컬럼(예: Name)을 기준으로 merge

In [38]:
# Reload the transactions with the default integer index, keeping "Name" as
# an ordinary column so it can serve as the merge key.
transaction = pd.read_csv('../data/transaction/transaction.csv')
transaction

Unnamed: 0,Name,date,amount,result
0,Kang,2017-01-01,500,confirmed
1,Kim,2017-01-03,700,confirmed
2,Choi,2017-01-05,800,confirmed
3,Park,2017-01-07,500,canceled
4,Lee,2017-01-09,700,confirmed
5,Yoon,2017-01-10,200,canceled


In [39]:
# Contact details, one record per customer. "Name" is the column shared with
# `transaction`.
profile = pd.DataFrame([
    dict(Name="Kang", Phone="010-1234-5678", Email="kang@gmail.com"),
    dict(Name="Kim", Phone="010-7871-6123", Email="kim@naver.com"),
    dict(Name="Lee", Phone="010-6201-9204", Email="lee@gmail.com"),
    dict(Name="Jang", Phone="010-7711-3553", Email="jang@gmail.com"),
])
profile

Unnamed: 0,Email,Name,Phone
0,kang@gmail.com,Kang,010-1234-5678
1,kim@naver.com,Kim,010-7871-6123
2,lee@gmail.com,Lee,010-6201-9204
3,jang@gmail.com,Jang,010-7711-3553


In [41]:
# Inner join on "Name": keep only customers present in both `transaction`
# and `profile`. The key is spelled out so the join does not silently change
# if the two frames ever gain another column in common.
pd.merge(transaction, profile, how='inner', on='Name')

Unnamed: 0,Name,date,amount,result,Email,Phone
0,Kang,2017-01-01,500,confirmed,kang@gmail.com,010-1234-5678
1,Kim,2017-01-03,700,confirmed,kim@naver.com,010-7871-6123
2,Lee,2017-01-09,700,confirmed,lee@gmail.com,010-6201-9204


In [42]:
# Outer join on "Name": keep every customer found in either frame and leave
# the fields missing from the other side empty (which is why `amount` shows
# up as float). The key is spelled out so the join does not silently change
# if the two frames ever gain another column in common.
pd.merge(transaction, profile, how='outer', on='Name')

Unnamed: 0,Name,date,amount,result,Email,Phone
0,Kang,2017-01-01,500.0,confirmed,kang@gmail.com,010-1234-5678
1,Kim,2017-01-03,700.0,confirmed,kim@naver.com,010-7871-6123
2,Choi,2017-01-05,800.0,confirmed,,
3,Park,2017-01-07,500.0,canceled,,
4,Lee,2017-01-09,700.0,confirmed,lee@gmail.com,010-6201-9204
5,Yoon,2017-01-10,200.0,canceled,,
6,Jang,,,,jang@gmail.com,010-7711-3553


In [43]:
# Left join on "Name": keep every row of `transaction` and attach profile
# fields only where the customer has a profile. The key is spelled out so the
# join does not silently change if the two frames ever gain another column in
# common.
pd.merge(transaction, profile, how='left', on='Name')

Unnamed: 0,Name,date,amount,result,Email,Phone
0,Kang,2017-01-01,500,confirmed,kang@gmail.com,010-1234-5678
1,Kim,2017-01-03,700,confirmed,kim@naver.com,010-7871-6123
2,Choi,2017-01-05,800,confirmed,,
3,Park,2017-01-07,500,canceled,,
4,Lee,2017-01-09,700,confirmed,lee@gmail.com,010-6201-9204
5,Yoon,2017-01-10,200,canceled,,


In [44]:
# Right join on "Name": keep every row of `profile` and attach transaction
# fields only where the customer has a transaction. The key is spelled out so
# the join does not silently change if the two frames ever gain another
# column in common.
pd.merge(transaction, profile, how='right', on='Name')

Unnamed: 0,Name,date,amount,result,Email,Phone
0,Kang,2017-01-01,500.0,confirmed,kang@gmail.com,010-1234-5678
1,Kim,2017-01-03,700.0,confirmed,kim@naver.com,010-7871-6123
2,Lee,2017-01-09,700.0,confirmed,lee@gmail.com,010-6201-9204
3,Jang,,,,jang@gmail.com,010-7711-3553


정리: main이 되는 DataFrame을 왼쪽에 두고 공통 컬럼(Name)을 기준으로 left 조인하면, main 쪽 행은 모두 유지되고 오른쪽 값은 키가 일치할 때만 붙는다.