# Database, DBMS, SQL

In [123]:
import pandas as pd
import sqlite3

## 데이터베이스 연동하기

In [124]:
# Open the SQLite database file; the later cells run their queries through
# this `connect` handle.
connect = sqlite3.connect('../data/pandas-transaction.db')

# sqlite_master is queried here as the catalog: its `sql` column carries the
# CREATE statement of each stored object.
# (SQL covers select, create, update, delete.)
query = 'SELECT * FROM sqlite_master'
schema = pd.read_sql(sql=query, con=connect)

# Print one CREATE statement per catalog row.
for create_statement in schema['sql']:
    print(create_statement)

CREATE TABLE "transaction" (
"Name" TEXT,
  "date" TEXT,
  "amount" INTEGER,
  "product" TEXT,
  "result" TEXT
)
CREATE TABLE "product" (
"product" TEXT,
  "price" REAL
)
CREATE TABLE "user" (
"Name" TEXT,
  "Age" INTEGER,
  "Sex" TEXT,
  "email" TEXT
)


## 데이터 불러오기

In [125]:
# Load every row of the "transaction" table into a DataFrame.
# The table name is written as a double-quoted identifier (single quotes
# denote string literals in SQL), matching the CREATE TABLE statement.
query = '''
SELECT * FROM "transaction"
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Kang,2017-01-01,500,apple,confirmed
1,Kim,2017-01-03,700,banana,confirmed
2,Choi,2017-01-05,800,apple,confirmed
3,Park,2017-01-07,500,cereal,canceled
4,Kang,2017-01-08,1200,kiwi,confirmed
5,Choi,2017-01-09,100,melon,confirmed
6,Lee,2017-01-09,700,banana,confirmed
7,Yoon,2017-01-10,200,cereal,canceled


In [126]:
# AS may be omitted when giving a column an alias.
# A constant can be selected as an extra column; a string constant has to be
# written in single quotes.
# NOTE(review): the original note said the resulting column name keeps the
# quotes. With the alias used here the column is simply `Company`; the quoted
# name would presumably only appear if the alias were left out - confirm.
query = '''
SELECT Name AS name, date, 'DS School' AS Company FROM "transaction"
'''

pd.read_sql(query, connect)

Unnamed: 0,name,date,Company
0,Kang,2017-01-01,DS School
1,Kim,2017-01-03,DS School
2,Choi,2017-01-05,DS School
3,Park,2017-01-07,DS School
4,Kang,2017-01-08,DS School
5,Choi,2017-01-09,DS School
6,Lee,2017-01-09,DS School
7,Yoon,2017-01-10,DS School


In [127]:
# LIMIT <offset>, <count>: skip the first 3 rows and return the next 2.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
LIMIT 3, 2
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Park,2017-01-07,500,cereal,canceled
1,Kang,2017-01-08,1200,kiwi,confirmed


## 기본 기능

### 정렬

In [128]:
# Sort by amount (largest first); rows with equal amounts are ordered by
# date, earliest first.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
ORDER BY amount DESC, date ASC
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Kang,2017-01-08,1200,kiwi,confirmed
1,Choi,2017-01-05,800,apple,confirmed
2,Kim,2017-01-03,700,banana,confirmed
3,Lee,2017-01-09,700,banana,confirmed
4,Kang,2017-01-01,500,apple,confirmed
5,Park,2017-01-07,500,cereal,canceled
6,Yoon,2017-01-10,200,cereal,canceled
7,Choi,2017-01-09,100,melon,confirmed


## group 함수

In [129]:
# Aggregate a single value over the whole table.
# The table name is a double-quoted identifier rather than a string literal.
# NOTE(review): `date` is a TEXT column, so SUM() appears to add only the
# leading numeric prefix of each value (2017 per row, 8 rows -> 16136.0).
# SUM(amount) was probably intended - confirm before changing the query,
# since the recorded output reflects SUM(date).
query = '''
SELECT SUM(date) FROM "transaction"
'''

pd.read_sql(query, connect)

Unnamed: 0,SUM(date)
0,16136.0


In [130]:
# List each distinct amount once.
# DISTINCT is a keyword that applies to the whole select list, not a
# function, so it is written without parentheses.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT DISTINCT amount FROM "transaction"
'''

pd.read_sql(query, connect)

Unnamed: 0,amount
0,500
1,700
2,800
3,1200
4,100
5,200


### group by

In [131]:
# Total and average amount per date: one output row for each distinct date.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT date, SUM(amount), AVG(amount) FROM "transaction"
GROUP BY date
'''

pd.read_sql(query, connect)

Unnamed: 0,date,SUM(amount),AVG(amount)
0,2017-01-01,500,500.0
1,2017-01-03,700,700.0
2,2017-01-05,800,800.0
3,2017-01-07,500,500.0
4,2017-01-08,1200,1200.0
5,2017-01-09,800,400.0
6,2017-01-10,200,200.0


## 조건

In [132]:
# Keep rows whose amount lies strictly between 500 and 1000.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
WHERE (amount > 500) AND (amount < 1000)
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Kim,2017-01-03,700,banana,confirmed
1,Choi,2017-01-05,800,apple,confirmed
2,Lee,2017-01-09,700,banana,confirmed


In [133]:
# Same amount range as the previous query, additionally excluding bananas.
# `<>` is the SQL "not equal" operator; it means the same as `!=`.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
WHERE product <> 'banana'
    AND (amount > 500) AND (amount < 1000)
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Choi,2017-01-05,800,apple,confirmed


In [134]:
# NOT negates a condition: negating `product <> 'banana'` leaves only the
# banana rows, listed here from the latest date to the earliest.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
WHERE NOT (product <> 'banana')
ORDER BY date DESC
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Lee,2017-01-09,700,banana,confirmed
1,Kim,2017-01-03,700,banana,confirmed


In [135]:
# BETWEEN includes both bounds: rows with amount 500 and 800 are returned.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
WHERE amount BETWEEN 500 AND 800
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Kang,2017-01-01,500,apple,confirmed
1,Kim,2017-01-03,700,banana,confirmed
2,Choi,2017-01-05,800,apple,confirmed
3,Park,2017-01-07,500,cereal,canceled
4,Lee,2017-01-09,700,banana,confirmed


In [136]:
# Keep rows whose product is not one of the listed values.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
WHERE product NOT IN ('apple', 'banana')
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Park,2017-01-07,500,cereal,canceled
1,Kang,2017-01-08,1200,kiwi,confirmed
2,Choi,2017-01-09,100,melon,confirmed
3,Yoon,2017-01-10,200,cereal,canceled


In [137]:
# LIKE wildcards:
#   %  matches zero or more characters (any length)
#   _  matches exactly one character
# 'a_%e' therefore means: starts with "a", has at least one more character,
# and ends with "e"; NOT LIKE keeps the rows that do not fit that pattern.
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "transaction"
WHERE product NOT LIKE 'a_%e'
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Kim,2017-01-03,700,banana,confirmed
1,Park,2017-01-07,500,cereal,canceled
2,Kang,2017-01-08,1200,kiwi,confirmed
3,Choi,2017-01-09,100,melon,confirmed
4,Lee,2017-01-09,700,banana,confirmed
5,Yoon,2017-01-10,200,cereal,canceled


In [138]:
# Combine several conditions inside a subquery and select everything from it:
#   - amount strictly between 100 and 500, or at least 700
#   - date other than 2017-01-01
#   - product name containing the letter "a"
#   - result equal to 'confirmed'
# The table name is a double-quoted identifier rather than a string literal.
# NOTE(review): the ORDER BY sits inside the subquery; the outer SELECT has
# no ORDER BY of its own, so the final row order relies on the engine keeping
# the inner order - consider moving it to the outer query.
query = '''
SELECT * FROM (SELECT * FROM "transaction"
WHERE ((amount > 100 AND amount < 500) OR amount >= 700)
    AND date NOT IN ('2017-01-01')
    AND product LIKE '%a%'
    AND result = 'confirmed'
ORDER BY amount DESC, date DESC)
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result
0,Choi,2017-01-05,800,apple,confirmed
1,Lee,2017-01-09,700,banana,confirmed
2,Kim,2017-01-03,700,banana,confirmed


## Join 

In [139]:
# Load every row of the "product" table (product name and price).
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "product"
'''

pd.read_sql(query, connect)

Unnamed: 0,product,price
0,apple,5.0
1,blueberry,7.0
2,banana,2.5
3,cereal,4.5
4,kiwi,10.0
5,melon,12.5
6,pineapple,15.0


In [140]:
# Load every row of the "user" table (Name, Age, Sex, email).
# The table name is a double-quoted identifier rather than a string literal.
query = '''
SELECT * FROM "user"
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,Age,Sex,email
0,Kang,30,male,kang@gmail.com
1,Kim,22,female,kim@naver.com
2,Park,37,male,park@dsschool.co.kr
3,Lee,15,female,lee@empas.co.kr
4,Son,29,male,son@google.co.uk
5,Moon,40,female,moon@hanmail.com
6,Choi,34,male,choi@yahoo.com


In [141]:
# INNER JOIN keeps only transactions whose Name also exists in "user";
# a transaction without a matching user row is dropped from the result.
# Table names are double-quoted identifiers rather than string literals.
query = '''
SELECT * FROM "transaction"
INNER JOIN "user" ON "transaction".Name = "user".Name
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result,Name.1,Age,Sex,email
0,Kang,2017-01-01,500,apple,confirmed,Kang,30,male,kang@gmail.com
1,Kim,2017-01-03,700,banana,confirmed,Kim,22,female,kim@naver.com
2,Choi,2017-01-05,800,apple,confirmed,Choi,34,male,choi@yahoo.com
3,Park,2017-01-07,500,cereal,canceled,Park,37,male,park@dsschool.co.kr
4,Kang,2017-01-08,1200,kiwi,confirmed,Kang,30,male,kang@gmail.com
5,Choi,2017-01-09,100,melon,confirmed,Choi,34,male,choi@yahoo.com
6,Lee,2017-01-09,700,banana,confirmed,Lee,15,female,lee@empas.co.kr


In [142]:
# LEFT JOIN keeps every transaction (alias A) and attaches the email of the
# matching user (alias B); a transaction without a matching user gets an
# empty email.
# Table names are double-quoted identifiers rather than string literals.
query = '''
SELECT A.*, B.email FROM "transaction" AS A
LEFT JOIN "user" AS B ON A.Name = B.Name
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result,email
0,Kang,2017-01-01,500,apple,confirmed,kang@gmail.com
1,Kim,2017-01-03,700,banana,confirmed,kim@naver.com
2,Choi,2017-01-05,800,apple,confirmed,choi@yahoo.com
3,Park,2017-01-07,500,cereal,canceled,park@dsschool.co.kr
4,Kang,2017-01-08,1200,kiwi,confirmed,kang@gmail.com
5,Choi,2017-01-09,100,melon,confirmed,choi@yahoo.com
6,Lee,2017-01-09,700,banana,confirmed,lee@empas.co.kr
7,Yoon,2017-01-10,200,cereal,canceled,


In [143]:
# Chain two LEFT JOINs: every transaction (A) is extended with its user (B)
# and its product (C); columns of a missing match are left empty.
# Table names are double-quoted identifiers rather than string literals.
query = '''
SELECT * FROM "transaction" AS A
LEFT JOIN "user" AS B ON A.Name = B.Name
LEFT JOIN "product" AS C ON A.product = C.product
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result,Name.1,Age,Sex,email,product.1,price
0,Kang,2017-01-01,500,apple,confirmed,Kang,30.0,male,kang@gmail.com,apple,5.0
1,Kim,2017-01-03,700,banana,confirmed,Kim,22.0,female,kim@naver.com,banana,2.5
2,Choi,2017-01-05,800,apple,confirmed,Choi,34.0,male,choi@yahoo.com,apple,5.0
3,Park,2017-01-07,500,cereal,canceled,Park,37.0,male,park@dsschool.co.kr,cereal,4.5
4,Kang,2017-01-08,1200,kiwi,confirmed,Kang,30.0,male,kang@gmail.com,kiwi,10.0
5,Choi,2017-01-09,100,melon,confirmed,Choi,34.0,male,choi@yahoo.com,melon,12.5
6,Lee,2017-01-09,700,banana,confirmed,Lee,15.0,female,lee@empas.co.kr,banana,2.5
7,Yoon,2017-01-10,200,cereal,canceled,,,,,cereal,4.5


## Subquery

In [152]:
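# Topics only named here, with no example in this cell
# (presumably to be covered later - confirm):
# CASE WHEN
# HAVING
# PARTITION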

In [151]:
# Use a joined, filtered result as a subquery: transactions (A) with their
# user (B) and product (C), restricted to apples and bananas and ordered by
# amount, then selected in full by the outer query.
# Table names are double-quoted identifiers rather than string literals.
# NOTE(review): the ORDER BY sits inside the subquery; the outer SELECT has
# no ORDER BY of its own, so the final row order relies on the engine keeping
# the inner order - consider moving it to the outer query.
query = '''
SELECT * FROM (SELECT * FROM "transaction" AS A
    LEFT JOIN "user" AS B ON A.Name = B.Name
    LEFT JOIN "product" AS C ON A.product = C.product
    WHERE A.product IN ('apple', 'banana')
    ORDER BY amount)
'''

pd.read_sql(query, connect)

Unnamed: 0,Name,date,amount,product,result,Name:1,Age,Sex,email,product:1,price
0,Kang,2017-01-01,500,apple,confirmed,Kang,30,male,kang@gmail.com,apple,5.0
1,Kim,2017-01-03,700,banana,confirmed,Kim,22,female,kim@naver.com,banana,2.5
2,Lee,2017-01-09,700,banana,confirmed,Lee,15,female,lee@empas.co.kr,banana,2.5
3,Choi,2017-01-05,800,apple,confirmed,Choi,34,male,choi@yahoo.com,apple,5.0
