## SQL 활용 데이터 프레임 생성

Python에서 Pandas와의 연계를 위한 MySQL과의 연동은 다양한 방법이 있지만 
그 중에서 pymysql 과 SQLAlchemy 가 가장 널리 사용된다.

In [1]:
# 필요한 패키지 가져오기
import pymysql
from sqlalchemy import create_engine
from pandas import DataFrame
from pandas import read_sql, read_sql_table

#### 1. pymysql 사용

##### 1) 데이터베이스 접속

In [2]:
# Open a connection to the MySQL server on localhost, schema "board".
# The schema is passed as `database=`: PyMySQL documents `db=` as a
# deprecated alias for that same parameter.
dbcon = pymysql.connect(
    host="127.0.0.1",
    port=3406,
    user="root",
    password="1234",
    database="board",
    charset="utf8",
)

##### 2) 데이터 조회

기본 사용 방법: 테이블의 각 record를 튜플로 표현하며, 전체 결과도 튜플(tuple of tuples) 객체로 얻을 수 있다

In [3]:
# Create a cursor object for running queries.
# Called with no arguments; rows fetched through it come back as plain tuples.

cursor = dbcon.cursor()

In [4]:
# Execute the SELECT statement and fetch every row of the result set.

sql = "SELECT * FROM board_main_post"
cursor.execute(sql)
result = cursor.fetchall()  # all rows at once
result

((1,
  'I live for you alone.',
  'This is my desire to honour you.',
  datetime.datetime(2023, 6, 15, 1, 26, 25, 979626),
  datetime.datetime(2023, 6, 15, 6, 6, 34, 734273),
  2),
 (2,
  'Parenting',
  "Parenting is difficult. Children don't follow your instructions.",
  datetime.datetime(2023, 6, 15, 2, 41, 40, 198218),
  datetime.datetime(2023, 6, 15, 2, 41, 40, 198349),
  1),
 (3,
  'hello',
  'My love how are you?',
  datetime.datetime(2023, 6, 15, 5, 46, 3, 855349),
  datetime.datetime(2023, 6, 15, 6, 5, 57, 403641),
  3),
 (4,
  'Thinking',
  'I am thinking',
  datetime.datetime(2023, 6, 15, 6, 21, 47, 528438),
  datetime.datetime(2023, 6, 15, 6, 21, 47, 528538),
  1),
 (5,
  'This is fun!',
  "I'm currently enjoying this right now.",
  datetime.datetime(2023, 6, 15, 6, 46, 58, 987932),
  datetime.datetime(2023, 6, 15, 6, 46, 58, 987983),
  5),
 (6,
  '수정된 제목',
  '수정된 내용',
  datetime.datetime(2023, 6, 27, 4, 51, 38),
  datetime.datetime(2023, 6, 27, 4, 55, 51),
  None))

딕셔너리 형태로 데이터 조회

In [5]:
# Create a cursor for querying, passing DictCursor as the cursor class so that
# each fetched row is a dict keyed by column name instead of a tuple.
# NOTE(review): this rebinds `cursor`; the cursor created earlier is not closed first.

cursor = dbcon.cursor(pymysql.cursors.DictCursor)

In [6]:
# Run the same SELECT through the dict cursor and fetch every row.

sql = "SELECT * FROM board_main_post"
cursor.execute(sql)
result = cursor.fetchall()  # list of per-row dicts
result

[{'id': 1,
  'title': 'I live for you alone.',
  'contents': 'This is my desire to honour you.',
  'created_at': datetime.datetime(2023, 6, 15, 1, 26, 25, 979626),
  'updated_at': datetime.datetime(2023, 6, 15, 6, 6, 34, 734273),
  'author_id': 2},
 {'id': 2,
  'title': 'Parenting',
  'contents': "Parenting is difficult. Children don't follow your instructions.",
  'created_at': datetime.datetime(2023, 6, 15, 2, 41, 40, 198218),
  'updated_at': datetime.datetime(2023, 6, 15, 2, 41, 40, 198349),
  'author_id': 1},
 {'id': 3,
  'title': 'hello',
  'contents': 'My love how are you?',
  'created_at': datetime.datetime(2023, 6, 15, 5, 46, 3, 855349),
  'updated_at': datetime.datetime(2023, 6, 15, 6, 5, 57, 403641),
  'author_id': 3},
 {'id': 4,
  'title': 'Thinking',
  'contents': 'I am thinking',
  'created_at': datetime.datetime(2023, 6, 15, 6, 21, 47, 528438),
  'updated_at': datetime.datetime(2023, 6, 15, 6, 21, 47, 528538),
  'author_id': 1},
 {'id': 5,
  'title': 'This is fun!',
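  'contents': "I'm currently enjoying this right now.",
  'created_at': datetime.datetime(2023, 6, 15, 6, 46, 58, 987932),
  'updated_at': datetime.datetime(2023, 6, 15, 6, 46, 58, 987983),
  'author_id': 5},
 {'id': 6,
  'title': '수정된 제목',
  'contents': '수정된 내용',
  'created_at': datetime.datetime(2023, 6, 27, 4, 51, 38),
  'updated_at': datetime.datetime(2023, 6, 27, 4, 55, 51),
  'author_id': None}]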

In [7]:
# Convert the query result (one dict per row) into a DataFrame;
# the dict keys become the column names.

df = DataFrame(result)
df

Unnamed: 0,id,title,contents,created_at,updated_at,author_id
0,1,I live for you alone.,This is my desire to honour you.,2023-06-15 01:26:25.979626,2023-06-15 06:06:34.734273,2.0
1,2,Parenting,Parenting is difficult. Children don't follow ...,2023-06-15 02:41:40.198218,2023-06-15 02:41:40.198349,1.0
2,3,hello,My love how are you?,2023-06-15 05:46:03.855349,2023-06-15 06:05:57.403641,3.0
3,4,Thinking,I am thinking,2023-06-15 06:21:47.528438,2023-06-15 06:21:47.528538,1.0
4,5,This is fun!,I'm currently enjoying this right now.,2023-06-15 06:46:58.987932,2023-06-15 06:46:58.987983,5.0
5,6,수정된 제목,수정된 내용,2023-06-27 04:51:38.000000,2023-06-27 04:55:51.000000,


In [8]:
# Promote the 'id' column to the DataFrame index.
# set_index() returns the re-indexed frame, which is bound back to `df`.

df = df.set_index('id')
df

Unnamed: 0_level_0,title,contents,created_at,updated_at,author_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,I live for you alone.,This is my desire to honour you.,2023-06-15 01:26:25.979626,2023-06-15 06:06:34.734273,2.0
2,Parenting,Parenting is difficult. Children don't follow ...,2023-06-15 02:41:40.198218,2023-06-15 02:41:40.198349,1.0
3,hello,My love how are you?,2023-06-15 05:46:03.855349,2023-06-15 06:05:57.403641,3.0
4,Thinking,I am thinking,2023-06-15 06:21:47.528438,2023-06-15 06:21:47.528538,1.0
5,This is fun!,I'm currently enjoying this right now.,2023-06-15 06:46:58.987932,2023-06-15 06:46:58.987983,5.0
6,수정된 제목,수정된 내용,2023-06-27 04:51:38.000000,2023-06-27 04:55:51.000000,


##### 3) 입력, 수정, 삭제

In [21]:
# # 입력 INSERT INTO

# sql = """INSERT INTO board_main_post (title, contents, created_at, updated_at) 
#          VALUES ('pandas test', '이것은 테스트 입니다.', now(), now())"""
# print(sql)

# rows = cursor.execute(sql)
# print("%d개의 행이 저장됨" % rows)
# print("생성된 PK: %d" % cursor.lastrowid)

# # 처리 결과를 실제로 반영함
# dbcon.commit()

# # 되돌리기
# # -> 이미 commit()한 내역은 적용안됨
# # dbcon.rollback()

INSERT INTO board_main_post (title, contents, created_at, updated_at) 
         VALUES ('pandas test', '이것은 테스트 입니다.', now(), now())
1개의 행이 저장됨
생성된 PK: 6


In [9]:
df

Unnamed: 0_level_0,title,contents,created_at,updated_at,author_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,I live for you alone.,This is my desire to honour you.,2023-06-15 01:26:25.979626,2023-06-15 06:06:34.734273,2.0
2,Parenting,Parenting is difficult. Children don't follow ...,2023-06-15 02:41:40.198218,2023-06-15 02:41:40.198349,1.0
3,hello,My love how are you?,2023-06-15 05:46:03.855349,2023-06-15 06:05:57.403641,3.0
4,Thinking,I am thinking,2023-06-15 06:21:47.528438,2023-06-15 06:21:47.528538,1.0
5,This is fun!,I'm currently enjoying this right now.,2023-06-15 06:46:58.987932,2023-06-15 06:46:58.987983,5.0
6,수정된 제목,수정된 내용,2023-06-27 04:51:38.000000,2023-06-27 04:55:51.000000,


In [10]:
# # 수정 UPDATE ... SET

# sql = """UPDATE board_main_post
#          SET title = '수정된 제목',
#              contents = '수정된 내용',
#              updated_at = now()
#         WHERE id = 6"""
# print(sql)

# rows = cursor.execute(sql)
# print("%d개의 행이 수정됨" % rows)

# dbcon.commit()

UPDATE board_main_post
         SET title = '수정된 제목',
             contents = '수정된 내용',
             updated_at = now()
        WHERE id = 6
1개의 행이 수정됨


In [12]:
df

Unnamed: 0_level_0,title,contents,created_at,updated_at,author_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,I live for you alone.,This is my desire to honour you.,2023-06-15 01:26:25.979626,2023-06-15 06:06:34.734273,2.0
2,Parenting,Parenting is difficult. Children don't follow ...,2023-06-15 02:41:40.198218,2023-06-15 02:41:40.198349,1.0
3,hello,My love how are you?,2023-06-15 05:46:03.855349,2023-06-15 06:05:57.403641,3.0
4,Thinking,I am thinking,2023-06-15 06:21:47.528438,2023-06-15 06:21:47.528538,1.0
5,This is fun!,I'm currently enjoying this right now.,2023-06-15 06:46:58.987932,2023-06-15 06:46:58.987983,5.0
6,pandas test,이것은 테스트 입니다.,2023-06-27 04:51:38.000000,2023-06-27 04:51:38.000000,


In [10]:
# # 삭제 DELETE

# sql = "DELETE FROM board_main_post WHERE id >= 5"
# print(sql)

# rows = cursor.execute(sql)
# print("%d개의 행이 삭제됨" % rows)

# dbcon.commit()

DELETE FROM board_main_post WHERE id >= 5
2개의 행이 삭제됨


In [11]:
df

Unnamed: 0_level_0,title,contents,created_at,updated_at,author_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,I live for you alone.,This is my desire to honour you.,2023-06-15 01:26:25.979626,2023-06-15 06:06:34.734273,2.0
2,Parenting,Parenting is difficult. Children don't follow ...,2023-06-15 02:41:40.198218,2023-06-15 02:41:40.198349,1.0
3,hello,My love how are you?,2023-06-15 05:46:03.855349,2023-06-15 06:05:57.403641,3.0
4,Thinking,I am thinking,2023-06-15 06:21:47.528438,2023-06-15 06:21:47.528538,1.0
5,This is fun!,I'm currently enjoying this right now.,2023-06-15 06:46:58.987932,2023-06-15 06:46:58.987983,5.0
6,수정된 제목,수정된 내용,2023-06-27 04:51:38.000000,2023-06-27 04:55:51.000000,


##### 4) 데이터베이스 접속 해제

In [12]:
# Release the pymysql resources: close the cursor first, then the connection.
cursor.close()
dbcon.close()

#### 2. SQLAlchemy 사용

##### 1) 데이터베이스 접속

In [16]:
# Make PyMySQL importable under the module name MySQLdb, so that the
# "mysql+mysqldb" connection URL used later can load it as its driver.
# This registers an alias at runtime; it does not install any package.

pymysql.install_as_MySQLdb()
import MySQLdb

In [21]:
# Build the connection URL:
# "mysql+mysqldb://USER:PASSWORD@HOST:PORT/DATABASE?charset=ENCODING"

conStr = "mysql+mysqldb://root:1234@127.0.0.1:3406/board?charset=utf8"
conStr

'mysql+mysqldb://root:1234@127.0.0.1:3406/board?charset=utf8'

In [22]:
# Connect to the database: build an Engine from the connection URL,
# then open a Connection from it.

engine = create_engine(conStr)
conn = engine.connect()

##### 2) 데이터 조회하기

In [23]:
# Load the result of a raw SQL query straight into a DataFrame,
# using the 'id' column as the index.

df = read_sql(
    sql="SELECT * FROM board_main_post",
    con=conn,
    index_col="id",
)
df

Unnamed: 0_level_0,title,contents,created_at,updated_at,author_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,I live for you alone.,This is my desire to honour you.,2023-06-15 01:26:25.979626,2023-06-15 06:06:34.734273,2
2,Parenting,Parenting is difficult. Children don't follow ...,2023-06-15 02:41:40.198218,2023-06-15 02:41:40.198349,1
3,hello,My love how are you?,2023-06-15 05:46:03.855349,2023-06-15 06:05:57.403641,3
4,Thinking,I am thinking,2023-06-15 06:21:47.528438,2023-06-15 06:21:47.528538,1


In [28]:
# Load a table directly by name, without writing any SQL.

# # Variant: fetch every column, default integer index
#
# df = read_sql_table('board_main_post', con=conn)
# df

# # Variant: fetch every column, with 'id' as the index
#
# df = read_sql_table('board_main_post', index_col='id', con=conn)
# df

# Active variant: fetch only the listed columns, indexed by 'id'.

df = read_sql_table(
    table_name='board_main_post',
    con=conn,
    index_col='id',
    columns=['title', 'contents'],
)
df

Unnamed: 0_level_0,title,contents
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,I live for you alone.,This is my desire to honour you.
2,Parenting,Parenting is difficult. Children don't follow ...
3,hello,My love how are you?
4,Thinking,I am thinking


##### 3) 데이터 내보내기

- name = '테이블명' 이름으로 기존 테이블이 있는 경우 -> 해당 테이블의 컬럼명에 맞게 데이터를 넣을 수 있음
- if_exists = 'append' 옵션을 넣는 경우 -> 기존 테이블에 데이터를 추가로 넣음
- if_exists = 'fail' 옵션을 넣는 경우 (기본값) -> 기존 테이블이 있으면, 데이터를 넣지 않고 ValueError 예외를 발생시킴
- if_exists = 'replace' 옵션을 넣는 경우 -> 기존 테이블이 있으면, 기존 테이블 삭제 후 다시 테이블 생성 후 새로운 데이터 입력

이미 만들어진 테이블이 없을 경우 -> name = '테이블명' 이름으로 테이블이 자동 생성되고, 데이터가 입력됨
단, 자동 생성된 테이블은 구조(컬럼 타입, 기본키, 인덱스 등)가 최적화되지 않으므로, 테이블을 미리 만들어 두고 데이터를 넣는 방식을 권장함

In [31]:
# Write the rows of df into the table 'new_table', appending to it if it exists.
# index=False: the DataFrame index is not written out as a column.
# The explicit commit() is what makes the inserted rows permanent.
df.to_sql(name='new_table', con=conn, if_exists='append', index=False)
conn.commit()

##### 4) 데이터베이스 접속 해제

In [32]:
# Closing the Connection only hands it back to the Engine's pool;
# dispose() then closes the pooled DB connections, so the session is
# actually disconnected from the server.
conn.close()
engine.dispose()