#### https://realpython.com/python-mysql/

- WSL Ubuntu
    - >docker pull mysql
    - >docker ps
    - >docker images
    - >docker run --name test_mysql -e MYSQL_ROOT_PASSWORD=1111 -d -p 3307:3306 mysql
-  test_mysql인 MySQL 컨테이너를 백그라운드에서 실행하며, MySQL의 root 비밀번호를 1111로 설정하고 로컬 머신의 3307 포트를 컨테이너의 3306 포트에 매핑하여 외부에서 MySQL 서버에 접속

In [3]:
! pip install mysql-connector-python



In [9]:
pip install mysql-connector-python
pip install mysql-connector-python

Note: you may need to restart the kernel to use updated packages.


In [1]:
# ! pip install mysql-connector-python
# 다운로드 확인
import mysql.connector

### 1. 연결 설정과 신규 DB 생성

In [19]:
from getpass import getpass
from mysql.connector import connect, Error

try:
    with connect(
        host="localhost",                     # Docker 컨테이너 포워딩된 호스트 주소
        user=input("Enter username: "),       # MySQL 사용자 이름
        password=getpass("Enter password: "), # MySQL 사용자 비밀번호
        db='mysql',                           # 접속할 mysql 데이터베이스 이름
        port=3307                             # 포워딩된 포트 번호
    ) as connection:
        print(connection)
        create_db_query = "CREATE DATABASE online_movie_rating"  # online_movie_rating이라는 db생성
        with connection.cursor() as cursor:
            cursor.execute(create_db_query)

except Error as e:
    print(e)

<mysql.connector.connection_cext.CMySQLConnection object at 0x0000029D5F0EA280>


### 2. 기존 데이터베이스에 연결

In [20]:
import mysql.connector
from mysql.connector import Error

def connect_to_mysql():
    """MySQL 데이터베이스에 연결하거나 재연결하는 함수"""
    try:
        connection = mysql.connector.connect(host='localhost',
                                             port=3307,
                                             database='online_movie_rating',
                                             user='root',
                                             password='1111')
        if connection.is_connected():
            print("Database connection was successful.")
            return connection
        else:
            print("Database connection failed.")
            return None
    except Error as e:
        print(f"Error while connecting to MySQL: {e}")
        return None


# 데이터베이스 연결 시도
connection = connect_to_mysql()

Database connection was successful.


##### 모든 DB 목록 확인

In [25]:
show_db_query = "SHOW DATABASES"
with connection.cursor() as cursor:
    cursor.execute(show_db_query)
    for db in cursor:
        print(db)

('information_schema',)
('mysql',)
('online_movie_rating',)
('performance_schema',)
('sys',)


#### 1. create_reviewers_table

In [22]:
create_reviewers_table_query = """
CREATE TABLE reviewers (
    id INT AUTO_INCREMENT PRIMARY KEY,
    first_name VARCHAR(100),
    last_name VARCHAR(100)
)
"""
with connection.cursor() as cursor:
    cursor.execute(create_reviewers_table_query)
    connection.commit()

### 3. 특정 DB에서 테이블 생성, 변경 및 삭제
<img src ='https://files.realpython.com/media/schema.2e80e67d2ae1.png' width =600 height=600>

#### 2. create_movies_table

In [23]:
create_movies_table_query = """
CREATE TABLE movies(
    id INT AUTO_INCREMENT PRIMARY KEY,
    title VARCHAR(100),
    release_year YEAR(4),
    genre VARCHAR(100),
    collection_in_mil INT
)
"""
with connection.cursor() as cursor:
    cursor.execute(create_movies_table_query)
    connection.commit()

#### 3. create_ratings_table

In [24]:
create_ratings_table_query = """
CREATE TABLE ratings (
    movie_id INT,
    reviewer_id INT,
    rating DECIMAL(2,1),
    FOREIGN KEY(movie_id) REFERENCES movies(id),
    FOREIGN KEY(reviewer_id) REFERENCES reviewers(id),
    PRIMARY KEY(movie_id, reviewer_id)
)
"""
with connection.cursor() as cursor:
    cursor.execute(create_ratings_table_query)
    connection.commit()

- ratings은 reviewer_id(외래 키)와 movie_id(외래 키)를 연결하고(reviewer_id: movie_id = m:m) 게시된 등급에 대한 정보 rating 속성을 갖는다.

In [13]:
connection.cursor()

<mysql.connector.cursor_cext.CMySQLCursor at 0x29d5f6bf280>

#### To list all the tables in the online_movie_rating database, you would use the SHOW TABLES 

In [26]:
# First, select the database to use
select_db_query = "USE online_movie_rating"

# Then, show all tables within the selected database
show_tables_query = "SHOW TABLES"

# Execute the queries with the database connection
with connection.cursor() as cursor:
    cursor.execute(select_db_query)
    # No commit is needed here since USE doesn't modify any data

    cursor.execute(show_tables_query)
    # Fetch and print all table names
    for table in cursor:
        print(table)

('movies',)
('ratings',)
('reviewers',)


-  with connect(...) 문을 사용할 때, 문의 블록이 끝나면 자동으로 연결이 닫힘. 재연결하려면

In [11]:
from mysql.connector import connect, Error

def create_db_connection():
    """데이터베이스에 연결을 시도하고 연결 객체를 반환합니다."""
    try:
        connection = connect(
            host="localhost",
            user=input("Enter username: "),
            password=getpass("Enter password: "),
            db='online_movie_rating',
            port=3307
        )
        print("MySQL Database connection successful")
        return connection
    except Error as e:
        print(f"Error: {e}")

def close_connection(connection):
    """데이터베이스 연결을 닫습니다."""
    if connection.is_connected():
        connection.close()
        print("MySQL connection is closed")

In [12]:
# 연결을 생성
connection = create_db_connection()

# 작업이 끝나면 연결을 닫기
# close_connection(connection)

MySQL Database connection successful


### 테이블 모두 지우기

In [28]:
delete_movies_table_query = "DROP TABLE IF EXISTS movies"
delete_ratings_table_query = "DROP TABLE IF EXISTS ratings"
delete_reviewers_table_query = "DROP TABLE IF EXISTS reviewers"

with connection.cursor() as cursor:
    cursor.execute(delete_movies_table_query)
    # cursor.execute(delete_ratings_table_query)
    # cursor.execute(delete_reviewers_table_query)
    connection.commit()

DatabaseError: 3730 (HY000): Cannot drop table 'movies' referenced by a foreign key constraint 'ratings_ibfk_1' on table 'ratings'.

##### A. CREATE TABLE문 을 사용하여 테이블 생성
- A.1. movies 테이블 생성

In [5]:
create_movies_table_query = """
CREATE TABLE movies(
    id INT AUTO_INCREMENT PRIMARY KEY,
    title VARCHAR(100),
    release_year YEAR(4),
    genre VARCHAR(100),
    collection_in_mil INT
)
"""
with connection.cursor() as cursor:
    cursor.execute(create_movies_table_query)
    connection.commit()
    # 기본적으로 MySQL 커넥터는 트랜잭션을 자동 커밋하지 않음
    # MySQL에서는 트랜잭션에서 언급된 수정 사항이 COMMIT마지막에 명령을 사용할 때만 발생
    # 실제 테이블에서 변경을 수행하려면 모든 트랜잭션 후에 항상 이 connection.commit() 메서드를 호출


ProgrammingError: 1050 (42S01): Table 'movies' already exists

- A.2. reviewers 테이블 생성

In [6]:
create_reviewers_table_query = """
CREATE TABLE reviewers (
    id INT AUTO_INCREMENT PRIMARY KEY,
    first_name VARCHAR(100),
    last_name VARCHAR(100)
)
"""
with connection.cursor() as cursor:
    cursor.execute(create_reviewers_table_query)
    connection.commit()

ProgrammingError: 1050 (42S01): Table 'reviewers' already exists

- A.3. ratings 테이블 생성

In [35]:
create_ratings_table_query = """
CREATE TABLE ratings (
    movie_id INT,
    reviewer_id INT,
    rating DECIMAL(2,1),
    FOREIGN KEY(movie_id) REFERENCES movies(id),
    FOREIGN KEY(reviewer_id) REFERENCES reviewers(id),
    PRIMARY KEY(movie_id, reviewer_id)
)
"""
with connection.cursor() as cursor:
    cursor.execute(create_ratings_table_query)
    connection.commit()

#### DESCRIBE 문장을 사용하여 테이블 스키마(schema) 표시

In [29]:
show_table_query = "DESCRIBE movies"
with connection.cursor() as cursor:
    cursor.execute(show_table_query)
    # Fetch rows from last executed query
    result = cursor.fetchall()
    for row in result:
        print(row)

('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('title', 'varchar(100)', 'YES', '', None, '')
('release_year', 'year', 'YES', '', None, '')
('genre', 'varchar(100)', 'YES', '', None, '')
('collection_in_mil', 'int', 'YES', '', None, '')


#### ALTER명령문 을 사용하여 테이블 스키마 수정

In [30]:
alter_table_query = """
ALTER TABLE movies
MODIFY COLUMN collection_in_mil DECIMAL(4,1)
"""
show_table_query = "DESCRIBE movies"
with connection.cursor() as cursor:
    cursor.execute(alter_table_query)
    cursor.execute(show_table_query)
    # Fetch rows from last executed query
    result = cursor.fetchall()
    print("Movie Table Schema after alteration:")
    for row in result:
        print(row)

Movie Table Schema after alteration:
('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('title', 'varchar(100)', 'YES', '', None, '')
('release_year', 'year', 'YES', '', None, '')
('genre', 'varchar(100)', 'YES', '', None, '')
('collection_in_mil', 'decimal(4,1)', 'YES', '', None, '')


#### DROP명령문 을 사용하여 테이블 삭제

In [31]:
drop_table_query = "DROP TABLE ratings"
with connection.cursor() as cursor:
    cursor.execute(drop_table_query)

##### B. 테이블에 레코드 삽입

In [32]:
insert_movies_query = """
INSERT INTO movies (title, release_year, genre, collection_in_mil)
VALUES
    ("Forrest Gump", 1994, "Drama", 330.2),
    ("3 Idiots", 2009, "Drama", 2.4),
    ("Eternal Sunshine of the Spotless Mind", 2004, "Drama", 34.5),
    ("Good Will Hunting", 1997, "Drama", 138.1),
    ("Skyfall", 2012, "Action", 304.6),
    ("Gladiator", 2000, "Action", 188.7),
    ("Black", 2005, "Drama", 3.0),
    ("Titanic", 1997, "Romance", 659.2),
    ("The Shawshank Redemption", 1994, "Drama",28.4),
    ("Udaan", 2010, "Drama", 1.5),
    ("Home Alone", 1990, "Comedy", 286.9),
    ("Casablanca", 1942, "Romance", 1.0),
    ("Avengers: Endgame", 2019, "Action", 858.8),
    ("Night of the Living Dead", 1968, "Horror", 2.5),
    ("The Godfather", 1972, "Crime", 135.6),
    ("Haider", 2014, "Action", 4.2),
    ("Inception", 2010, "Adventure", 293.7),
    ("Evil", 2003, "Horror", 1.3),
    ("Toy Story 4", 2019, "Animation", 434.9),
    ("Air Force One", 1997, "Drama", 138.1),
    ("The Dark Knight", 2008, "Action",535.4),
    ("Bhaag Milkha Bhaag", 2013, "Sport", 4.1),
    ("The Lion King", 1994, "Animation", 423.6),
    ("Pulp Fiction", 1994, "Crime", 108.8),
    ("Kai Po Che", 2013, "Sport", 6.0),
    ("Beasts of No Nation", 2015, "War", 1.4),
    ("Andadhun", 2018, "Thriller", 2.9),
    ("The Silence of the Lambs", 1991, "Crime", 68.2),
    ("Deadpool", 2016, "Action", 363.6),
    ("Drishyam", 2015, "Mystery", 3.0)
"""
with connection.cursor() as cursor:
    cursor.execute(insert_movies_query)
    connection.commit()

In [33]:
insert_reviewers_query = """
INSERT INTO reviewers
(first_name, last_name)
VALUES ( %s, %s )
"""
reviewers_records = [
    ("Chaitanya", "Baweja"),
    ("Mary", "Cooper"),
    ("John", "Wayne"),
    ("Thomas", "Stoneman"),
    ("Penny", "Hofstadter"),
    ("Mitchell", "Marsh"),
    ("Wyatt", "Skaggs"),
    ("Andre", "Veiga"),
    ("Sheldon", "Cooper"),
    ("Kimbra", "Masters"),
    ("Kat", "Dennings"),
    ("Bruce", "Wayne"),
    ("Domingo", "Cortes"),
    ("Rajesh", "Koothrappali"),
    ("Ben", "Glocker"),
    ("Mahinder", "Dhoni"),
    ("Akbar", "Khan"),
    ("Howard", "Wolowitz"),
    ("Pinkie", "Petit"),
    ("Gurkaran", "Singh"),
    ("Amy", "Farah Fowler"),
    ("Marlon", "Crafford"),
]
with connection.cursor() as cursor:
    cursor.executemany(insert_reviewers_query, reviewers_records)
    connection.commit()

In [36]:
insert_ratings_query = """
INSERT INTO ratings
(rating, movie_id, reviewer_id)
VALUES ( %s, %s, %s)
"""
ratings_records = [
    (6.4, 17, 5), (5.6, 19, 1), (6.3, 22, 14), (5.1, 21, 17),
    (5.0, 5, 5), (6.5, 21, 5), (8.5, 30, 13), (9.7, 6, 4),
    (8.5, 24, 12), (9.9, 14, 9), (8.7, 26, 14), (9.9, 6, 10),
    (5.1, 30, 6), (5.4, 18, 16), (6.2, 6, 20), (7.3, 21, 19),
    (8.1, 17, 18), (5.0, 7, 2), (9.8, 23, 3), (8.0, 22, 9),
    (8.5, 11, 13), (5.0, 5, 11), (5.7, 8, 2), (7.6, 25, 19),
    (5.2, 18, 15), (9.7, 13, 3), (5.8, 18, 8), (5.8, 30, 15),
    (8.4, 21, 18), (6.2, 23, 16), (7.0, 10, 18), (9.5, 30, 20),
    (8.9, 3, 19), (6.4, 12, 2), (7.8, 12, 22), (9.9, 15, 13),
    (7.5, 20, 17), (9.0, 25, 6), (8.5, 23, 2), (5.3, 30, 17),
    (6.4, 5, 10), (8.1, 5, 21), (5.7, 22, 1), (6.3, 28, 4),
    (9.8, 13, 1)
]
with connection.cursor() as cursor:
    cursor.executemany(insert_ratings_query, ratings_records)
    connection.commit()

##### c. 데이터베이스에서 레코드 읽기

In [37]:
select_movies_query = "SELECT * FROM movies LIMIT 5"
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    result = cursor.fetchall()
    for row in result:
        print(row)

(1, 'Forrest Gump', 1994, 'Drama', Decimal('330.2'))
(2, '3 Idiots', 2009, 'Drama', Decimal('2.4'))
(3, 'Eternal Sunshine of the Spotless Mind', 2004, 'Drama', Decimal('34.5'))
(4, 'Good Will Hunting', 1997, 'Drama', Decimal('138.1'))
(5, 'Skyfall', 2012, 'Action', Decimal('304.6'))


In [38]:
select_movies_query = "SELECT title, release_year FROM movies LIMIT 5"
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for row in cursor.fetchall():
        print(row)

('Forrest Gump', 1994)
('3 Idiots', 2009)
('Eternal Sunshine of the Spotless Mind', 2004)
('Good Will Hunting', 1997)
('Skyfall', 2012)


In [39]:
select_movies_query = """
SELECT title, collection_in_mil
FROM movies
WHERE collection_in_mil > 300
ORDER BY collection_in_mil DESC
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchall():
        print(movie)

('Avengers: Endgame', Decimal('858.8'))
('Titanic', Decimal('659.2'))
('The Dark Knight', Decimal('535.4'))
('Toy Story 4', Decimal('434.9'))
('The Lion King', Decimal('423.6'))
('Deadpool', Decimal('363.6'))
('Forrest Gump', Decimal('330.2'))
('Skyfall', Decimal('304.6'))


In [40]:
select_movies_query = """
SELECT CONCAT(title, " (", release_year, ")"),
      collection_in_mil
FROM movies
ORDER BY collection_in_mil DESC
LIMIT 5
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchall():
        print(movie)

('Avengers: Endgame (2019)', Decimal('858.8'))
('Titanic (1997)', Decimal('659.2'))
('The Dark Knight (2008)', Decimal('535.4'))
('Toy Story 4 (2019)', Decimal('434.9'))
('The Lion King (1994)', Decimal('423.6'))


In [41]:
select_movies_query = """
SELECT CONCAT(title, " (", release_year, ")"),
      collection_in_mil
FROM movies
ORDER BY collection_in_mil DESC
LIMIT 5
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchall():
        print(movie)

('Avengers: Endgame (2019)', Decimal('858.8'))
('Titanic (1997)', Decimal('659.2'))
('The Dark Knight (2008)', Decimal('535.4'))
('Toy Story 4 (2019)', Decimal('434.9'))
('The Lion King (1994)', Decimal('423.6'))


In [42]:
select_movies_query = """
SELECT CONCAT(title, " (", release_year, ")"),
      collection_in_mil
FROM movies
ORDER BY collection_in_mil DESC
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchmany(size=5):
        print(movie)
    cursor.fetchall()

('Avengers: Endgame (2019)', Decimal('858.8'))
('Titanic (1997)', Decimal('659.2'))
('The Dark Knight (2008)', Decimal('535.4'))
('Toy Story 4 (2019)', Decimal('434.9'))
('The Lion King (1994)', Decimal('423.6'))


##### d. JOIN 명령문을 사용하여 여러 테이블 처리
- 데이터베이스에서 가장 높은 평가를 받은 상위 5개 영화의 이름을 찾으려면

In [43]:
select_movies_query = """
SELECT title, AVG(rating) as average_rating
FROM ratings
INNER JOIN movies
    ON movies.id = ratings.movie_id
GROUP BY movie_id
ORDER BY average_rating DESC
LIMIT 5
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchall():
        print(movie)

('The Godfather', Decimal('9.90000'))
('Night of the Living Dead', Decimal('9.90000'))
('Avengers: Endgame', Decimal('9.75000'))
('Eternal Sunshine of the Spotless Mind', Decimal('8.90000'))
('Beasts of No Nation', Decimal('8.70000'))


- 가장 많은 평점을 준 리뷰어의 이름을 찾으려면

In [27]:
select_movies_query = """
SELECT CONCAT(first_name, " ", last_name), COUNT(*) as num
FROM reviewers
INNER JOIN ratings
    ON reviewers.id = ratings.reviewer_id
GROUP BY reviewer_id
ORDER BY num DESC
LIMIT 1
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchall():
        print(movie)

ProgrammingError: 1146 (42S02): Table 'online_movie_rating.ratings' doesn't exist

#### e. 데이터베이스에서 레코드 업데이트 및 삭제
- UPDATE명령
데이터베이스의 리뷰어 중 한 명인 Amy Farah Fowler 이제 Sheldon Cooper와 결혼하여  이제 그녀의 성이 변경

In [44]:
update_query = """
UPDATE
    reviewers
SET
    last_name = "Cooper"
WHERE
    first_name = "Amy"
"""
with connection.cursor() as cursor:
    cursor.execute(update_query)
    connection.commit()

- DELETE명령

In [45]:
select_movies_query = """
SELECT reviewer_id, movie_id FROM ratings
WHERE reviewer_id = 2
"""
with connection.cursor() as cursor:
    cursor.execute(select_movies_query)
    for movie in cursor.fetchall():
        print(movie)

(2, 7)
(2, 8)
(2, 12)
(2, 23)


In [30]:
delete_query = "DELETE FROM ratings WHERE reviewer_id = 2"
with connection.cursor() as cursor:
    cursor.execute(delete_query)
    connection.commit()

ProgrammingError: 1146 (42S02): Table 'online_movie_rating.ratings' doesn't exist

### 4. DataFrame을 Table로

In [31]:
import mysql.connector
from getpass import getpass

mydb = mysql.connector.connect(
        host="localhost",                     # Docker 컨테이너 포워딩된 호스트 주소
        user=input("Enter username: "),       # MySQL 사용자 이름
        password=getpass("Enter password: "), # MySQL 사용자 비밀번호
        db='online_movie_rating',                           # 접속할 데이터베이스 이름
        port=3307                             # 포워딩된 포트 번호
)

In [None]:
mycursor = mydb.cursor()

mycursor.execute("CREATE TABLE queens (id INT AUTO_INCREMENT PRIMARY KEY, name VARCHAR(255), address VARCHAR(255))")

In [None]:
# Import necessary libraries
import pandas as pd
from sqlalchemy import create_engine

# Step 1: Create a DataFrame with the data
data = {'name': ['Alice', 'Bob'],
        'address': ['Street 123', 'Avenue 456']}
df = pd.DataFrame(data)
print(df)

# User input for username and password
user = input("Enter username: ")
password = getpass("Enter password: ")


# Step 2: Create a SQLAlchemy engine to connect to the MySQL database
engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@localhost:3307/online_movie_rating")

# Step 3: Convert the Pandas DataFrame to a format for MySQL table insertion
df.to_sql('new_customers',
          con=engine,
          if_exists='replace',
          index=False)

    name     address
0  Alice  Street 123
1    Bob  Avenue 456


2

### 5. Fake DataFrame을 Table한 번에 보내기
- ETL의 Loading 단계에서 export를 DB로 하는 경우

In [48]:
from faker import Faker
import pandas as pd
from sqlalchemy import create_engine
from getpass import getpass
from mysql.connector import connect

# Faker 객체 초기화
fake = Faker()

# Fake 데이터 DataFrame 생성
data = {
    'name': [fake.name() for _ in range(1000)],
    'phone_number': [fake.phone_number() for _ in range(1000)],
    'nationality': [fake.country() for _ in range(1000)],
    'email': [fake.email() for _ in range(1000)]
}
df = pd.DataFrame(data)

# 사용자로부터 MySQL 접속 정보 받기
host = "localhost"
database = input("Enter database name: ")   # movies_db
user = input("Enter username: ")
password = getpass("Enter password: ")
port = 3307                                 # 기본 MySQL 포트는 3306이지만 docker port forwarding 3307

# SQLAlchemy 엔진 생성
engine = create_engine(f'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}')

# MySQL 데이터베이스에 연결하고 데이터 전송
with engine.connect() as connection:
    df.to_sql('fake_data', con=connection, if_exists='replace', index=False)

print("Data transferred successfully!")


ProgrammingError: (mysql.connector.errors.ProgrammingError) 1045 (28000): Access denied for user 'ODBC'@'172.17.0.1' (using password: NO)
(Background on this error at: https://sqlalche.me/e/20/f405)