In [None]:
import pandas as pd

def get_data(csv_path):
    """Read the CSV file at ``csv_path`` and return its contents as a DataFrame."""
    frame = pd.read_csv(csv_path)
    return frame


def get_book_by_publisher(data, ascending=True):
    """Return the number of rows (books) per publisher, sorted by that count.

    ``ascending`` controls the sort direction of the counts.
    """
    rows_per_publisher = data.groupby("publisher").size()
    return rows_per_publisher.sort_values(ascending=ascending)


def get_author_by_publisher(data, ascending=True):
    """Count the distinct authors associated with each publisher.

    Args:
        data: DataFrame with ``first_name``, ``last_name`` and ``publisher`` columns.
        ascending: Sort the counts from smallest to largest when True (default).

    Returns:
        Series named ``name``, indexed by publisher, holding the number of unique
        "first last" author names for that publisher, sorted by that count.
    """
    full_names = data.first_name.str.cat(data.last_name, sep=' ')
    return (
        data.assign(name=full_names)
        # Select the one column we need *before* aggregating, so nunique() is not
        # computed for every other column and then thrown away.
        .groupby('publisher')['name']
        .nunique()
        .sort_values(ascending=ascending)
    )

def add_new_book(data, author_name, book_title, publisher_name):
    """Return ``data`` with the given book appended, unless it is already present.

    Args:
        data: DataFrame with ``first_name``, ``last_name``, ``title`` and
            ``publisher`` columns.
        author_name: Full author name. The text before the first space becomes
            the first name and everything after it the last name.
        book_title: Title of the book to add.
        publisher_name: Name of the book's publisher.

    Returns:
        ``data`` itself when a row with the same author, title and publisher
        already exists; otherwise a new DataFrame with the row appended and the
        index renumbered. The input frame is never modified.
    """
    first_name, _, last_name = author_name.partition(" ")

    # Does the book exist?
    already_present = (
        (data.first_name == first_name)
        & (data.last_name == last_name)
        & (data.title == book_title)
        & (data.publisher == publisher_name)
    ).any()
    if already_present:
        return data

    # Add the new book. DataFrame.append was removed in pandas 2.0, so build a
    # one-row frame and concatenate it instead.
    new_row = pd.DataFrame(
        [
            {
                "first_name": first_name,
                "last_name": last_name,
                "title": book_title,
                "publisher": publisher_name,
            }
        ]
    )
    return pd.concat([data, new_row], ignore_index=True)


from treelib import Tree

def output_author_hierarchy(data):
    """Print a tree of authors, each author's titles, and each title's publishers."""
    root_id = "authors"
    tree = Tree()
    tree.create_node("Authors", root_id)

    full_names = data.first_name.str.cat(data.last_name, sep=" ")
    for author_name, author_rows in data.assign(name=full_names).groupby("name"):
        # The author's full name doubles as the node identifier.
        tree.create_node(author_name, author_name, parent=root_id)
        for title, title_rows in author_rows.groupby("title"):
            # Prefix with the author so equal titles by different authors get distinct ids.
            title_id = f"{author_name}:{title}"
            tree.create_node(title, title_id, parent=author_name)
            for publisher_name in title_rows["publisher"]:
                # No identifier is passed for publisher leaves.
                tree.create_node(publisher_name, parent=title_id)

    # Output the hierarchical authors data
    tree.show()

In [None]:
# Load the author/book/publisher CSV into a DataFrame.
data = get_data("author_book_publisher.csv")

In [None]:
# Row (book) counts per publisher, largest first.
books_by_publisher = get_book_by_publisher(data, ascending=False)

In [None]:
# Print the number of books published by each publisher
for publisher, total_books in books_by_publisher.items():
    print(f"Publisher: {publisher}, total books: {total_books}")

In [None]:
# Print the number of distinct authors for each publisher, largest first
authors_by_publisher = get_author_by_publisher(data, ascending=False)
for publisher, total_authors in authors_by_publisher.items():
    print(f"Publisher: {publisher}, total authors: {total_authors}")


In [None]:
# Output hierarchical authors data
output_author_hierarchy(data)

In [None]:
# Add a new book to the data structure.
# Rebinding `data` here is safe to re-run: add_new_book returns the frame
# unchanged when the same author/title/publisher row is already present.
data = add_new_book(
    data,
    author_name="Stephen King",
    book_title="The Stand",
    publisher_name="Random House",
)

# Output the updated hierarchical authors data
output_author_hierarchy(data)

In [None]:
%%sql

-- One row per author.
CREATE TABLE author (
    author_id INTEGER NOT NULL PRIMARY KEY,
    first_name VARCHAR,
    last_name VARCHAR
);

-- One row per book; author_id references the author table (one author, many books).
CREATE TABLE book (
    book_id INTEGER NOT NULL PRIMARY KEY,
    author_id INTEGER REFERENCES author,
    title VARCHAR
);

-- One row per publisher.
-- NOTE(review): the SQLAlchemy model later in this notebook names this column
-- publisher_id, not pub_id -- confirm which name is intended.
CREATE TABLE publisher (
    pub_id INTEGER NOT NULL PRIMARY KEY,
    name VARCHAR
);

-- Create: author_id is not listed in the column list.
INSERT INTO author
    (first_name, last_name)
VALUES ('Paul', 'Mendez');

-- Update: rename every author row matching Stephen King.
UPDATE author
SET first_name = 'Richard', last_name = 'Bachman'
WHERE first_name = 'Stephen' AND last_name = 'King';

-- Delete: remove the row inserted above.
DELETE FROM author
WHERE first_name = 'Paul'
AND last_name = 'Mendez';


The data in the author_book_publisher.csv file represents the data and relationships by duplicating data. A database handles this by breaking the data up into three tables—author, book, and publisher—and establishing relationships between them.


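How is the one-to-many relationship between the author and book tables implemented? With a foreign key: each row in `book` stores the `author_id` primary key of its author (`author_id INTEGER REFERENCES author`), so one author can be referenced by many books.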

Many-to-many relationships are created by an association table acting as a bridge between the two related tables.

### Object-Relational (OR) Impedance Mismatch

It’s possible to map the results returned by SQL queries to objects, but doing so works against the grain of how the database works. Conversely, sticking with the scalar results provided by SQL works against the grain of how Python developers work. An object-relational mapper (ORM) such as SQLAlchemy, used in the cells below, bridges the two.

In [1]:
from sqlalchemy import Column, Integer, String, ForeignKey, Table
from sqlalchemy.orm import relationship, backref, declarative_base
# from sqlalchemy.ext.declarative import declarative_base

# Declarative base class; every model class and association table in this
# notebook is attached to its `metadata`.
Base = declarative_base()

# Association (bridge) table for the many-to-many link between author & publisher.
# It is a plain Table rather than a mapped class: it only holds the two foreign keys.
author_publisher = Table(
    "author_publisher",
    Base.metadata,  # same MetaData object the model classes derived from Base use
    Column("author_id", Integer, ForeignKey("author.author_id")),
    Column("publisher_id", Integer, ForeignKey("publisher.publisher_id"))
)
# Association (bridge) table for the many-to-many link between book & publisher.
# Like author_publisher, it is not a model; it only ties the two tables together.
book_publisher = Table(
    "book_publisher",
    Base.metadata,
    Column("book_id", Integer, ForeignKey("book.book_id")),
    Column("publisher_id", Integer, ForeignKey("publisher.publisher_id"))
)

In [2]:
class Author(Base):
    """ORM model mapped to the ``author`` table."""
    __tablename__ = "author"
    # presumably set so this cell can be re-run in the same kernel without a
    # "table already defined" error -- confirm
    __table_args__ = {'extend_existing': True}
    author_id = Column(Integer, primary_key=True)
    first_name = Column(String)
    last_name = Column(String)
    # One author -> many books; the backref also creates an `author` attribute on each Book instance.
    books = relationship("Book", backref=backref('author'))
    # Many-to-many link to Publisher. `secondary` tells SQLAlchemy that the
    # relationship goes through the author_publisher association table;
    # `back_populates` pairs it with Publisher.authors.
    publishers = relationship(
        "Publisher", secondary=author_publisher, back_populates='authors'
    )
    

In [3]:
class Book(Base):
    """ORM model mapped to the ``book`` table."""
    __tablename__ = "book"
    # presumably set so this cell can be re-run in the same kernel -- confirm
    __table_args__ = {"extend_existing": True}
    book_id = Column(Integer, primary_key=True)
    # Foreign key to the owning author row (many books -> one author).
    author_id = Column(Integer, ForeignKey('author.author_id'))
    title = Column(String)
    # Many-to-many link to Publisher through the book_publisher association
    # table; `back_populates` pairs it with Publisher.books.
    publishers = relationship(
        "Publisher", secondary=book_publisher, back_populates='books'
    )

In [4]:
class Publisher(Base):
    """ORM model mapped to the ``publisher`` table."""
    __tablename__ = "publisher"
    # presumably set so this cell can be re-run in the same kernel -- confirm
    __table_args__ = {"extend_existing": True}
    publisher_id = Column(Integer, primary_key=True)
    name = Column(String)
    # Many-to-many link to Author through the author_publisher association
    # table; `back_populates` pairs it with Author.publishers.
    authors = relationship(
        "Author", secondary=author_publisher, back_populates='publishers'
    )
    # Many-to-many link to Book through the book_publisher association table;
    # `back_populates` pairs it with Book.publishers.
    books = relationship(
        'Book', secondary=book_publisher, back_populates='publishers'
    )

In [5]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import asc, desc, func

# Engine for the SQLite database file that sits next to this notebook.
engine = create_engine("sqlite:///author_book_publisher.db")
# Session factory bound to that engine, and the one session the cells below share.
Session = sessionmaker(bind=engine)
session = Session()

In [6]:
# Fetch every row of the author table as Author objects.
test = session.query(Author).all()

In [8]:
# Show the fields of the first author only (prints nothing when the list is empty).
for first_author in test[:1]:
    print(first_author.author_id)
    print(first_author.first_name)
    print(first_author.last_name)

1
Isaac
Asimov


In [9]:
books = session.query(Book).all()
# Show the title of the first book only (prints nothing when the list is empty).
for first_book in books[:1]:
    print(first_book.title)

Foundation


In [19]:
# The session is the SQLAlchemy object used to communicate with SQLite in the Python example programs.
book_01 = session.query(Book)  # query over all Book rows, not yet executed
book_02 = book_01.filter_by(title='It')  # same query narrowed to one title
# one_or_none() returns None when no row matches, in which case `.title` raises AttributeError.
book_02.one_or_none().title

'It'

In [20]:
# Number of books per author, most prolific first.
# Group on the author's primary key rather than last_name: grouping on last_name
# alone would merge different authors who share a surname, and the selected
# first_name would then be arbitrary for that group.
book_tots = (
    session.query(
        Author.first_name,
        Author.last_name,
        func.count(Book.title).label('book_total')
    )
    .join(Book)
    .group_by(Author.author_id)
    .order_by(desc('book_total'))
    .all()
)

In [21]:
book_tots

[('Stephen', 'King', 3),
 ('Tom', 'Clancy', 2),
 ('Carol', 'Shaben', 1),
 ('Alex', 'Michaelides', 1),
 ('John', 'Le Carre', 1),
 ('Pearl', 'Buck', 1),
 ('Isaac', 'Asimov', 1)]

In [23]:
# Number of books per publisher name, largest first. The query is kept
# unexecuted in `book_by_pubs`; `.all()` runs it for display.
book_by_pubs = (
    session.query(Publisher.name, func.count(Book.title).label('tot_books'))
    .join(Publisher.books)
    .group_by(Publisher.name)
    .order_by(desc('tot_books'))
)
book_by_pubs.all()

[('Simon & Schuster', 4),
 ('Random House', 4),
 ('Penguin Random House', 2),
 ('Berkley', 2)]

In [24]:
# (first_name, author_id, tot_books) per author, grouped on the primary key
# and ordered by book count, largest first.
book_by_auth = (
    session.query(
        Author.first_name, Author.author_id, func.count(Book.title).label('tot_books')
    )
    .join(Book)
    .group_by(Author.author_id)
    .order_by(desc('tot_books'))
    .all()
)
book_by_auth

[('Stephen', 4, 3),
 ('Tom', 3, 2),
 ('Carol', 7, 1),
 ('Alex', 6, 1),
 ('John', 5, 1),
 ('Pearl', 2, 1),
 ('Isaac', 1, 1)]

In [30]:
# Every author as an (author_id, first_name, last_name) row, ordered by last name.
get_authors = (
    session.query(Author.author_id, Author.first_name, Author.last_name)
    .order_by(Author.last_name)
    .all()
)
get_authors

[(1, 'Isaac', 'Asimov'),
 (2, 'Pearl', 'Buck'),
 (3, 'Tom', 'Clancy'),
 (4, 'Stephen', 'King'),
 (5, 'John', 'Le Carre'),
 (8, 'Rajr', 'Maleu'),
 (6, 'Alex', 'Michaelides'),
 (7, 'Carol', 'Shaben')]

In [29]:
# Create a new Author object; no author_id is passed here.
# NOTE(review): the listing cell above (In [30]) already shows this row, so the
# cells were executed out of order -- on a fresh top-to-bottom run that listing
# would execute before this insert.
new_author = Author(first_name='Rajr', last_name="Maleu")
session.add(new_author)  # adds the object to the session; a later cell calls session.commit()

In [38]:
session.commit()

In [35]:
# filter_by() takes keyword arguments; .all() returns a list of matching Author objects.
filter_auth = session.query(Author).filter_by(first_name='Rajr').all()
filter_auth[0].first_name  # raises IndexError if no author matched

'Rajr'

In [37]:
# filter() takes a column expression. Note that `filter_auth` is rebound here
# from a list to a single Author, or None when nothing matches.
filter_auth = session.query(Author).filter(Author.last_name == 'Asimov').one_or_none()
filter_auth.last_name

'Asimov'