In [30]:
# we will create a class to connect to sqlite database and perform CRUD operations

# first we need to import sqlite3

import sqlite3
import pandas as pd

In [37]:
# our class for connecting to sqlite database and performing CRUD operations
# especially useful if you have multiple databases and you want to create a class for each database

# TODO: later on we can move this class to a separate file and import it
class SqliteCRUD:
    """Small CRUD helper around the ``artists`` table of a SQLite database.

    One connection and one cursor are opened in the constructor and reused by
    every method. Methods that modify data commit before returning and return
    ``self`` so calls can be chained; methods that read data return the rows.

    The object can also be used as a context manager (``with SqliteCRUD(path)
    as db:``), which closes the connection when the block ends.
    """

    # constructor
    # called when we create an object of this class
    def __init__(self, dbpath):
        """Open a connection to the SQLite database at ``dbpath``.

        Args:
            dbpath: path handed to ``sqlite3.connect``. With the default
                connect mode sqlite3 creates a new empty database file when
                the path does not exist yet, so a typo does not fail here.

        Raises:
            sqlite3.Error: if the database cannot be opened.
        """
        self.dbpath = dbpath
        self.conn = sqlite3.connect(dbpath)
        self.cursor = self.conn.cursor()
        # print status
        print(f"Connected to database: {dbpath} Ready to perform CRUD operations!")

    def close(self):
        """Close the database connection; safe to call more than once."""
        # getattr guard: if __init__ failed before self.conn was assigned
        # there is nothing to close, and __del__ must not raise because of it
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()

    # context manager support, so the connection is released deterministically
    def __enter__(self):
        """Return ``self`` so the object can be bound with ``as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; exceptions from the block are not suppressed."""
        self.close()
        return False

    # destructor - will be called when object is deleted - typically when program ends
    def __del__(self):
        self.close()

    # get a list of all artists
    def read_artists(self):
        """Return every row of ``artists`` as a list of tuples."""
        self.cursor.execute("SELECT * FROM artists")
        return self.cursor.fetchall()  # returns a list of tuples

    def create_artist(self, name):
        """Insert one artist called ``name`` and commit.

        Returns:
            self, to allow chaining.
        """
        # "with self.conn" commits on success and rolls back if execute raises,
        # so a failed write never leaves an open transaction behind
        with self.conn:
            # notice the parameterized query - for security reasons
            self.cursor.execute("INSERT INTO artists (name) VALUES (?)", (name,))
        # return self to allow chaining
        return self

    # insert a list of artists
    def create_artists(self, artists):
        """Insert many artists in a single transaction and commit once.

        Args:
            artists: iterable of one-element tuples such as ``("Queen",)``.
                Plain strings are accepted too and wrapped into a tuple.

        Returns:
            self, to allow chaining.
        """
        # a bare string would otherwise be treated as a sequence of characters,
        # i.e. one bound parameter per letter
        rows = ((artist,) if isinstance(artist, str) else artist for artist in artists)
        # all rows are inserted or none: any failure rolls the whole batch back
        with self.conn:
            self.cursor.executemany("INSERT INTO artists (name) VALUES (?)", rows)
        return self

    # update artist name
    def update_artist_by_id(self, id, new_name):
        """Set the name of the artist whose ``ArtistId`` equals ``id`` and commit.

        Returns:
            self, to allow chaining.
        """
        with self.conn:
            # again parameterized query for security reasons
            self.cursor.execute(
                "UPDATE artists SET name = ? WHERE ArtistId = ?", (new_name, id)
            )
        return self

    # update artist name by name
    def update_artist_by_name(self, old_name, new_name):
        """Rename every artist called ``old_name`` to ``new_name`` and commit.

        Returns:
            self, to allow chaining.
        """
        with self.conn:
            # again parameterized query for security reasons
            # this could change multiple rows !
            self.cursor.execute(
                "UPDATE artists SET name = ? WHERE name = ?", (new_name, old_name)
            )
        return self

    # let's get tail of artists table
    def read_artists_tail(self, n=10):
        """Return up to ``n`` rows with the highest ``ArtistId``, highest first."""
        self.cursor.execute("SELECT * FROM artists ORDER BY ArtistId DESC LIMIT ?", (n,))
        return self.cursor.fetchall()

    # delete artist by id
    def delete_artist_by_id(self, id):
        """Delete the artist whose ``ArtistId`` equals ``id`` and commit.

        Returns:
            self, to allow chaining.
        """
        with self.conn:
            # again parameterized query for security reasons
            self.cursor.execute("DELETE FROM artists WHERE ArtistId = ?", (id,))
        return self

    # delete artist by name
    def delete_artist_by_name(self, name):
        """Delete every artist called ``name`` and commit.

        Returns:
            self, to allow chaining.
        """
        with self.conn:
            # again parameterized query for security reasons
            self.cursor.execute("DELETE FROM artists WHERE name = ?", (name,))
        return self

    # return dataframe with all artists
    def read_artists_df(self):
        """Return the whole ``artists`` table as a pandas DataFrame."""
        # by abstracting this we can change the implementation later
        return pd.read_sql_query("SELECT * FROM artists", self.conn)

    # just add more methods for other tables, joins, views, etc.

    # TODO explore GROUP BY, HAVING and other SQL features

In [39]:
# in effect we have made our own tiny ORM-style layer (ORM = object-relational mapper)
# a full ORM is a library that maps database tables to Python objects

# ORM on wikipedia: https://en.wikipedia.org/wiki/Object-relational_mapping

In [38]:
# we create a single object of this class; the constructor opens the
# connection to chinook.db and prints a status line
db = SqliteCRUD("chinook.db")
# echo the stored path to confirm which database file this object points at
db.dbpath
# raw SQL is still available through db.cursor.execute(query) when no helper method fits

Connected to database: chinook.db Ready to perform CRUD operations!


'chinook.db'

In [9]:
# read the whole artists table into memory as a list of tuples
artist_list = db.read_artists()
# first 3
artist_list[:3]

[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith')]

In [10]:
# last 3 entries of the list that is already in memory (no new query is run)
artist_list[-3:]

[(278, 'Jumprava'),
 (279, '; DROP TABLE artists; --'),
 (280, 'Lady Gaga; DROP TABLES artists; --')]

In [11]:
# create a new artist (create_artist commits the INSERT before returning)
db.create_artist("David Bowie")
# re-read the table so artist_list includes the new row
artist_list = db.read_artists()
# last 3
artist_list[-3:]


[(279, '; DROP TABLE artists; --'),
 (280, 'Lady Gaga; DROP TABLES artists; --'),
 (281, 'David Bowie')]

In [12]:
# a list of single-element tuples: executemany takes one parameter sequence
# per row, and each row has exactly one placeholder (the name)
my_artists = [("David Bowie",), ("Queen",), ("The Beatles",)]
db.create_artists(my_artists)
# re-read the table so artist_list includes the new rows
artist_list = db.read_artists()
# last 3
artist_list[-3:]

[(282, 'David Bowie'), (283, 'Queen'), (284, 'The Beatles')]

In [17]:
# re-read the table and show the last 10 artists
artist_list = db.read_artists()
artist_list[-10:]

[(275, 'Philip Glass Ensemble'),
 (276, 'Valdis Saulespurēns'),
 (277, 'Jumprava'),
 (278, 'Jumprava'),
 (279, '; DROP TABLE artists; --'),
 (280, 'Lady Gaga; DROP TABLES artists; --'),
 (281, 'David Bowie'),
 (282, 'David Bowie'),
 (283, 'Queen'),
 (284, 'The Beatles')]

In [21]:
# let's change the name of the artist with ArtistId 280 to Lady Gaga
db.update_artist_by_id(280, "Lady Gaga")
db.read_artists()[-10:] # of course on a larger database we would not read all artists just to show the last 10

[(275, 'Philip Glass Ensemble'),
 (276, 'Valdis Saulespurēns'),
 (277, 'Jumprava'),
 (278, 'Jumprava'),
 (279, '; DROP TABLE artists; --'),
 (280, 'Lady Gaga'),
 (281, 'David Bowie'),
 (282, 'David Bowie'),
 (283, 'Queen'),
 (284, 'The Beatles')]

In [25]:
# let's get the tail: rows come back ordered by ArtistId descending (highest id first)
db.read_artists_tail(12) # 12 rows here; the default is 10

[(284, 'The Beatles'),
 (283, 'Queen'),
 (282, 'David Bowie'),
 (281, 'David Bowie'),
 (280, 'Lady Gaga'),
 (279, '; DROP TABLE artists; --'),
 (278, 'Jumprava'),
 (277, 'Jumprava'),
 (276, 'Valdis Saulespurēns'),
 (275, 'Philip Glass Ensemble'),
 (274, 'Nash Ensemble'),
 (273,
  'C. Monteverdi, Nigel Rogers - Chiaroscuro; London Baroque; London Cornett & Sackbu')]

In [28]:
# let's get a tail of 5 rows
db.read_artists_tail(5)

[(284, 'The Beatles'),
 (283, 'Queen'),
 (282, 'David Bowie'),
 (281, 'David Bowie'),
 (280, 'Lady Gaga')]

In [29]:
# let's chain some methods; wrapping the chain in parentheses lets it span
# several lines without backslash continuations
(
    db.create_artist("Ansis")
    .create_artist("Lauris Reinikis")
    .create_artist("Samanta Tīne")
    .read_artists_tail(8)
)

[(287, 'Samanta Tīne'),
 (286, 'Lauris Reinikis'),
 (285, 'Ansis'),
 (284, 'The Beatles'),
 (283, 'Queen'),
 (282, 'David Bowie'),
 (281, 'David Bowie'),
 (280, 'Lady Gaga')]

In [34]:
# load the whole artists table into a pandas DataFrame
artists_df = db.read_artists_df()
# peek at the first rows
artists_df.head()

Unnamed: 0,ArtistId,Name
0,1,AC/DC
1,2,Accept
2,3,Aerosmith
3,4,Alanis Morissette
4,5,Alice In Chains


In [35]:
# peek at the last rows of the same DataFrame
artists_df.tail()

Unnamed: 0,ArtistId,Name
282,283,Queen
283,284,The Beatles
284,285,Ansis
285,286,Lauris Reinikis
286,287,Samanta Tīne


In [None]:
# now we could start working on the dataframe without worrying about the database