## 8.3 Country Club case study

Queries a local SQLite db file, first using assignment's provided query method, and later switching to pandas read_sql_query for better output display.

In [1]:
import sqlite3
from sqlite3 import Error
import pandas as pd
import os

In [2]:
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        print(sqlite3.version)
    except Error as e:
        print(e)
 
    return conn

In [3]:
def do_query(db_file,query):
    """
    Query all rows in the tasks table
    :param db_file: database file
    :param query: the query
    :return:
    """
    conn = create_connection(db_file)
    with conn: 
        cur = conn.cursor()
        cur.execute(query) 
        rows = cur.fetchall()
        for row in rows:
            print(row)

In [4]:
# "sqlite_db_pythonsqlite.db" needs to be in working directory
os.getcwd()

'/Users/Carsten/OneDrive/Documents/Springboard/git_repositories/DataScienceSQLCountryClub'

In [5]:
db_file = "sqlite_db_pythonsqlite.db"

In [6]:
query = """
/* Q1: Some of the facilities charge a fee to members, but some do not.
Write a SQL query to produce a list of the names of the facilities that do. */
SELECT name, membercost
FROM Facilities
WHERE membercost > 0;
"""

In [7]:
do_query(db_file,query)

2.6.0
('Tennis Court 1', 5)
('Tennis Court 2', 5)
('Massage Room 1', 9.9)
('Massage Room 2', 9.9)
('Squash Court', 3.5)


I don't like format of db_query output.  Switch to using pandas for query and formatting into dataframe for subsequent queries

In [8]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,membercost
0,Tennis Court 1,5.0
1,Tennis Court 2,5.0
2,Massage Room 1,9.9
3,Massage Room 2,9.9
4,Squash Court,3.5


In [9]:
query = """
/* Q2: How many facilities do not charge a fee to members? */
SELECT COUNT(*) AS 'Count_of_nocharge_facilities'
FROM Facilities
WHERE membercost = 0;
"""

In [10]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,Count_of_nocharge_facilities
0,4


In [11]:
query = """
/* Q3: Write an SQL query to show a list of facilities that charge a fee to members,
where the fee is less than 20% of the facility's monthly maintenance cost.
Return the facid, facility name, member cost, and monthly maintenance of the
facilities in question. */

SELECT facid,
	   name AS 'facility name',
	   membercost AS 'member cost',
	   monthlymaintenance AS 'monthly maintenance cost'
FROM Facilities
WHERE membercost > 0 AND
      (membercost < 0.20 *
			(SELECT AVG(monthlymaintenance)
	 		 FROM Facilities)
	  );
"""

In [12]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,facid,facility name,member cost,monthly maintenance cost
0,0,Tennis Court 1,5.0,200
1,1,Tennis Court 2,5.0,200
2,4,Massage Room 1,9.9,3000
3,5,Massage Room 2,9.9,3000
4,6,Squash Court,3.5,80


In [13]:
query = """
/* Q4: Write an SQL query to retrieve the details of facilities with ID 1 and 5.
Try writing the query without using the OR operator. */

SELECT *
FROM Facilities
WHERE facid IN (1,5);
"""

In [14]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,facid,name,membercost,guestcost,initialoutlay,monthlymaintenance
0,1,Tennis Court 2,5.0,25,8000,200
1,5,Massage Room 2,9.9,80,4000,3000


In [15]:
query = """
/* Q5: Produce a list of facilities, with each labelled as
'cheap' or 'expensive', depending on if their monthly maintenance cost is
more than $100. Return the name and monthly maintenance of the facilities
in question. */

SELECT name,
	   monthlymaintenance,
	   (CASE WHEN monthlymaintenance > 100 THEN 'expensive'
	   	ELSE 'cheap' END) AS price_category
FROM Facilities;
"""

In [16]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,monthlymaintenance,price_category
0,Tennis Court 1,200,expensive
1,Tennis Court 2,200,expensive
2,Badminton Court,50,cheap
3,Table Tennis,10,cheap
4,Massage Room 1,3000,expensive
5,Massage Room 2,3000,expensive
6,Squash Court,80,cheap
7,Snooker Table,15,cheap
8,Pool Table,15,cheap


In [17]:
query = """
/* Q6: You'd like to get the first and last name of the last member(s)
who signed up. Try not to use the LIMIT clause for your solution. */

SELECT firstname, surname
FROM Members
WHERE joindate =
	  (SELECT MAX(joindate)
	   FROM Members);
"""

In [18]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,firstname,surname
0,Darren,Smith


In [19]:
query = """
/* Q7: Produce a list of all members who have used a tennis court.
Include in your output the name of the court, and the name of the member
formatted as a single column. Ensure no duplicate data, and order by
the member name. */

SELECT DISTINCT name, firstname||' '||surname AS fullname
FROM Bookings
INNER JOIN Facilities
	USING(facid)
INNER JOIN Members
	USING(memid)
WHERE name LIKE 'Tennis Court%'
ORDER BY surname, firstname, name;
"""

In [20]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,fullname
0,Tennis Court 1,Florence Bader
1,Tennis Court 2,Florence Bader
2,Tennis Court 1,Anne Baker
3,Tennis Court 2,Anne Baker
4,Tennis Court 1,Timothy Baker
5,Tennis Court 2,Timothy Baker
6,Tennis Court 1,Tim Boothe
7,Tennis Court 2,Tim Boothe
8,Tennis Court 1,Gerald Butters
9,Tennis Court 2,Gerald Butters


In [21]:
query = """
/* Q8: Produce a list of bookings on the day of 2012-09-14 which
will cost the member (or guest) more than $30. Remember that guests have
different costs to members (the listed costs are per half-hour 'slot'), and
the guest user's ID is always 0. Include in your output the name of the
facility, the name of the member formatted as a single column, and the cost.
Order by descending cost, and do not use any subqueries. */

SELECT bookid,
       starttime,
       name,
       firstname || ' ' || surname as fullname,
       (CASE
        WHEN memid=0 THEN slots*guestcost
        ELSE slots*membercost END ) AS "total_cost"
FROM Bookings
INNER JOIN Facilities
    USING(facid)
INNER JOIN Members
    USING(memid)
WHERE starttime LIKE '2012-09-14%'
    AND total_cost >30
ORDER BY total_cost DESC;
"""

In [22]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,bookid,starttime,name,fullname,total_cost
0,2946,2012-09-14 11:00:00,Massage Room 2,GUEST GUEST,320.0
1,2937,2012-09-14 09:00:00,Massage Room 1,GUEST GUEST,160.0
2,2940,2012-09-14 13:00:00,Massage Room 1,GUEST GUEST,160.0
3,2942,2012-09-14 16:00:00,Massage Room 1,GUEST GUEST,160.0
4,2926,2012-09-14 17:00:00,Tennis Court 2,GUEST GUEST,150.0
5,2920,2012-09-14 16:00:00,Tennis Court 1,GUEST GUEST,75.0
6,2922,2012-09-14 19:00:00,Tennis Court 1,GUEST GUEST,75.0
7,2925,2012-09-14 14:00:00,Tennis Court 2,GUEST GUEST,75.0
8,2948,2012-09-14 09:30:00,Squash Court,GUEST GUEST,70.0
9,2941,2012-09-14 14:00:00,Massage Room 1,Jemima Farrell,39.6


In [23]:
query = """
/* Q9: This time, produce the same result as in Q8, but using a subquery. */


/* CB: I can't calculate total cost until after I join Facilities to Bookings.
The only subqueries on a pre-joined table I can think of
do row and/or column filtering, which I could do on outer query anyway.
So I can't think of a meaninful subquery in the FROM clause.
*/
"""

In [24]:
do_query(db_file,query)

2.6.0


In [25]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


TypeError: 'NoneType' object is not iterable

In [26]:
query = """
/* Q10: Produce a list of facilities with a total revenue less than 1000.
The output of facility name and total revenue, sorted by revenue. Remember
that there's a different cost for guests and members! */

/* CB: I couldn't reference total_revenue by name in WHERE clause
until I put a wrapper select around original query */

SELECT *
FROM
(SELECT name, SUM(total_cost) AS total_revenue
FROM
	(SELECT *,
			(CASE
			WHEN memid=0 THEN slots*guestcost
			ELSE slots*membercost END ) AS total_cost
	FROM Bookings
	INNER JOIN Facilities
		USING(facid))
GROUP BY name)
WHERE total_revenue < 1000
ORDER BY total_revenue;
"""

In [27]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,total_revenue
0,Table Tennis,180
1,Snooker Table,240
2,Pool Table,270


In [28]:
query = """
/* Q11: Produce a report of members and who recommended them in alphabetic surname,firstname order */

SELECT firstname||' '||surname AS 'member_name',
       rec_firstname||' '||rec_surname AS 'recommended_by_name'
FROM Members
LEFT JOIN (SELECT memid,
                  surname AS rec_surname,
                  firstname AS rec_firstname
                  FROM Members) AS Members_2
ON Members.recommendedby = Members_2.memid
WHERE Members.memid <> 0
ORDER BY surname;
"""

In [29]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,member_name,recommended_by_name
0,Florence Bader,Ponder Stibbons
1,Anne Baker,Ponder Stibbons
2,Timothy Baker,Jemima Farrell
3,Tim Boothe,Tim Rownam
4,Gerald Butters,Darren Smith
5,Joan Coplin,Timothy Baker
6,Erica Crumpet,Tracy Smith
7,Nancy Dare,Janice Joplette
8,Jemima Farrell,
9,David Farrell,


In [30]:
query = """
/* Q12: Find the facilities with their usage by member, but not guests */

/* CB: I first read Q12 as total booked slots per facility excluding guests */

SELECT name, member_usage
FROM (SELECT facid, SUM(slots) AS member_usage
      FROM Bookings
      WHERE memid <> 0
      GROUP BY facid)
LEFT JOIN Facilities
USING(facid)
ORDER BY name;
"""

In [31]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,member_usage
0,Badminton Court,1086
1,Massage Room 1,884
2,Massage Room 2,54
3,Pool Table,856
4,Snooker Table,860
5,Squash Court,418
6,Table Tennis,794
7,Tennis Court 1,957
8,Tennis Court 2,882


In [32]:
query = """
/* Q12: Find the facilities with their usage by member, but not guests */

/* CB: But maybe want the more detailed booked slots by facility AND member: */

SELECT name, firstname||' '||surname AS 'member_name',usage
FROM (SELECT facid, memid, SUM(slots) AS usage
      FROM Bookings
      WHERE memid <> 0
      GROUP BY facid, memid)
LEFT JOIN Facilities
USING(facid)
LEFT JOIN Members
USING(memid)
ORDER BY name, usage DESC

"""

In [33]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,member_name,usage
0,Badminton Court,Darren Smith,432
1,Badminton Court,Tracy Smith,102
2,Badminton Court,Anna Mackenzie,96
3,Badminton Court,Gerald Butters,63
4,Badminton Court,Ponder Stibbons,48
...,...,...,...
197,Tennis Court 2,Jemima Farrell,3
198,Tennis Court 2,Jack Smith,3
199,Tennis Court 2,Henrietta Rumney,3
200,Tennis Court 2,David Farrell,3


In [34]:
query = """
/* Q13: Find the facilities usage by month, but not guests */
/* CB: I'll define "usage" as sum of slots across all bookings for facility where memid <> 0 */

SELECT name,
       substr(starttime, 1, 7) AS month,
       SUM(slots) AS usage
FROM Bookings
LEFT JOIN Facilities
USING(facid)
WHERE memid <>0
GROUP BY name, month;
"""

In [35]:
pd.read_sql_query(query,create_connection(db_file))

2.6.0


Unnamed: 0,name,month,usage
0,Badminton Court,2012-07,165
1,Badminton Court,2012-08,414
2,Badminton Court,2012-09,507
3,Massage Room 1,2012-07,166
4,Massage Room 1,2012-08,316
5,Massage Room 1,2012-09,402
6,Massage Room 2,2012-07,8
7,Massage Room 2,2012-08,18
8,Massage Room 2,2012-09,28
9,Pool Table,2012-07,110
