The data for this project is in the "country_club" database. This database
contains 3 tables:
    i) the "Bookings" table,
    ii) the "Facilities" table, and
    iii) the "Members" table.

In this case study, a series of questions will be asked. Each question is 
answered in one cell.

In [39]:
import sqlite3
from sqlite3 import Error
import pandas as pd

 
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        print(sqlite3.version)
    except Error as e:
        print(e)
 
    return conn

 
def select_all_tasks(conn,query):
    """
    Query all rows in the tasks table
    :param conn: the Connection object
    :return:
    """
    cur = conn.cursor()
    

    cur.execute(query)
 
    rows = cur.fetchall()
   
 
    for row in rows:
        print(row)
        
    return rows

#def main():
#    database = "sqlite_db_pythonsqlite.db"
 
    # create a database connection
#    conn = create_connection(database)
#    with conn: 
#        print("2. Query all tasks")
#       pd.DataFrame(select_all_tasks(conn))
 
 
# if __name__ == '__main__':
#    main()

In [40]:
# Q1: Some of the facilities charge a fee to members, but some do not.
# Write a SQL query to produce a list of the names of the facilities that do.

database = "sqlite_db_pythonsqlite.db"
query1 = """
        SELECT name, membercost 
        FROM Facilities
        WHERE membercost <> 0;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query1))

2.6.0
('Tennis Court 1', 5)
('Tennis Court 2', 5)
('Massage Room 1', 9.9)
('Massage Room 2', 9.9)
('Squash Court', 3.5)


In [41]:
# Q2: How many facilities do not charge a fee to members

query2 = """
       SELECT COUNT(name)
        FROM Facilities
        WHERE membercost = 0;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query2))

2.6.0
(4,)


In [42]:
# Q3: Write an SQL query to show a list of facilities that charge a fee to members,
# where the fee is less than 20% of the facility's monthly maintenance cost.
# Return the facid, facility name, member cost, and monthly maintenance of the
# facilities in question

query3 = """
        SELECT facid, name, membercost, monthlymaintenance
        FROM Facilities
        WHERE membercost != 0 AND membercost < monthlymaintenance * 0.2;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query3))

2.6.0
(0, 'Tennis Court 1', 5, 200)
(1, 'Tennis Court 2', 5, 200)
(4, 'Massage Room 1', 9.9, 3000)
(5, 'Massage Room 2', 9.9, 3000)
(6, 'Squash Court', 3.5, 80)


In [43]:
# Q4: Write an SQL query to retrieve the details of facilities with ID 1 and 5.
# Try writing the query without using the OR operator.

query4 = """
        SELECT *
        FROM Facilities
        WHERE facid IN (1, 5);
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query4))

2.6.0
(1, 'Tennis Court 2', 5, 25, 8000, 200)
(5, 'Massage Room 2', 9.9, 80, 4000, 3000)


In [44]:
# Q5: Produce a list of facilities, with each labelled as
# 'cheap' or 'expensive', depending on if their monthly maintenance cost is
# more than $100. Return the name and monthly maintenance of the facilities
# in question. 

query5 = """
        SELECT name, monthlymaintenance,
            CASE WHEN monthlymaintenance > 100 THEN 'expensive'
            ELSE 'cheap' END AS maintenancecost
        FROM Facilities;

        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query5))

2.6.0
('Tennis Court 1', 200, 'expensive')
('Tennis Court 2', 200, 'expensive')
('Badminton Court', 50, 'cheap')
('Table Tennis', 10, 'cheap')
('Massage Room 1', 3000, 'expensive')
('Massage Room 2', 3000, 'expensive')
('Squash Court', 80, 'cheap')
('Snooker Table', 15, 'cheap')
('Pool Table', 15, 'cheap')


In [45]:
# Q6: You'd like to get the first and last name of the last member(s)
# who signed up. Try not to use the LIMIT clause for your solution. 

query6 = """
        SELECT firstname,surname, MAX(joindate) AS join_date
        FROM Members
        WHERE firstname != 'Guest';

        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query6))

2.6.0
('Darren', 'Smith', '2012-09-26 18:08:45')


In [46]:
# Q7: Produce a list of all members who have used a tennis court.
# Include in your output the name of the court, and the name of the member
# formatted as a single column. Ensure no duplicate data, and order by
# the member name.

query7 = """
        SELECT DISTINCT f.name AS facility_name, (m.firstname || ' '|| m.surname) AS member_name
        FROM Bookings AS b
        LEFT JOIN Members AS m 
        ON b.memid = m.memid
        LEFT JOIN Facilities AS f 
        ON b.facid = f.facid
        WHERE (b.facid =0 OR b.facid =1) AND b.memid !=0
        ORDER BY member_name;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query7))

2.6.0
('Tennis Court 1', 'Anne Baker')
('Tennis Court 2', 'Anne Baker')
('Tennis Court 2', 'Burton Tracy')
('Tennis Court 1', 'Burton Tracy')
('Tennis Court 1', 'Charles Owen')
('Tennis Court 2', 'Charles Owen')
('Tennis Court 2', 'Darren Smith')
('Tennis Court 1', 'David Farrell')
('Tennis Court 2', 'David Farrell')
('Tennis Court 2', 'David Jones')
('Tennis Court 1', 'David Jones')
('Tennis Court 1', 'David Pinker')
('Tennis Court 1', 'Douglas Jones')
('Tennis Court 1', 'Erica Crumpet')
('Tennis Court 2', 'Florence Bader')
('Tennis Court 1', 'Florence Bader')
('Tennis Court 1', 'Gerald Butters')
('Tennis Court 2', 'Gerald Butters')
('Tennis Court 2', 'Henrietta Rumney')
('Tennis Court 1', 'Jack Smith')
('Tennis Court 2', 'Jack Smith')
('Tennis Court 1', 'Janice Joplette')
('Tennis Court 2', 'Janice Joplette')
('Tennis Court 2', 'Jemima Farrell')
('Tennis Court 1', 'Jemima Farrell')
('Tennis Court 1', 'Joan Coplin')
('Tennis Court 1', 'John Hunt')
('Tennis Court 2', 'John Hunt')
('Ten

In [47]:
# Q8: Produce a list of bookings on the day of 2012-09-14 which
# will cost the member (or guest) more than $30. Remember that guests have
# different costs to members (the listed costs are per half-hour 'slot'), and
# the guest user's ID is always 0. Include in your output the name of the
# facility, the name of the member formatted as a single column, and the cost.
# Order by descending cost, and do not use any subqueries.

query8 = """
        SELECT f.name AS facility_name, (m.firstname || ' '|| m.surname) AS visitor_name, 
            CASE WHEN b.memid=0 THEN b.slots*f.guestcost
            ELSE b.slots*f.membercost END AS cost
        FROM Bookings AS b
        LEFT JOIN Facilities AS f
        ON b.facid = f.facid
        LEFT JOIN Members AS m
        ON m.memid = b.memid
        WHERE ((b.memid=0 AND b.slots*f.guestcost >30) OR (b.memid!=0 AND b.slots*membercost >30)) 
        AND
        (b.starttime>='2012-09-14' AND b.starttime < '2012-09-15')
        ORDER BY cost DESC;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query8))

2.6.0
('Massage Room 2', 'GUEST GUEST', 320)
('Massage Room 1', 'GUEST GUEST', 160)
('Massage Room 1', 'GUEST GUEST', 160)
('Massage Room 1', 'GUEST GUEST', 160)
('Tennis Court 2', 'GUEST GUEST', 150)
('Tennis Court 1', 'GUEST GUEST', 75)
('Tennis Court 1', 'GUEST GUEST', 75)
('Tennis Court 2', 'GUEST GUEST', 75)
('Squash Court', 'GUEST GUEST', 70.0)
('Massage Room 1', 'Jemima Farrell', 39.6)
('Squash Court', 'GUEST GUEST', 35.0)
('Squash Court', 'GUEST GUEST', 35.0)


In [48]:
# Q9: This time, produce the same result as in Q8, but using a subquery.

query9 = """
        SELECT subquery.name AS facility_name, (m.firstname || ' '|| m.surname) AS visitor_name, 
            subquery.slots*unit_cost AS cost
        FROM 
        (
        SELECT b.facid,b.memid, b.starttime, b.slots,b.bookid, f.name, 
            CASE WHEN b.memid = 0 THEN guestcost
            ELSE membercost end AS unit_cost
        FROM Bookings AS b
        LEFT JOIN Facilities AS f
        ON b.facid = f.facid
        WHERE b.starttime>='2012-09-14' AND b.starttime < '2012-09-15'
        ) AS subquery
        LEFT JOIN Members AS m
        ON subquery.memid = m.memid
        WHERE subquery.slots*unit_cost > 30
        ORDER BY cost DESC;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query9))

2.6.0
('Massage Room 2', 'GUEST GUEST', 320)
('Massage Room 1', 'GUEST GUEST', 160)
('Massage Room 1', 'GUEST GUEST', 160)
('Massage Room 1', 'GUEST GUEST', 160)
('Tennis Court 2', 'GUEST GUEST', 150)
('Tennis Court 1', 'GUEST GUEST', 75)
('Tennis Court 1', 'GUEST GUEST', 75)
('Tennis Court 2', 'GUEST GUEST', 75)
('Squash Court', 'GUEST GUEST', 70.0)
('Massage Room 1', 'Jemima Farrell', 39.6)
('Squash Court', 'GUEST GUEST', 35.0)
('Squash Court', 'GUEST GUEST', 35.0)


In [49]:
# Q10: Produce a list of facilities with a total revenue less than 1000.
# The output of facility name and total revenue, sorted by revenue. Remember
# that there's a different cost for guests and members! 

query10 = """
        SELECT name,
        SUM(
            CASE WHEN memid=0 THEN guestcost*slots
            ELSE membercost*slots END
            ) AS total_revenue
        FROM Bookings as b
        LEFT JOIN Facilities as f
        ON b.facid = f.facid
        GROUP BY name
        HAVING total_revenue < 1000
        ORDER BY total_revenue;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query10))

2.6.0
('Table Tennis', 180)
('Snooker Table', 240)
('Pool Table', 270)


In [50]:
# Q11: Produce a report of members and who recommended them in alphabetic surname,firstname order
query11 = """
        SELECT (m1.surname || ', '|| m1.firstname) AS MemberName, 
            CASE WHEN m1.recommendedby!=0 THEN (m2.surname || ', '|| m2.firstname)
            ELSE ' ' END AS RecommendedBy
        FROM Members AS m1
        LEFT JOIN Members AS m2
        ON m1.recommendedby = m2.memid
        ORDER BY MemberName;
        """ 
    # create a database connection
conn = create_connection(database)
with conn: 
    
    data = pd.DataFrame(select_all_tasks(conn,query11))

2.6.0
('Bader, Florence', 'Stibbons, Ponder')
('Baker, Anne', 'Stibbons, Ponder')
('Baker, Timothy', 'Farrell, Jemima')
('Boothe, Tim', 'Rownam, Tim')
('Butters, Gerald', 'Smith, Darren')
('Coplin, Joan', 'Baker, Timothy')
('Crumpet, Erica', 'Smith, Tracy')
('Dare, Nancy', 'Joplette, Janice')
('Farrell, David', None)
('Farrell, Jemima', None)
('GUEST, GUEST', None)
('Genting, Matthew', 'Butters, Gerald')
('Hunt, John', 'Purview, Millicent')
('Jones, David', 'Joplette, Janice')
('Jones, Douglas', 'Jones, David')
('Joplette, Janice', 'Smith, Darren')
('Mackenzie, Anna', 'Smith, Darren')
('Owen, Charles', 'Smith, Darren')
('Pinker, David', 'Farrell, Jemima')
('Purview, Millicent', 'Smith, Tracy')
('Rownam, Tim', None)
('Rumney, Henrietta', 'Genting, Matthew')
('Sarwin, Ramnaresh', 'Bader, Florence')
('Smith, Darren', None)
('Smith, Darren', None)
('Smith, Jack', 'Smith, Darren')
('Smith, Tracy', None)
('Stibbons, Ponder', 'Tracy, Burton')
('Tracy, Burton', None)
('Tupperware, Hyacinth', N

In [51]:
# Q12: Find the facilities with their usage by member, but not guests

# This question is not clear. Waiting for clarification.

In [52]:
# Q13: Find the facilities usage by month, but not guests

# This question is not clear. Waiting for clarification.