# 9.7 Mini Project: SQLite DB Creation
- Members:
    - Congxin (David) Xu - cx2rx
    - Diyu Zheng - dz2fc

### Database ER Diagram

<img src="Mini Project ER Diagram.jpg" alt="Mini Project ER Diagram" width="1000">

### Create Database

In [1]:
# Import Modules
import sqlite3
import pandas as pd

In [2]:
# Open the SQLite database file 'apps_DB.db' (sqlite3 creates the file if it
# does not exist yet). The connection is reused by every cell below.
conn = sqlite3.connect('apps_DB.db')

# Create a cursor on that connection; it is used below to execute the
# CREATE TABLE statements.
cursor = conn.cursor()

### Adding Tables to Database

In [3]:
# Schema for the `apps` table, keyed by App_Id and linked to `categories`
# through Category_Id.
# SQLite does not have a separate Boolean storage class, so the three
# permission flags (Photo_Access, Location_Access, Allow_Notification) are
# declared as TEXT.
sql_create_apps = """
    CREATE TABLE apps(
        App_Id INTEGER,
        App_Name TEXT,
        Category_Id INTEGER,
        Login_Method TEXT,
        File_Size INTEGER,
        Photo_Access TEXT, 
        Location_Access TEXT,
        Allow_Notification TEXT,
        PRIMARY KEY(App_Id),
        FOREIGN KEY(Category_Id) REFERENCES categories(Category_Id)
    );
"""

In [4]:
# Run the CREATE TABLE statement for the `apps` table on the open database.
cursor.execute(sql_create_apps)

<sqlite3.Cursor at 0x2a52d466b90>

In [5]:
# Schema for the `categories` table: one row per category, keyed by
# Category_Id.
sql_create_categories = """
    CREATE TABLE categories(
        Category_Id INTEGER,
        Category_Name TEXT,
        PRIMARY KEY(Category_Id)
    );
"""

In [6]:
# Run the CREATE TABLE statement for the `categories` table on the open database.
cursor.execute(sql_create_categories)

<sqlite3.Cursor at 0x2a52d466b90>

In [7]:
# Schema for the `users` table: one row per (user, app) rating, hence the
# composite primary key on (User_Id, App_Id).
# App_Id is declared as a foreign key to apps(App_Id) so the schema records
# the users -> apps relationship explicitly.
# Note: SQLite only enforces foreign keys on connections where
# `PRAGMA foreign_keys = ON` has been issued; otherwise the clause is
# declarative only.
sql_create_users = """
    CREATE TABLE users(
        User_Id INTEGER,
        User_Name TEXT,
        App_Id INTEGER,
        User_Rating INTEGER,
        PRIMARY KEY(User_Id, App_Id),
        FOREIGN KEY(App_Id) REFERENCES apps(App_Id)
    );
"""

In [8]:
# Run the CREATE TABLE statement for the `users` table on the open database.
cursor.execute(sql_create_users)

<sqlite3.Cursor at 0x2a52d466b90>

In [9]:
# Load the "App" worksheet (Table 1) of the design workbook into a DataFrame
# and preview its first rows.
app = pd.read_excel(
    "Mini SQL DB Design.xlsx",
    sheet_name="App",
)
app.head()

Unnamed: 0,App_Id,App_Name,Category_Id,Login_Method,File_Size,Photo_Access,Location_Access,Allow_Notification
0,1001,Aetna Health,11,Face ID,93,No,Yes,No
1,1002,Airbnb,12,Password,283,No,Yes,Yes
2,1003,Amazon,13,Password,160,No,Yes,Yes
3,1004,BofA,14,Face ID,173,No,Yes,Yes
4,1005,Booking.com,12,Password,164,No,Yes,No


In [10]:
# Append the App rows to the existing `apps` table; the DataFrame index is
# not written as a column.
app.to_sql(
    'apps',
    conn,
    index=False,
    if_exists='append',
)

In [11]:
# Load the "Category" worksheet (Table 2) of the design workbook into a
# DataFrame and preview its first rows.
category = pd.read_excel(
    "Mini SQL DB Design.xlsx",
    sheet_name="Category",
)
category.head()

Unnamed: 0,Category_Id,Category_Name
0,11,Insurance
1,12,Travel
2,13,Shopping
3,14,Financial
4,15,Game


In [12]:
# Append the Category rows to the existing `categories` table; the DataFrame
# index is not written as a column.
category.to_sql(
    'categories',
    conn,
    index=False,
    if_exists='append',
)

In [13]:
# Load the "User" worksheet (Table 3) of the design workbook into a DataFrame
# and preview its first rows.
user = pd.read_excel(
    "Mini SQL DB Design.xlsx",
    sheet_name="User",
)
user.head()

Unnamed: 0,User_Id,User_Name,App_Id,User_Rating
0,6001,Roy,1003,4
1,6001,Roy,1010,2
2,6001,Roy,1009,5
3,6001,Roy,1017,2
4,6001,Roy,1001,3


In [14]:
# Append the User rows to the existing `users` table; the DataFrame index is
# not written as a column.
user.to_sql(
    'users',
    conn,
    index=False,
    if_exists='append',
)

### Write SQL Query to Answer Questions

#### Query 1: Find the names of the apps with the largest file size in each category. Report the category name, the app name, and its file size.

In [15]:
# For every category, list the app(s) whose File_Size equals that category's
# maximum. A correlated subquery is used instead of selecting a bare App_Name
# next to MAX(File_Size): that form depends on SQLite-specific handling of
# bare columns and returns only one arbitrary app when several apps tie for
# the largest size. The ORDER BY makes the row order deterministic.
query1 = """
    SELECT c.Category_Name, a.App_Name, a.File_Size FROM apps a
    INNER JOIN categories c
    ON a.Category_Id = c.Category_Id
    WHERE a.File_Size = (
        SELECT MAX(a2.File_Size) FROM apps a2
        WHERE a2.Category_Id = a.Category_Id
    )
    ORDER BY a.Category_Id, a.App_Name;
"""
# Execute query1 on the open connection and display the result as a DataFrame.
pd.read_sql_query(query1, conn)

Unnamed: 0,Category_Name,App_Name,File_Size
0,Insurance,GEICO,199
1,Travel,Airbnb,283
2,Shopping,Amazon,160
3,Financial,Chase,259
4,Game,Fruit Ninja,268
5,Social,Instagram,254


#### Query 2: Find the average rating for apps for each login method and each category.

In [16]:
# Average user rating per (login method, category) pair. Each row of `users`
# is joined to its app and then to that app's category; results are sorted by
# login method and, within a method, by average rating from highest to lowest.
query2 = """
SELECT a.Login_Method, l.Category_Name, AVG(u.User_Rating) as Avg_Rating FROM users u
LEFT JOIN apps a
    ON u.App_Id = a.App_Id 
LEFT JOIN categories l
    ON a.Category_Id = l.Category_Id
GROUP BY a.Login_Method, l.Category_Id
ORDER BY a.Login_Method, AVG(u.User_Rating) DESC
"""
# Execute query2 on the open connection and display the result as a DataFrame.
pd.read_sql_query(query2, conn)

Unnamed: 0,Login_Method,Category_Name,Avg_Rating
0,Face ID,Insurance,3.4
1,Face ID,Financial,2.5
2,Password,Insurance,3.5
3,Password,Travel,3.428571
4,Password,Social,2.857143
5,Password,Shopping,2.5
6,Password,Game,2.0
7,Password,Financial,2.0


#### Query 3: For each person, find the total file size of the apps that require location access but not photo access.

In [17]:
# Per-user total File_Size over the apps listed for that user in `users`
# that have Photo_Access = 'No' and Location_Access = 'Yes'.
# NOTE(review): the WHERE clause filters on columns of the LEFT-JOINed `apps`
# table, so a user with no qualifying app is dropped from the result instead
# of being reported with an empty total — confirm that this is intended.
query3="""
SELECT u.User_Name, SUM(a.File_Size) as Total_File_Size FROM users u
LEFT JOIN apps a
    ON u.App_Id = a.App_Id 
WHERE a.Photo_Access = 'No' AND a.Location_Access = 'Yes'
GROUP BY User_Id
"""

# Execute query3 on the open connection and display the result as a DataFrame.
pd.read_sql_query(query3, conn)

Unnamed: 0,User_Name,Total_File_Size
0,Roy,1059
1,David,1297
2,Diyu,56
3,Taylor,834


#### Query 4: Order apps by popularity and, if there is a tie, sort by average rating in descending order.

In [18]:
# One row per app name with its average rating and the number of `users` rows
# that reference it (used here as the popularity measure). Rows are ordered
# by that count, largest first, and ties are broken by average rating,
# highest first.
query4 = """
SELECT a.App_Name, AVG(u.User_Rating) as Avg_Rating, COUNT(a.App_Name) as Count_App FROM users u
LEFT JOIN apps a
    ON u.App_Id = a.App_Id 
GROUP BY a.App_Name
ORDER BY COUNT(a.App_Name) DESC, AVG(u.User_Rating) DESC
"""
# Execute query4 on the open connection and display the result as a DataFrame.
pd.read_sql_query(query4, conn)

Unnamed: 0,App_Name,Avg_Rating,Count_App
0,GEICO,3.333333,3
1,Instagram,4.5,2
2,Booking.com,4.0,2
3,Fly Delta,4.0,2
4,Aetna Health,3.5,2
5,StateFarm,3.5,2
6,Airbnb,3.0,2
7,Amazon,2.5,2
8,WSOP,2.5,2
9,WeChat,2.0,2


### Close cursor and database connection

In [19]:
# Release the cursor first, then close the database connection it belongs to.
cursor.close()
conn.close()