# SQL with Python Reference Guide 1 
# Intro and basic SELECT
## (Justin M. Olds)
Based on Stanford SQL course: https://lagunita.stanford.edu/courses/DB/SQL/SelfPaced/info

---
Basic info: 
* Data Definition Language (DDL) -- CREATE TABLE, DROP TABLE 
* Data Manipulation Language (DML) -- SELECT, INSERT, DELETE, UPDATE
* Other commands -- Indexes, Constraints, Views, Triggers, Transations, Authorization, ... 
---
Basic SELECT statement (three clauses)
* SELECT A1, A2, ... An -- Tells you what to return
* FROM R1, R2, ... Rn  -- Relations you want to query over
* WHERE condition -- Used to combine the relations and filter

Orientation is to think (1) FROM, (2) WHERE, (3) SELECT

---

Basics of the SELECT statement

College admissions database
3 Relations:
* College (__cName__, __state__, enrollment) 
* Student (__sID__, sName, GPA, sizeHS)
* Apply (__sID__, __cName__, __major__, decision)

Note: __Bolded__ relations are designated as key relations. Therefore, the key relations are unique within the relation.
(e.g., with the Apply relation, a particular student can apply to the same college multile times, but only once for a parituclar collage and for a particular major)

In [71]:
import sqlite3
import pandas as pd

#### The function sqlite3.connect connects to a database. 

In [48]:
# conn = sqlite3.connect("/Users/-/Dropbox/Python and SQL/employee.db")
# conn = sqlite3.connect("/Users/-/Dropbox/Python and SQL/CollegeData.db")
conn = sqlite3.connect("class.db")

In [49]:
c = conn.cursor()

### Create tables

In [50]:
c.execute('DROP TABLE IF EXISTS College')
c.execute('DROP TABLE IF EXISTS Student') 
c.execute('DROP TABLE IF EXISTS Apply') 

c.execute('CREATE TABLE College(cName TEXT, state TEXT, enrollment INT)')
c.execute('CREATE TABLE Student(sID INT, sName TEXT, GPA REAL, sizeHS INT)')
c.execute('CREATE TABLE Apply(sID INT, cName TEXT, major TEXT, decision TEXT)')
conn.commit()

### Insert data

In [51]:
c.execute('DELETE FROM Student')
c.execute('DELETE FROM College')
c.execute('DELETE FROM Apply')

c.execute("INSERT INTO Student VALUES (123, 'Amy', 3.9, 1000)")
c.execute("INSERT INTO Student values (234, 'Bob', 3.6, 1500)")
c.execute("INSERT INTO Student values (345, 'Craig', 3.5, 500)")
c.execute("INSERT INTO Student values (456, 'Doris', 3.9, 1000)")
c.execute("INSERT INTO Student values (567, 'Edward', 2.9, 2000)")
c.execute("INSERT INTO Student values (678, 'Fay', 3.8, 200)")
c.execute("INSERT INTO Student values (789, 'Gary', 3.4, 800)")
c.execute("INSERT INTO Student values (987, 'Helen', 3.7, 800)")
c.execute("INSERT INTO Student values (876, 'Irene', 3.9, 400)")
c.execute("INSERT INTO Student values (765, 'Jay', 2.9, 1500)")
c.execute("INSERT INTO Student values (654, 'Amy', 3.9, 1000)")
c.execute("INSERT INTO Student values (543, 'Craig', 3.4, 2000)")

c.execute("INSERT INTO College values ('Stanford', 'CA', 15000)")
c.execute("INSERT INTO College values ('Berkeley', 'CA', 36000)")
c.execute("INSERT INTO College values ('MIT', 'MA', 10000)")
c.execute("INSERT INTO College values ('Cornell', 'NY', 21000)")

c.execute("INSERT INTO Apply values (123, 'Stanford', 'CS', 'Y')")
c.execute("INSERT INTO Apply values (123, 'Stanford', 'EE', 'N')")
c.execute("INSERT INTO Apply values (123, 'Berkeley', 'CS', 'Y')")
c.execute("INSERT INTO Apply values (123, 'Cornell', 'EE', 'Y')")
c.execute("INSERT INTO Apply values (234, 'Berkeley', 'biology', 'N')")
c.execute("INSERT INTO Apply values (345, 'MIT', 'bioengineering', 'Y')")
c.execute("INSERT INTO Apply values (345, 'Cornell', 'bioengineering', 'N')")
c.execute("INSERT INTO Apply values (345, 'Cornell', 'CS', 'Y')")
c.execute("INSERT INTO Apply values (345, 'Cornell', 'EE', 'N')")
c.execute("INSERT INTO Apply values (678, 'Stanford', 'history', 'Y')")
c.execute("INSERT INTO Apply values (987, 'Stanford', 'CS', 'Y')")
c.execute("INSERT INTO Apply values (987, 'Berkeley', 'CS', 'Y')")
c.execute("INSERT INTO Apply values (876, 'Stanford', 'CS', 'N')")
c.execute("INSERT INTO Apply values (876, 'MIT', 'biology', 'Y')")
c.execute("INSERT INTO Apply values (876, 'MIT', 'marine biology', 'N')")
c.execute("INSERT INTO Apply values (765, 'Stanford', 'history', 'Y')")
c.execute("INSERT INTO Apply values (765, 'Cornell', 'history', 'N')")
c.execute("INSERT INTO Apply values (765, 'Cornell', 'psychology', 'Y')")
c.execute("INSERT INTO Apply values (543, 'MIT', 'CS', 'N')")
conn.commit()


### Basic SELECT Statement

In [73]:
df = pd.read_sql_query("SELECT sID, sName, GPA FROM Student", conn)
# print(c.fetchone())
# print(c.fetchmany(5)) # returns that many rows as a list
# print(c.fetchall()) # returns list
#print(*c.fetchall(), sep = "\n") # returns list with line breaks for each element
df

Unnamed: 0,sID,sName,GPA
0,123,Amy,3.9
1,234,Bob,3.6
2,345,Craig,3.5
3,456,Doris,3.9
4,567,Edward,2.9
5,678,Fay,3.8
6,789,Gary,3.4
7,987,Helen,3.7
8,876,Irene,3.9
9,765,Jay,2.9


In [74]:
df = pd.read_sql_query("SELECT sID, sName, GPA FROM Student WHERE GPA > 3.6", conn)
df

Unnamed: 0,sID,sName,GPA
0,123,Amy,3.9
1,456,Doris,3.9
2,678,Fay,3.8
3,987,Helen,3.7
4,876,Irene,3.9
5,654,Amy,3.9


Note: It's not neccessary to include the GPA in the result of the query even if we filter based on GPA. 

---
### Combining two relations with a SELECT statement

In [75]:
df = pd.read_sql_query("SELECT sName, major FROM Student, Apply WHERE Student.sID = Apply.sID", conn)
df

Unnamed: 0,sName,major
0,Amy,CS
1,Amy,CS
2,Amy,EE
3,Amy,EE
4,Bob,biology
5,Craig,CS
6,Craig,EE
7,Craig,bioengineering
8,Craig,bioengineering
9,Fay,history


To get rid of duplicates we can add the keyword DISTINT after the SELECT keyword

In [76]:
df = pd.read_sql_query("SELECT DISTINCT sName, major FROM Student, Apply WHERE Student.sID = Apply.sID", conn)
df

Unnamed: 0,sName,major
0,Amy,CS
1,Amy,EE
2,Bob,biology
3,Craig,CS
4,Craig,EE
5,Craig,bioengineering
6,Fay,history
7,Helen,CS
8,Irene,CS
9,Irene,biology


### More complicated queries
Find names and GPAs of students whose high school size <1000, applied to CS at Stanford, and the application decision

In [78]:
df = pd.read_sql_query("""
    SELECT sName, GPA, decision 
    FROM Student, Apply 
    WHERE Student.sID = Apply.sID
        AND sizeHS < 1000 
        AND major = 'CS'
        AND cname = 'Stanford'
    """, conn)
df

Unnamed: 0,sName,GPA,decision
0,Helen,3.7,Y
1,Irene,3.9,N


Next, find all large universities (enrollment > 20000) with an instance of one at least one person applying to the CS dept. 

Note: "SELECT cName" is ambiguous since it is the same index used in both the College and Apply tables. To resolve this, just identify one to return. 

In [79]:
df = pd.read_sql_query("""
    SELECT College.cName
    FROM College, Apply 
    WHERE College.cName = Apply.cName
        AND enrollment > 20000 
        AND major = 'CS'
    """, conn)
df

Unnamed: 0,cName
0,Berkeley
1,Cornell
2,Berkeley


Since the above SELECT statement returned two instances of Berkeley, we can add the DISTINCT keyword like before to get rid of redundencies. 

In [80]:
df = pd.read_sql_query("""
    SELECT DISTINCT College.cName
    FROM College, Apply 
    WHERE College.cName = Apply.cName
        AND enrollment > 20000 
        AND major = 'CS'
    """, conn)
df

Unnamed: 0,cName
0,Berkeley
1,Cornell


Query with a bigger result. Join all three relations (Student, College, Apply) with the join conditions (=) so that we are cross-referencing the same students and same colleges across relations. 

In [81]:
df = pd.read_sql_query("""
    SELECT Student.sID, sName, GPA, Apply.cName, enrollment
    FROM Student, College, Apply 
    WHERE Apply.sID = Student.sID
        AND College.cName = Apply.cName
    """, conn)
df

Unnamed: 0,sID,sName,GPA,cName,enrollment
0,123,Amy,3.9,Berkeley,36000
1,123,Amy,3.9,Cornell,21000
2,123,Amy,3.9,Stanford,15000
3,123,Amy,3.9,Stanford,15000
4,234,Bob,3.6,Berkeley,36000
5,345,Craig,3.5,Cornell,21000
6,345,Craig,3.5,Cornell,21000
7,345,Craig,3.5,Cornell,21000
8,345,Craig,3.5,MIT,10000
9,678,Fay,3.8,Stanford,15000


SQL queries return unordered data, but the result of a query can be sorted based on a particular relation by adding a ORDER BY clause. For example, the results can be ordered based on GPA (descending). 

Note: Ascending is the default for ORDER BY.

In [82]:
df = pd.read_sql_query("""
    SELECT Student.sID, sName, GPA, Apply.cName, enrollment
    FROM Student, College, Apply 
    WHERE Apply.sID = Student.sID
        AND College.cName = Apply.cName
    ORDER BY GPA DESC
    """, conn)
df

Unnamed: 0,sID,sName,GPA,cName,enrollment
0,123,Amy,3.9,Berkeley,36000
1,123,Amy,3.9,Cornell,21000
2,123,Amy,3.9,Stanford,15000
3,123,Amy,3.9,Stanford,15000
4,876,Irene,3.9,MIT,10000
5,876,Irene,3.9,MIT,10000
6,876,Irene,3.9,Stanford,15000
7,678,Fay,3.8,Stanford,15000
8,987,Helen,3.7,Berkeley,36000
9,987,Helen,3.7,Stanford,15000


Multiple ORDER BY clauses can be entered to further structure the data output. 

In [83]:
df = pd.read_sql_query("""
    SELECT Student.sID, sName, GPA, Apply.cName, enrollment
    FROM Student, College, Apply 
    WHERE Apply.sID = Student.sID
        AND College.cName = Apply.cName
    ORDER BY GPA DESC, enrollment
    """, conn)
df

Unnamed: 0,sID,sName,GPA,cName,enrollment
0,876,Irene,3.9,MIT,10000
1,876,Irene,3.9,MIT,10000
2,123,Amy,3.9,Stanford,15000
3,123,Amy,3.9,Stanford,15000
4,876,Irene,3.9,Stanford,15000
5,123,Amy,3.9,Cornell,21000
6,123,Amy,3.9,Berkeley,36000
7,678,Fay,3.8,Stanford,15000
8,987,Helen,3.7,Stanford,15000
9,987,Helen,3.7,Berkeley,36000


LIKE predicate -- allows for simple string matching on attributes. 
For example, we can search across all majors that have to do with "bio" as follows:

In [84]:
df = pd.read_sql_query("""
    SELECT sID, major
    FROM Apply 
    WHERE major LIKE '%bio%'
    """, conn)
df

Unnamed: 0,sID,major
0,234,biology
1,345,bioengineering
2,345,bioengineering
3,876,biology
4,876,marine biology


SELECT * for getting all attributes available.

In [85]:
df = pd.read_sql_query("""
    SELECT *
    FROM Apply 
    WHERE major LIKE '%bio%'
    """, conn)
df

Unnamed: 0,sID,cName,major,decision
0,234,Berkeley,biology,N
1,345,MIT,bioengineering,Y
2,345,Cornell,bioengineering,N
3,876,MIT,biology,Y
4,876,MIT,marine biology,N


General queries for the cross-product of two relations. 

In [86]:
df = pd.read_sql_query("""
    SELECT *
    FROM Student, College
    """, conn)
df

Unnamed: 0,sID,sName,GPA,sizeHS,cName,state,enrollment
0,123,Amy,3.9,1000,Stanford,CA,15000
1,123,Amy,3.9,1000,Berkeley,CA,36000
2,123,Amy,3.9,1000,MIT,MA,10000
3,123,Amy,3.9,1000,Cornell,NY,21000
4,234,Bob,3.6,1500,Stanford,CA,15000
5,234,Bob,3.6,1500,Berkeley,CA,36000
6,234,Bob,3.6,1500,MIT,MA,10000
7,234,Bob,3.6,1500,Cornell,NY,21000
8,345,Craig,3.5,500,Stanford,CA,15000
9,345,Craig,3.5,500,Berkeley,CA,36000


Using arithmetic (or numeric operations) within SELECT statement.
The example below scales GPA based on the size of the student's high school. 

In [87]:
df = pd.read_sql_query("""
    SELECT sID, sName, GPA, sizeHS, GPA*(sizeHS/1000)
    FROM Student
    """, conn)
df

Unnamed: 0,sID,sName,GPA,sizeHS,GPA*(sizeHS/1000)
0,123,Amy,3.9,1000,3.9
1,234,Bob,3.6,1500,3.6
2,345,Craig,3.5,500,0.0
3,456,Doris,3.9,1000,3.9
4,567,Edward,2.9,2000,5.8
5,678,Fay,3.8,200,0.0
6,789,Gary,3.4,800,0.0
7,987,Helen,3.7,800,0.0
8,876,Irene,3.9,400,0.0
9,765,Jay,2.9,1500,2.9


If we are unhappy with the long column names that are generated with numeric operations, they can be changed with the AS feature.

In [88]:
df = pd.read_sql_query("""
    SELECT sID, sName, GPA, sizeHS, GPA*(sizeHS/1000) AS scaledGPA
    FROM Student
    """, conn)
df

Unnamed: 0,sID,sName,GPA,sizeHS,scaledGPA
0,123,Amy,3.9,1000,3.9
1,234,Bob,3.6,1500,3.6
2,345,Craig,3.5,500,0.0
3,456,Doris,3.9,1000,3.9
4,567,Edward,2.9,2000,5.8
5,678,Fay,3.8,200,0.0
6,789,Gary,3.4,800,0.0
7,987,Helen,3.7,800,0.0
8,876,Irene,3.9,400,0.0
9,765,Jay,2.9,1500,2.9


# WTF I can't get that shit to shit anything. Ignore all below--just referneces from online bs

--- 


In [None]:
c.execute("""CREATE TABLE employees (
            first text,
            last text,
            pay integer
           )""")

c.execute("INSERT INTO employees VALUES ('Corey', 'Schafer', 50000)")



In [None]:
c.execute("INSERT INTO employees VALUES ('Mary', 'Schafer', 70000)")
conn.commit()

In [None]:
c.execute("SELECT * FROM employees WHERE last='Schafer'")
#print(c.fetchone())
#c.fetchmany(5) # returns that many rows as a list
print(c.fetchall()) # returns list



In [None]:
def insert_emp(emp):
    with conn: 
        c.execute("INSERT INTO employees VALUES (:first, :last, :pay)", {'first': emp.first, 'last': emp.last, 'pay': emp.pay})

def get_emps_by_name(lastname):
    c.execute("SELECT * FROM employees WHERE last=:last", {'last': lastname})
    return c.fetchall()

def update_pay(emp, pay):
    with conn:
        c.execute("""UPDATE employees SET pay = :pay
                WHERE first = :first AND last = :last""",
                 {'first': emp.first, 'last': emp.last, 'pay': pay})

def remove_emp(emp):
    with conn:
        c.execute("DELETE from employees WHERE first = :first AND last = :last",
                 {'first': emp.first, 'last': emp.last})

In [None]:
emp_1 = Employee('John', 'Doe', 80000)
emp_2 = Employee('Jane', 'Doe', 90000)
print(emp_1.first)
print(emp_1.last)
print(emp_1.pay)

In [None]:
insert_emp(emp_1)
insert_emp(emp_2)

In [None]:
emps = get_emps_by_name('Doe')
print(emps)

In [None]:
update_pay(emp_2, 95000)
remove_emp(emp_1)

In [None]:
#c.execute("INSERT INTO employees VALUES (?, ?, ?)" (emp_1.first, emp_1.last, emp_1.pay))
c.execute("INSERT INTO employees VALUES (:first, :last, :pay)", {'first': emp_2.first, 'last': emp_2.last, 'pay': emp_2.pay})
conn.commit()

In [None]:
c.execute("SELECT * FROM employees WHERE last=:last", {'last': 'Doe'})
print(c.fetchall())

In [None]:
c.execute("SELECT * FROM employees WHERE last=?", ('Schafer',))
print(c.fetchall())

In [None]:
conn.commit()
conn.close()
print(employees)


#### To make any operation with the databse statements must invoke a cursor object to execute them. Additionally, it is necessary to commit the changes.
---
#### Create a users table with name, phone, email, and password columns. 

In [None]:
# Get a cursor object
c = db.cursor()
# Create a table
c.execute('''
    CREATE TABLE users(id INTEGER PRIMARY KEY, name TEXT,
                       phone TEXT, email TEXT unique, password TEXT)
''')
db.commit()

#### To drop a table

In [None]:
#cursor = db.cursor()
#cursor.execute('''DROP TABLE users''')
#db.commit()

#### Inserting (INSERT) Data into the database

In [None]:
cursor = db.cursor()
name1 = 'Andres'
phone1 = '3366858'
email1 = 'user@example.com'
# A very secure password
password1 = '12345'
name2 = 'John'
phone2 = '5557241'
email2 = 'johndoe@example.com'
password2 = 'abcdef'
 
# Insert user 1
cursor.execute('''INSERT INTO users(name, phone, email, password)
                  VALUES(?,?,?,?)''', (name1,phone1, email1, password1))
print('First user inserted')
 
# Insert user 2
cursor.execute('''INSERT INTO users(name, phone, email, password)
                  VALUES(?,?,?,?)''', (name2,phone2, email2, password2))
print('Second user inserted')
 
db.commit()

#### To insert serveral rows use the executemany function with a list object

In [None]:
cursor = db.cursor()
id1 = 1
id2 = 2
id3 = 3
name1 = 'Andres'
phone1 = '3366858'
email1 = 'user@example.com'
password1 = '12345'
name2 = 'John'
phone2 = '5557241'
email2 = 'johndoe@example.com'
password2 = 'abcdef'
name3 = 'ASDF'
phone3 = 'ASDFF'
email3 = 'johndASDoe@example.com'
password3 = 'abcDDDdef'

users = [(id1, name1,phone1, email1, password1),
         (id2, name2,phone2, email2, password2),
         (id3, name3,phone3, email3, password3)]
cursor.executemany('INSERT INTO users VALUES (?,?,?,?,?)', users)
db.commit()

In [None]:
users