# Auto Loan Prediction Dataset - Delicia Jathanna

## CSCI 3287 

In [26]:
import sqlite3
import zipfile
import pandas as pd

## Connecting to my database & Creating the tables

In [29]:
df_auto = pd.read_csv('CSPB-3287-Final/auto_dataset.zip')

In [30]:
conn = sqlite3.connect("autoloansdb.db")
cur = conn.cursor()

In [378]:
drop_table_jobs = conn.execute('DROP TABLE IF EXISTS jobs;')

In [379]:
create_jobs_table = conn.execute('''
CREATE TABLE IF NOT EXISTS jobs
	(
	  occupation VARCHAR(50) NOT NULL, --Occupation of the application
      salary INT NOT NULL --Monthly income of the applicant
	);
''')

In [375]:
drop_table_credit = conn.execute('DROP TABLE IF EXISTS credit;')

In [376]:
create_credit_table = conn.execute('''
CREATE TABLE IF NOT EXISTS credit
	(
	  occupation VARCHAR(50), --Occupation of the application (foreign key)
      credit_score INTEGER NOT NULL, --Credit Score of the applicant
      FOREIGN KEY (occupation) REFERENCES jobs (occupation)
	);
''')

In [400]:
drop_table_loan = conn.execute('DROP TABLE IF EXISTS loan;')

In [401]:
create_loan_table = conn.execute('''
CREATE TABLE IF NOT EXISTS loan
	(
	  loan_id VARCHAR(50) PRIMARY KEY, --ID of the loan (primary key)
      occupation VARCHAR(50), --Occupation of the applicant (foreign key)
      credit_score INTEGER, --Credit Score of the applicant (foreign key)
      FOREIGN KEY (occupation) REFERENCES jobs (occupation),
      FOREIGN KEY (credit_score) REFERENCES credit (credit_score)
	);
''')

In [296]:
drop_table_loan_status = conn.execute('DROP TABLE IF EXISTS loan_status;')

In [297]:
create_loan_status_table = conn.execute('''
CREATE TABLE IF NOT EXISTS loan_status
	(
	  loan_id VARCHAR(50), --ID of the loan (foreign key)
      loan_status VARCHAR(50) CHECK (loan_status = 'Y' OR loan_status = 'N'), --Status of the loan
      credit_score INTEGER, --Credit Score of the applicant (foreign key)
      FOREIGN KEY (loan_id) REFERENCES loan (loan_id)
      FOREIGN KEY (credit_score) REFERENCES credit (credit_score)
	);
''')

In [298]:
drop_table_loan_term = conn.execute('DROP TABLE IF EXISTS loan_term;')

In [299]:
create_loan_term_table = conn.execute('''
CREATE TABLE IF NOT EXISTS loan_term
	(
	  loan_id VARCHAR(50), --ID of the loan (foreign key)
      loan_term INTEGER, --Term of the loan in months 
      FOREIGN KEY (loan_id) REFERENCES loan_status (loan_id) ON INSERT CASCADE
	);
''')

In [300]:
### Filling the tables with the data from the dataset

In [394]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO jobs(occupation, salary)\
    VALUES ( :Occupation, :Monthly_Income) ''' )
    conn.execute(cmd, row)

In [377]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO credit(credit_score, occupation)\
    VALUES ( :Credit_Score, :Occupation) ''' )
    conn.execute(cmd, row)

In [402]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO loan(loan_id, occupation, credit_score)\
    VALUES ( :Loan_ID, :Occupation, :Credit_Score) ''' )
    conn.execute(cmd, row)

In [397]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO loan_status(loan_id, loan_status, credit_score)\
    VALUES ( :Loan_ID, :Loan_Status, :Credit_Score) ''' )
    conn.execute(cmd, row)

In [305]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO loan_term(loan_id, loan_term)\
    VALUES ( :Loan_ID, :Loan_Amount_Term) ''' )
    conn.execute(cmd, row)

### Display the initial data in the tables

In [395]:
res = cur.execute("SELECT * FROM jobs LIMIT 10")
res.fetchall()

[('Teacher', '$4,000.00 '),
 ('Engineer', '$6,000.00 '),
 ('Nurse', '$25,000.00 '),
 ('Doctor', '$80,000.00 '),
 ('Businessman', '$10,000.00 '),
 ('Engineer', '$5,000.00 '),
 ('Teacher', '$3,500.00 '),
 ('Nurse', '$2,000.00 '),
 ('Businessman', '$12,000.00 '),
 ('Doctor', '$9,000.00 ')]

In [382]:
res = cur.execute("SELECT * FROM credit LIMIT 10")
res.fetchall()

[('Teacher', 750),
 ('Engineer', 800),
 ('Nurse', 650),
 ('Doctor', 900),
 ('Businessman', 700),
 ('Engineer', 750),
 ('Teacher', 650),
 ('Nurse', 600),
 ('Businessman', 850),
 ('Doctor', 800)]

In [403]:
res = cur.execute("SELECT * FROM loan LIMIT 10")
res.fetchall()

[('LP001002', 'Teacher', 750),
 ('LP001003', 'Engineer', 800),
 ('LP001005', 'Nurse', 650),
 ('LP001006', 'Doctor', 900),
 ('LP001008', 'Businessman', 700),
 ('LP001011', 'Engineer', 750),
 ('LP001013', 'Teacher', 650),
 ('LP001014', 'Nurse', 600),
 ('LP001018', 'Businessman', 850),
 ('LP001020', 'Doctor', 800)]

In [309]:
res = cur.execute("SELECT * FROM loan_status LIMIT 10")
res.fetchall()

[('LP001002', 'Y', 750),
 ('LP001003', 'Y', 800),
 ('LP001005', 'Y', 650),
 ('LP001006', 'Y', 900),
 ('LP001008', 'Y', 700),
 ('LP001011', 'Y', 750),
 ('LP001013', 'N', 650),
 ('LP001014', 'N', 600),
 ('LP001018', 'Y', 850),
 ('LP001020', 'N', 800)]

In [310]:
res = cur.execute("SELECT * FROM loan_term LIMIT 10")
res.fetchall()

[('LP001002', 36),
 ('LP001003', 36),
 ('LP001005', 36),
 ('LP001006', 36),
 ('LP001008', 36),
 ('LP001011', 36),
 ('LP001013', 0),
 ('LP001014', 0),
 ('LP001018', 36),
 ('LP001020', 0)]

# Triggers and Indexes
We will now Add some triggers and indexes to the data so that the queries run smoother and we can ensure that when a foreign key gets deleted, it gets deleted in every table.

In [313]:
drop_fk_occupation_trigger = conn.execute("DROP TRIGGER IF EXISTS fk_occupation_delete")

In [314]:
fk_occupation_trigger = conn.execute('''
    CREATE TRIGGER fk_occupation_delete
    AFTER DELETE ON jobs 
    FOR EACH ROW
    BEGIN
        DELETE FROM credit
        WHERE occupation = old.occupation;
        DELETE FROM loan
        WHERE occupation = old.occupation;
    END;
    ''')

In [315]:
drop_fk_loanid_trigger = conn.execute("DROP TRIGGER IF EXISTS fk_loanid_delete")

In [316]:
fk_loanid_trigger = conn.execute('''
    CREATE TRIGGER fk_loanid_delete
    AFTER DELETE ON loan 
    FOR EACH ROW
    BEGIN
        DELETE FROM loan_term
        WHERE loan_id = old.loan_id;
        DELETE FROM loan_status
        WHERE loan_id = old.loan_id;
    END;
    ''')

In [317]:
drop_fk_score_trigger = conn.execute("DROP TRIGGER IF EXISTS fk_score_delete")

In [318]:
fk_score_trigger = conn.execute('''
    CREATE TRIGGER fk_score_delete
    AFTER DELETE ON credit 
    FOR EACH ROW
    BEGIN
        DELETE FROM loan
        WHERE credit_score = old.credit_score;
        DELETE FROM loan_status
        WHERE credit_score = old.credit_score;
    END;
    ''')

### Testing the occupation foreign key trigger

In [319]:
res = cur.execute("DELETE FROM jobs WHERE occupation = 'Teacher'")

In [320]:
res = cur.execute("SELECT * FROM jobs LIMIT 10")
res.fetchall()

[('Engineer', '$6,000.00 '),
 ('Nurse', '$25,000.00 '),
 ('Doctor', '$80,000.00 '),
 ('Businessman', '$10,000.00 '),
 ('Engineer', '$5,000.00 '),
 ('Nurse', '$2,000.00 '),
 ('Businessman', '$12,000.00 '),
 ('Doctor', '$9,000.00 '),
 ('Engineer', '$6,000.00 '),
 ('Salesperson', '$3,000.00 ')]

In [374]:
res = cur.execute("SELECT * FROM credit LIMIT 10")
res.fetchall()

[('Businessman', 700),
 ('Engineer', 700),
 ('Salesperson', 600),
 ('Electrician', 550),
 ('Marketing Exec', 690),
 ('Chef', 620),
 ('Police Officer', 680),
 ('Graphic Design', 670),
 ('Writer', 500),
 ('Musician', 550)]

In [322]:
res = cur.execute("SELECT * FROM loan LIMIT 10")
res.fetchall()

[('LP001003', 'Engineer', 800),
 ('LP001006', 'Doctor', 900),
 ('LP001008', 'Businessman', 700),
 ('LP001014', 'Nurse', 600),
 ('LP001018', 'Businessman', 850),
 ('LP001020', 'Doctor', 800),
 ('LP001024', 'Engineer', 700),
 ('LP001027', 'Salesperson', 600),
 ('LP001029', 'Entrepreneur', 800),
 ('LP001030', 'Lawyer', 820)]

### Testing the loan_id trigger

In [323]:
res = cur.execute("DELETE FROM loan WHERE loan_id = 'LP001005'")

In [324]:
res = cur.execute("SELECT * FROM loan LIMIT 10")
res.fetchall()

[('LP001003', 'Engineer', 800),
 ('LP001006', 'Doctor', 900),
 ('LP001008', 'Businessman', 700),
 ('LP001014', 'Nurse', 600),
 ('LP001018', 'Businessman', 850),
 ('LP001020', 'Doctor', 800),
 ('LP001024', 'Engineer', 700),
 ('LP001027', 'Salesperson', 600),
 ('LP001029', 'Entrepreneur', 800),
 ('LP001030', 'Lawyer', 820)]

In [325]:
res = cur.execute("SELECT * FROM loan_term LIMIT 10")
res.fetchall()

[('LP001003', 36),
 ('LP001006', 36),
 ('LP001008', 36),
 ('LP001014', 0),
 ('LP001018', 36),
 ('LP001020', 0),
 ('LP001024', 36),
 ('LP001027', 0),
 ('LP001029', 36),
 ('LP001030', 12)]

In [326]:
res = cur.execute("SELECT * FROM loan_status LIMIT 10")
res.fetchall()

[('LP001003', 'Y', 800),
 ('LP001006', 'Y', 900),
 ('LP001008', 'Y', 700),
 ('LP001014', 'N', 600),
 ('LP001018', 'Y', 850),
 ('LP001020', 'N', 800),
 ('LP001024', 'Y', 700),
 ('LP001027', 'N', 600),
 ('LP001029', 'Y', 800),
 ('LP001030', 'Y', 820)]

### Testing the credit_score trigger

In [327]:
res = cur.execute("DELETE FROM credit WHERE credit_score > 700")

In [328]:
res = cur.execute("SELECT * FROM credit LIMIT 10")
res.fetchall()

[('Nurse', 650),
 ('Businessman', 700),
 ('Nurse', 600),
 ('Engineer', 700),
 ('Salesperson', 600),
 ('Nurse', 650),
 ('Electrician', 550),
 ('Marketing Exec', 690),
 ('Chef', 620),
 ('Police Officer', 680)]

In [329]:
res = cur.execute("SELECT * FROM loan LIMIT 10")
res.fetchall()

[('LP001008', 'Businessman', 700),
 ('LP001014', 'Nurse', 600),
 ('LP001024', 'Engineer', 700),
 ('LP001027', 'Salesperson', 600),
 ('LP001041', 'Electrician', 550),
 ('LP001046', 'Marketing Exec', 690),
 ('LP001047', 'Chef', 620),
 ('LP001073', 'Writer', 500),
 ('LP001086', 'Musician', 550),
 ('LP001098', 'Salesperson', 600)]

In [330]:
res = cur.execute("SELECT * FROM loan_status LIMIT 10")
res.fetchall()

[('LP001008', 'Y', 700),
 ('LP001014', 'N', 600),
 ('LP001024', 'Y', 700),
 ('LP001027', 'N', 600),
 ('LP001041', 'N', 550),
 ('LP001046', 'N', 690),
 ('LP001047', 'N', 620),
 ('LP001073', 'N', 500),
 ('LP001086', 'N', 550),
 ('LP001098', 'Y', 600)]

Now that we've ensured that when a foreign key gets deleted, it deletes in every table, lets add a trigger to the loan status table that a loan cannot be approved if the credit score is below 700. 

In [419]:
drop_loan_status_trigger = conn.execute("DROP TRIGGER IF EXISTS loan_status_trigger")

In [420]:
loan_status_trigger = conn.execute('''
    CREATE TRIGGER loan_status_trigger
    AFTER INSERT ON loan_status 
    FOR EACH ROW
    BEGIN
        SELECT CASE 
            WHEN ((SELECT loan_status FROM loan_status WHERE loan_status.loan_id = NEW.loan_id AND loan_status.credit_score < 700) = 'Y') 
            THEN RAISE(ABORT, 'Credit Score must be above 700 to be approved for a loan') 
        END;
    END;
    ''')

In [421]:
res = conn.execute("INSERT INTO loan_status(loan_id, credit_score, loan_status) VALUES('LP002000', 650, 'Y')")

IntegrityError: Credit Score must be above 700 to be approved for a loan

In [None]:
# Queries to join the tables to see what credit score, occupation, and monthly income
# get approved for loans versus the ones that don't

In [238]:
res = cur.execute("SELECT credit_score, jobs.occupation AS occupation, jobs.salary FROM loan JOIN jobs on jobs.occupation = loan.occupation ORDER BY occupation LIMIT 10;")
res.fetchall()

[(720, 'Account Executive', '$6,000.00 '),
 (720, 'Account Manager', '$5,800.00 '),
 (720, 'Accountant', '$4,500.00 '),
 (720, 'Accountant', '$5,000.00 '),
 (720, 'Accountant', '$5,000.00 '),
 (720, 'Accountant', '$5,500.00 '),
 (720, 'Accountant', '$6,500.00 '),
 (720, 'Accountant', '$6,500.00 '),
 (750, 'Accountant', '$4,500.00 '),
 (750, 'Accountant', '$5,000.00 ')]

In [26]:
cur.close()