# Auto Loan Prediction Dataset - Delicia Jathanna

## CSCI 3287 

In [2]:
import sqlite3
import zipfile
import pandas as pd

## Connecting to my database & Creating the tables

In [3]:
df_auto = pd.read_csv('auto_dataset.zip')

In [4]:
conn = sqlite3.connect("autoloansdb.db")
cur = conn.cursor()

In [5]:
drop_table_jobs = conn.execute('DROP TABLE IF EXISTS jobs;')

In [6]:
create_jobs_table = conn.execute('''
CREATE TABLE IF NOT EXISTS jobs
	(
	  occupation VARCHAR(50) NOT NULL, --Occupation of the application
      salary INT NOT NULL --Monthly income of the applicant
	);
''')

In [7]:
drop_table_credit = conn.execute('DROP TABLE IF EXISTS credit;')

In [8]:
create_credit_table = conn.execute('''
CREATE TABLE IF NOT EXISTS credit
	(
	  occupation VARCHAR(50), --Occupation of the application (foreign key)
      credit_score INTEGER NOT NULL, --Credit Score of the applicant
      FOREIGN KEY (occupation) REFERENCES jobs (occupation)
	);
''')

In [9]:
drop_table_loan = conn.execute('DROP TABLE IF EXISTS loan;')

In [10]:
create_loan_table = conn.execute('''
CREATE TABLE IF NOT EXISTS loan
	(
	  loan_id VARCHAR(50) PRIMARY KEY, --ID of the loan (primary key)
      occupation VARCHAR(50), --Occupation of the applicant (foreign key)
      credit_score INTEGER, --Credit Score of the applicant (foreign key)
      FOREIGN KEY (occupation) REFERENCES jobs (occupation),
      FOREIGN KEY (credit_score) REFERENCES credit (credit_score)
	);
''')

In [11]:
drop_table_loan_status = conn.execute('DROP TABLE IF EXISTS loan_status;')

In [12]:
create_loan_status_table = conn.execute('''
CREATE TABLE IF NOT EXISTS loan_status
	(
	  loan_id VARCHAR(50), --ID of the loan (foreign key)
      loan_status VARCHAR(50) CHECK (loan_status = 'Y' OR loan_status = 'N'), --Status of the loan
      credit_score INTEGER, --Credit Score of the applicant (foreign key)
      FOREIGN KEY (loan_id) REFERENCES loan (loan_id)
      FOREIGN KEY (credit_score) REFERENCES credit (credit_score)
	);
''')

In [13]:
drop_table_loan_term = conn.execute('DROP TABLE IF EXISTS loan_term;')

In [14]:
create_loan_term_table = conn.execute('''
CREATE TABLE IF NOT EXISTS loan_term
	(
	  loan_id VARCHAR(50), --ID of the loan (foreign key)
      loan_term INTEGER, --Term of the loan in months 
      FOREIGN KEY (loan_id) REFERENCES loan_status (loan_id)
	);
''')

In [15]:
### Filling the tables with the data from the dataset

In [16]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO jobs(occupation, salary)\
    VALUES ( :Occupation, :Monthly_Income) ''' )
    conn.execute(cmd, row)

In [18]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO credit(credit_score, occupation)\
    VALUES ( :Credit_Score, :Occupation) ''' )
    conn.execute(cmd, row)

IntegrityError: NOT NULL constraint failed: credit.credit_score

In [180]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO loan(loan_id, occupation, credit_score)\
    VALUES ( :Loan_ID, :Occupation, :Credit_Score) ''' )
    conn.execute(cmd, row)

In [194]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO loan_status(loan_id, loan_status, credit_score)\
    VALUES ( :Loan_ID, :Loan_Status, :Credit_Score) ''' )
    conn.execute(cmd, row)

In [182]:
for row in df_auto.to_dict('records'):
    cmd = ('''INSERT INTO loan_term(loan_id, loan_term)\
    VALUES ( :Loan_ID, :Loan_Amount_Term) ''' )
    conn.execute(cmd, row)

In [None]:
# Queries to display the tables

In [1]:
res = cur.execute("SELECT * FROM jobs LIMIT 10")
res.fetchall()

NameError: name 'cur' is not defined

In [187]:
res = cur.execute("SELECT * FROM credit LIMIT 2")
res.fetchall()

[('Teacher', 750), ('Engineer', 800)]

In [189]:
res = cur.execute("SELECT * FROM loan LIMIT 10")
res.fetchall()

[('LP001002', 'Teacher', 750),
 ('LP001003', 'Engineer', 800),
 ('LP001005', 'Nurse', 650),
 ('LP001006', 'Doctor', 900),
 ('LP001008', 'Businessman', 700),
 ('LP001011', 'Engineer', 750),
 ('LP001013', 'Teacher', 650),
 ('LP001014', 'Nurse', 600),
 ('LP001018', 'Businessman', 850),
 ('LP001020', 'Doctor', 800)]

In [195]:
res = cur.execute("SELECT * FROM loan_status LIMIT 10")
res.fetchall()

[('LP001002', 'Y', 750),
 ('LP001003', 'Y', 800),
 ('LP001005', 'Y', 650),
 ('LP001006', 'Y', 900),
 ('LP001008', 'Y', 700),
 ('LP001011', 'Y', 750),
 ('LP001013', 'N', 650),
 ('LP001014', 'N', 600),
 ('LP001018', 'Y', 850),
 ('LP001020', 'N', 800)]

In [190]:
res = cur.execute("SELECT * FROM loan_term LIMIT 10")
res.fetchall()

[('LP001002', 36),
 ('LP001003', 36),
 ('LP001005', 36),
 ('LP001006', 36),
 ('LP001008', 36),
 ('LP001011', 36),
 ('LP001013', 0),
 ('LP001014', 0),
 ('LP001018', 36),
 ('LP001020', 0)]

In [159]:
res = cur.execute("SELECT credit_score, jobs.occupation, jobs.salary FROM credit JOIN jobs on jobs.occupation = credit.occupation ORDER BY jobs.occupation LIMIT 25")
res.fetchall()

[(720, 'Account Executive', '$6,000.00 '),
 (720, 'Account Manager', '$5,800.00 '),
 (720, 'Accountant', '$4,500.00 '),
 (720, 'Accountant', '$5,000.00 '),
 (720, 'Accountant', '$5,000.00 '),
 (720, 'Accountant', '$5,500.00 '),
 (720, 'Accountant', '$6,500.00 '),
 (720, 'Accountant', '$6,500.00 '),
 (750, 'Accountant', '$4,500.00 '),
 (750, 'Accountant', '$5,000.00 '),
 (750, 'Accountant', '$5,000.00 '),
 (750, 'Accountant', '$5,500.00 '),
 (750, 'Accountant', '$6,500.00 '),
 (750, 'Accountant', '$6,500.00 '),
 (740, 'Accountant', '$4,500.00 '),
 (740, 'Accountant', '$5,000.00 '),
 (740, 'Accountant', '$5,000.00 '),
 (740, 'Accountant', '$5,500.00 '),
 (740, 'Accountant', '$6,500.00 '),
 (740, 'Accountant', '$6,500.00 '),
 (720, 'Accountant', '$4,500.00 '),
 (720, 'Accountant', '$5,000.00 '),
 (720, 'Accountant', '$5,000.00 '),
 (720, 'Accountant', '$5,500.00 '),
 (720, 'Accountant', '$6,500.00 ')]

In [26]:
cur.close()