In [1]:
## Import the necessary libraries
import sqlite3
import pandas as pd
import os
import time

In [2]:
# Paths
# All locations are derived from the parent of the current working directory
# (os.path.abspath("..")), so they depend on where the notebook is launched from.
# DATA_DIR is defined here but is not used by the cells visible in this notebook.
BASE_DIR = os.path.abspath("..")                      # github_repo/
DATA_DIR = os.path.join(BASE_DIR, "data", "raw")       # github_repo/data/raw
DB_PATH = os.path.join(BASE_DIR, "insurance.db")       # github_repo/insurance.db

In [3]:
## Connect back to the database
# sqlite3.connect() silently creates a new, empty database file when the path
# does not exist; the mistake would only surface later as "no such table"
# errors. Fail fast with a clear message if the expected file is missing
# (e.g. the notebook was started from an unexpected working directory).
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(f"SQLite database not found at {DB_PATH!r}")
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

In [4]:
# Index definitions as (index name, table, column); each one is rendered into
# an idempotent CREATE INDEX statement to speed up execution times
_index_specs = [
    ("idx_customer_indiv", "customer", "individual_id"),
    ("idx_customer_addr", "customer", "address_id"),
    ("idx_address_addr", "address", "address_id"),
    ("idx_demo_indiv", "demographic", "individual_id"),
    ("idx_aic_indiv", "autoinsurance_churn", "individual_id"),
    ("idx_term_indiv", "termination", "individual_id"),
]
indexes = [
    f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column})"
    for index_name, table, column in _index_specs
]

In [5]:
# Run the index statements sequentially on the shared cursor,
# then commit once after the whole batch
for create_index_stmt in indexes:
    cursor.execute(create_index_stmt)
conn.commit()

In [6]:
# Define the query
# Builds the combined_data table from customer (alias c), LEFT JOINed to
# address (a) on address_id and to demographic (d), autoinsurance_churn (aic)
# and termination (t) on individual_id, so a customer row that passes the
# filter is kept even when it has no match in the joined tables.
# COALESCE picks the first non-NULL value, in this order:
#   age_in_years    -> customer, then autoinsurance_churn
#   income          -> autoinsurance_churn, then demographic
#   ACCT_SUSPD_DATE -> autoinsurance_churn, then termination
#   marital_status  -> demographic, then autoinsurance_churn
# The WHERE clause keeps only rows with curr_ann_amt >= 0; rows where
# curr_ann_amt is NULL fail the comparison and are excluded as well.
# NOTE(review): if any joined table holds more than one row per join key, the
# LEFT JOINs will repeat the customer row — confirm the keys are unique.
query = """
CREATE TABLE combined_data AS
SELECT
    c.individual_id,c.address_id,c.curr_ann_amt,c.days_tenure,c.cust_orig_date,COALESCE(c.age_in_years,aic.age_in_years) as age_in_years,
    a.LATITUDE,a.LONGITUDE,a.STREET_ADDRESS,a.CITY,a.STATE,a.COUNTY,
    aic.Churn,
    COALESCE(aic.income,d.income) as income,COALESCE(aic.acct_suspd_date,t.ACCT_SUSPD_DATE) as ACCT_SUSPD_DATE,d.has_children,d.length_of_residence,COALESCE(d.marital_status,aic.marital_status) as marital_status,d.home_market_value,d.home_owner,d.college_degree,d.good_credit
FROM customer c
LEFT JOIN address a
    ON c.address_id = a.address_id
LEFT JOIN demographic d
    ON c.individual_id = d.individual_id
LEFT JOIN autoinsurance_churn aic
    on c.individual_id = aic.individual_id
LEFT JOIN termination t
    on c.individual_id = t.individual_id
WHERE
    c.curr_ann_amt >= 0
ORDER BY c.individual_id;
"""


Notes:
- I excluded personally identifiable information (date of birth, Social Security number) for data security.
- Income is taken from the autoinsurance_churn table; if it is missing there, the demographic table is used instead.
- The account suspension date is taken from the autoinsurance_churn table; if it is missing there, the termination table is used instead.

In [7]:
# Remove any previous combined_data table; the statement is a no-op when
# the table does not exist
drop_statement = "DROP TABLE IF EXISTS combined_data"
cursor.execute(drop_statement)

<sqlite3.Cursor at 0x18ec89d8f40>

In [8]:
# Create table directly from SQL join
# The CREATE TABLE ... AS SELECT statement held in `query` runs entirely inside
# SQLite; no rows are fetched into Python here.
cursor.execute(query)

<sqlite3.Cursor at 0x18ec89d8f40>

In [9]:
# Commit any pending transaction on the connection so the work done through
# it is written to the database file
conn.commit()

In [10]:

# Close the database connection; `conn` and `cursor` cannot be used after this
conn.close()
