In [1]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%matplotlib inline 
import matplotlib.pyplot as plt
import pandas as pd

# SQLite Database for Financial Analysis - SQLite and DB Browser 

<!-- PELICAN_BEGIN_SUMMARY -->

After merging and reshaping the datasets, we can use SQLite to safely store, organize and manipulate data in smaller environments.
SQLite is a flexible database that can do real work in real business environments and the SQLite library is also integrated
into a number of popular scripting languages, including Python.
This database tool can also be useful for accountants doing financial analysis.

<!-- PELICAN_END_SUMMARY -->

<img src="/images/SQLflowchart.png" alt="[img: SQL flowchart]" title="SQLite flowchart" />


#### **What is SQLite?**
from "Using SQLite" by Jay A. Kreibich

What is SQLite: In the simplest terms, SQLite is a public-domain software package that provides a
relational database management system, or RDBMS. Relational database systems are
used to store user-defined records in large tables. In addition to data storage and management, a database engine can process complex query commands that combine data
from multiple tables to generate reports and data summaries.
The “Lite” in SQLite does not refer to its capabilities. Rather, SQLite is lightweight
when it comes to setup complexity, administrative overhead, and resource usage.

#### **What is DB Browser for SQLite?**
DB Browser for SQLite is a high quality, visual, open source tool used to create, design, and edit database files compatible with SQLite.

It is for users and developers wanting to create, search, and edit databases. It uses a familiar spreadsheet-like interface,
and you don't need to learn complicated SQL commands. This program is not a visual shell for the sqlite command line tool.

#### **Why would we use SQLite to read Excel Files?**
- Sort data on several fields
- Combine data from different spreadsheets
- Use free graphical user interface (GUI) tools to run SQL queries on database
- Convert Excel files to tables
- Install Scientific Python to convert an Excel file into an SQLite database in just 8 lines of code
- Note: Excel tab name cannot have spaces, but underscores are acceptable

#### **Connecting SQLite**
- The example is an Excel file "PremiumLossSQLite.xlsx" 
- The file has three tabs - premium, loss and reinsurance
- The first row in each sheet should have legal column names. Here is the conversion code:

In [2]:
# SQLite driver from the standard library
import sqlite3

# Base path shared by the Excel workbook (<base>.xlsx) and the SQLite database (<base>.db)
filename = "data/PremiumLossSQLite"

# Open the database that sits next to the workbook
con = sqlite3.connect(f"{filename}.db")

# sheet_name=None loads every tab at once, keyed by tab name
wb = pd.read_excel(f"{filename}.xlsx", sheet_name=None)

# Write one table per tab; if_exists='replace' lets this cell be re-run
# whenever refreshed data is available.
for sheet, frame in wb.items():
    frame.to_sql(sheet, con, index=False, if_exists='replace')
con.commit()

# The connection is intentionally left open: the following cells keep
# querying and modifying the database, and the last cell closes it.

#### **A handy tool to visualize and access SQLite databases is the free DB Browser**
#### **Below is the screenshot of the database structures after connecting from Excel to SQLite by using Python**
<img src="/images/DBimage.png" alt="[img: DB Browser after import Excel to SQLite and view the table]" title="DbBrowser Query" />

#### **Query the database - list the policies with incurred losses that are covered under the reinsurance treaty**
- Replaces the Excel VLOOKUP function, which has certain limitations
- We can repeat/automate the query process when the data is updated
- Save the query result to SQLite or Excel
- Option A : Inner join between the reinsurance, premium and loss data 
- Option B : Selecting rows

In [3]:
## Option A : Inner join between the reinsurance, premium and loss data

# Only policies present in all three tables survive the joins; the aliases
# show the matched PolicyNumber from each table side by side.
sql_statement='''
select r.PolicyNumber as Policy_from_Reinsurance_Tab, p.PolicyNumber as Policy_from_Policy_Tab, l.PolicyNumber as Policy_from_Loss_Tab
from reinsurance as r
join premium as p on p.PolicyNumber=r.PolicyNumber
join loss as l on l.PolicyNumber=r.PolicyNumber
'''
df = pd.read_sql(sql_statement,con)

# index=False: the DataFrame index here is only a row counter, so it is not
# persisted as an extra column (same convention as the workbook import).
df.to_sql('reinsurace_query_results', con, index=False, if_exists='replace')
df.to_excel('data/reinsurace_query_results.xlsx', index=False)

#### **Save the query result in SQLite**
<img src="/images/SQLQuery.png" alt="[img: Reinsurance Query Save in SQLite]" title="Reinsurance Query SQL " />

#### **Save the query result as an Excel file**
<img src="/images/reinsurance.jpg" alt="[img: Reinsurance Query Save in Excel from SQLite]" title="Reinsurance Query Excel" />

In [4]:
## Option B : Selecting rows
# 1) Select from the reinsurance table the rows whose line of business (LOB) is Hospital

cur = con.cursor()
table_name = 'reinsurance'   # name of the table to be queried

column_2 = 'LOB'
# Table and column names cannot be bound as parameters, so they are formatted
# into the statement. The value is bound with "?": in SQL a double-quoted
# "Hospital" is an identifier, and SQLite only treats it as a string through a
# legacy fallback that can be disabled.
cur.execute('SELECT * FROM {tn} WHERE {cn} = ?'.format(tn=table_name, cn=column_2),
            ('Hospital',))

# fetchone() returns only the first matching row; use cur.fetchall() for all of them
one_row = cur.fetchone()
print('1):', one_row)

1): (47653, 2017, 'Open', 'Rudolf \xa0', 'Talkin', 'Hospital', '500K/1.5M', 887969, '2014-01-03 00:00:00', '2017-09-17 00:00:00', '2017-08-14 00:00:00', '2018-08-14 00:00:00', 'OCC', 80800, 50000, 20000, None, 63133, 290832)


In [5]:
# 2) Select one column - the policy number of every row whose LOB is Hospital
column_1 = 'PolicyNumber'
# Identifiers are formatted in; the value is bound with "?" because a
# double-quoted "Hospital" is an identifier in SQL, not a string literal.
cur.execute('SELECT {coi} FROM {tn} WHERE {cn} = ?'.format(coi=column_1, tn=table_name, cn=column_2),
            ('Hospital',))
all_rows = cur.fetchall()
print('2):', all_rows)

2): [(47653,), (46048,), (43112,), (37955,), (43242,), (13418,), (26364,), (23810,), (11106,), (12652,)]


In [6]:
# 3) Select two columns - policy number and LOB of every row whose LOB is Hospital
# Identifiers are formatted in; the value is bound with "?" because a
# double-quoted "Hospital" is an identifier in SQL, not a string literal.
cur.execute('SELECT {coi1},{coi2} FROM {tn} WHERE {cn} = ?'.format(
                coi1=column_1, coi2=column_2, tn=table_name, cn=column_2),
            ('Hospital',))
all_rows = cur.fetchall()
print('3):', all_rows)

3): [(47653, 'Hospital'), (46048, 'Hospital'), (43112, 'Hospital'), (37955, 'Hospital'), (43242, 'Hospital'), (13418, 'Hospital'), (26364, 'Hospital'), (23810, 'Hospital'), (11106, 'Hospital'), (12652, 'Hospital')]


In [8]:
# 4) Select at most 5 rows that match a given value in one column

# Identifiers are formatted in; the value is bound with "?" because a
# double-quoted "Phy" is an identifier in SQL, not a string literal.
cur.execute('SELECT * FROM {tn} WHERE {cn} = ? LIMIT 5'.format(tn=table_name, cn=column_2),
            ('Phy',))
five_rows = cur.fetchall()
print('4):', five_rows)

4): [(18196, 2017, 'Closed', 'Burl \xa0', 'Katan', 'Phy', '500K/1.5M', 887535, '2013-02-05 00:00:00', '2017-07-02 00:00:00', '2017-01-03 00:00:00', '2018-01-03 00:00:00', 'CM', 212515, 50000, 20000, None, 55733, 101685), (37658, 2017, 'Closed', 'Towanda \xa0', 'Israeli', 'Phy', '500K/1.5M', 884102, '2013-03-06 00:00:00', '2018-05-22 00:00:00', '2017-11-29 00:00:00', '2018-11-29 00:00:00', 'OCC', 122708, 50000, 20000, None, 92617, 244446), (10700, 2017, 'Closed', 'Sabra \xa0', 'Higgenbotham', 'Phy', '500K/1.5M', 880205, '2012-12-23 00:00:00', '2013-12-05 00:00:00', '2017-10-07 00:00:00', '2018-10-07 00:00:00', 'OCC', 12021, 50000, 20000, None, 37639, 97247), (10948, 2017, 'Closed', 'Florencia \xa0', 'Bilyk', 'Phy', '500K/1.5M', 880208, '2013-01-11 00:00:00', '2017-04-22 00:00:00', '2017-06-05 00:00:00', '2018-06-05 00:00:00', 'CM', 5132, 50000, 20000, None, 289389, 38573), (26445, 2017, 'Closed', 'Saturnina \xa0', 'Lamperouge', 'Phy', '500K/1.5M', 889713, '2013-03-22 00:00:00', '2018-02

In [9]:
# 5) Check whether a given claim number exists and, if so, print its row
id_column = 'ClaimNumber'
some_id = 887535

# The claim number is bound with "?" rather than pasted into the SQL text;
# only the table/column identifiers are formatted in.
cur.execute("SELECT * FROM {tn} WHERE {idf} = ?".format(tn=table_name, idf=id_column),
            (some_id,))
id_exists = cur.fetchone()   # None when no row matches
if id_exists is not None:
    print('5): {}'.format(id_exists))
else:
    print('5): {} does not exist'.format(some_id))


5): (18196, 2017, 'Closed', 'Burl \xa0', 'Katan', 'Phy', '500K/1.5M', 887535, '2013-02-05 00:00:00', '2017-07-02 00:00:00', '2017-01-03 00:00:00', '2018-01-03 00:00:00', 'CM', 212515, 50000, 20000, None, 55733, 101685)


#### **Insert a column into the "premium" table**
- Option A - use DDL (`ALTER TABLE`) to add a column :-|
- Option B - use to_sql to add a column - much easier and closer to the pandas way of doing things

In [10]:
# Snapshot of the premium table before any column is added
sql_statement = (
    "\n"
    "select p.* from premium as p\n"
)
df = pd.read_sql(sql=sql_statement, con=con)
df.head()

Unnamed: 0,PolicyNumber,FirstName,LastName,PolicyEffectiveDate,LOB,PolicyType,Premium,Coverage,ExcessCoverage,ExcessPolicy
0,10880,Kurtis,Dumm,2017-07-08 00:00:00,Phy,CM,232058,1000000,3900000,96289.0
1,10948,Florencia,Bilyk,2017-06-05 00:00:00,Podiatrist,CM,5132,1000000,3900000,99671.0
2,10962,Taisha,Whack,2017-09-19 00:00:00,Dentist,OCC,11308,1000000,3900000,91809.0
3,11028,Yun,Linely,2017-10-11 00:00:00,Dentist,OCC,13381,1000000,3900000,94102.0
4,10244,Wonda,Hallsworth,2017-08-07 00:00:00,Dentist,OCC,13330,1000000,3900000,92319.0


In [11]:
## Option A - use DDL (ALTER TABLE) to add a column :-|

# Obtain a Cursor object to execute SQL statements
cur=con.cursor()

# Add a new column to the premium table
addColumn = "ALTER TABLE premium ADD COLUMN Reinsurers varchar(32)"

# Running ALTER TABLE twice fails with "duplicate column name", so only add
# the column when it is missing; this keeps the cell safe to re-run.
# PRAGMA table_info yields one row per column; the column name is the second field.
existing_columns = {row[1].lower() for row in cur.execute("PRAGMA table_info(premium)")}
if 'reinsurers' not in existing_columns:
    cur.execute(addColumn)

# Re-read the table to show the new (still empty) column
sql_statement='''
select p.* from premium as p
'''
df = pd.read_sql(sql_statement,con)
df.head()

Unnamed: 0,PolicyNumber,FirstName,LastName,PolicyEffectiveDate,LOB,PolicyType,Premium,Coverage,ExcessCoverage,ExcessPolicy,Reinsurers
0,10880,Kurtis,Dumm,2017-07-08 00:00:00,Phy,CM,232058,1000000,3900000,96289.0,
1,10948,Florencia,Bilyk,2017-06-05 00:00:00,Podiatrist,CM,5132,1000000,3900000,99671.0,
2,10962,Taisha,Whack,2017-09-19 00:00:00,Dentist,OCC,11308,1000000,3900000,91809.0,
3,11028,Yun,Linely,2017-10-11 00:00:00,Dentist,OCC,13381,1000000,3900000,94102.0,
4,10244,Wonda,Hallsworth,2017-08-07 00:00:00,Dentist,OCC,13330,1000000,3900000,92319.0,


In [12]:
## Option B - use to_sql to add a column - much easier and closer to the pandas way of doing things

# Add the column on the DataFrame, then overwrite the whole table with it
df['Reinsurers2']='ABC Re'
# index=False: without it to_sql also writes the DataFrame index, which would
# add an unwanted "index" column to the premium table.
df.to_sql('premium', con, index=False, if_exists='replace')

df.head()

Unnamed: 0,PolicyNumber,FirstName,LastName,PolicyEffectiveDate,LOB,PolicyType,Premium,Coverage,ExcessCoverage,ExcessPolicy,Reinsurers,Reinsurers2
0,10880,Kurtis,Dumm,2017-07-08 00:00:00,Phy,CM,232058,1000000,3900000,96289.0,,ABC Re
1,10948,Florencia,Bilyk,2017-06-05 00:00:00,Podiatrist,CM,5132,1000000,3900000,99671.0,,ABC Re
2,10962,Taisha,Whack,2017-09-19 00:00:00,Dentist,OCC,11308,1000000,3900000,91809.0,,ABC Re
3,11028,Yun,Linely,2017-10-11 00:00:00,Dentist,OCC,13381,1000000,3900000,94102.0,,ABC Re
4,10244,Wonda,Hallsworth,2017-08-07 00:00:00,Dentist,OCC,13330,1000000,3900000,92319.0,,ABC Re


In [None]:
# Close the connection to the database file.
# close() does not commit by itself, so commit first to make sure no pending
# change is discarded.
con.commit()
con.close()