In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%matplotlib inline 
import matplotlib.pyplot as plt
import pandas as pd

# SQLite Database for Financial Analysis - SQLite and DB Browser 

<!-- PELICAN_BEGIN_SUMMARY -->

After merging and reshaping the datasets, we can use SQLite to store, organize and manipulate data in smaller environments.
<br>SQLite is a public-domain software package that provides a relational database management system (RDBMS). It is a flexible database that can do real work in real business environments, and the SQLite library is also integrated
into a number of popular scripting languages, including Python.
<!-- PELICAN_END_SUMMARY -->

<img src="/images/SQLflowchart.png" alt="[img: SQL flowchart]" title="SQLite flowchart" />

**Goals: Manipulate Data in SQLite**
- Sort data on several fields
- Combine data from different spreadsheets 
- Use free graphical user interface (GUI) tools to run SQL queries on database
- Convert Excel files to tables
- Install Scientific Python to convert an Excel file into a SQLite database in just 8 lines of code
- Note: Excel tab name cannot have spaces, but underscores are acceptable

**Connecting to SQLite**
- The example is an Excel file "PremiumLossSQLite.xlsx" 
- The file has three tabs - premium, loss and reinsurance
- The first row in each sheet should have legal column names.
- Here are the conversion steps:
    - Import library
    - Name of Excel xlsx file. SQLite database will have the same name and extension .db (filename+".db")
    - Use if_exists so we can repeat the process when new data is available for rerun

In [2]:
import sqlite3

# Base path shared by the Excel workbook (.xlsx) and the SQLite database (.db).
filename = "data/PremiumLossSQLite"

con = sqlite3.connect(filename + ".db")

# sheet_name=None makes read_excel return a dict mapping each tab name to its DataFrame.
wb = pd.read_excel(filename + '.xlsx', sheet_name=None)

# Store every tab as a table named after the tab; 'replace' lets the cell be
# re-run whenever a new version of the workbook is available.
for sheet, frame in wb.items():
    frame.to_sql(sheet, con, index=False, if_exists='replace')
con.commit()

# The connection is deliberately left open here: the cells below keep using `con`.

<br>
**A handy tool to visualize and access SQLite databases is the free DB Browser**
- DB Browser for SQLite is a high-quality, visual, open-source tool used to create, design, and edit database files compatible with SQLite.
- Below is a screenshot of the database structure after loading the Excel file into SQLite with Python
<img src="/images/DBimage.png" alt="[img: DB Browser after import Excel to SQLite and view the table]" title="DbBrowser Query" />

<br>
**Query the database**
- We can repeat/automate the query process when new data get updated 
- Save the query result to SQLite or Excel
- Option A : Inner join between the reinsurance, premium and loss data using SQL
- Option B : The same inner join using pandas
- Selecting rows that match a condition

In [38]:
## Option A : Using SQL for Inner join between the reinsurance, premium and loss data 

# One row per matching (reinsurance, premium, loss) combination; each tab's
# PolicyNumber is returned under its own alias.
sql_statement='''
select r.PolicyNumber as Policy_from_Reinsurance_Tab, p.PolicyNumber as Policy_from_Premium_Tab, l.PolicyNumber as Policy_from_Loss_Tab
from reinsurance as r
join premium as p on p.PolicyNumber=r.PolicyNumber
join loss as l on l.PolicyNumber=r.PolicyNumber
'''
df = pd.read_sql(sql=sql_statement, con=con)

# Persist the result twice: as a table in the same database and as an Excel file.
df.to_sql(name='reinsurace_query_results', con=con, if_exists='replace')
df.to_excel('data/reinsurace_query_results.xlsx')

In [93]:
## Option B : Using Pandas for Inner join between the reinsurance, premium and loss data 

# Table name -> name of the extra column that keeps a copy of that tab's PolicyNumber,
# so the origin of each key is still visible after the merges.
tag_columns = {
    'reinsurance': 'PolicyNumber_from_Reinsurance_Tab',
    'premium': 'PolicyNumber_from_Premium_Tab',
    'loss': 'PolicyNumber_from_Loss_Tab',
}

# Load each table in full and append its tagged copy of PolicyNumber.
frames = {}
for table_name, tag_column in tag_columns.items():
    sql_statement = 'select * from ' + table_name
    frame = pd.read_sql(sql_statement, con)
    frame[tag_column] = frame['PolicyNumber']
    frames[table_name] = frame

df_reinsurance = frames['reinsurance']
df_premium = frames['premium']
df_loss = frames['loss']

# Chain two inner joins on PolicyNumber: reinsurance -> premium -> loss.
df_merge = df_reinsurance.merge(
    df_premium,
    on=['PolicyNumber'],
    how='inner',
    suffixes=['_from_Reinsurance_Tab','_from_Policy_Tab'],
)
df_merge = df_merge.merge(
    df_loss,
    on=['PolicyNumber'],
    how='inner',
    suffixes=['','_from_Loss_Tab'],
)

# Preview the three tagged key columns, then keep only those columns.
key_columns = list(tag_columns.values())
df_merge[key_columns].head()

df_merge = df_merge[key_columns]

print("rows=%s" % len(df_merge.index))

# Persist the result twice: as a table in the same database and as an Excel file.
df_merge.to_sql(name='reinsurace_query_results', con=con, if_exists='replace')
df_merge.to_excel('data/reinsurace_query_results.xlsx')


Unnamed: 0,PolicyNumber_from_Reinsurance_Tab,PolicyNumber_from_Premium_Tab,PolicyNumber_from_Loss_Tab
0,10244,10244,10244
1,10244,10244,10244
2,10509,10509,10509
3,10509,10509,10509
4,10700,10700,10700


rows=16


<br>
**Save the query result to SQLite**
<img src="/images/SQLQuery.png" alt="[img: Reinsurance Query Save in SQLite]" title="Reinsurance Query SQL " />

<br>
**Save the query result to Excel**
<img src="/images/reinsurance.jpg" alt="[img: Reinsurance Query Save in Excel from SQLite]" title="Reinsurance Query Excel" />

In [37]:
# Reload the reinsurance table and preview the rows whose LOB is "Hospital".
sql_statement = 'select * from reinsurance'
df_reinsurance = pd.read_sql(sql=sql_statement, con=con)

# Boolean mask for the row filter; .loc applies the row and column selection in one step.
is_hospital = df_reinsurance['LOB'] == "Hospital"
df_reinsurance.loc[is_hospital, ['PolicyNumber', 'LOB']].head()


Unnamed: 0,PolicyNumber,LOB
44,47653,Hospital
78,46048,Hospital
88,43112,Hospital
99,37955,Hospital
115,43242,Hospital


<br>
**Add a column to the "premium" table**
- Option A - use DDL (`ALTER TABLE`) to add a column :-|
- Option B - use to_sql to add a column - much easier and closer to the pandas way of doing things

In [10]:
## Premium table before inserting new column

# Read the whole premium table so its current columns can be inspected.
sql_statement='''
select p.* from premium as p
'''
df = pd.read_sql(sql=sql_statement, con=con)

# Show the first two rows.
df.head(n=2)

Unnamed: 0,PolicyNumber,FirstName,LastName,PolicyEffectiveDate,LOB,PolicyType,Premium,Coverage,ExcessCoverage,ExcessPolicy
0,10880,Kurtis,Dumm,2017-07-08 00:00:00,Phy,CM,232058,1000000,3900000,96289.0
1,10948,Florencia,Bilyk,2017-06-05 00:00:00,Podiatrist,CM,5132,1000000,3900000,99671.0
2,10962,Taisha,Whack,2017-09-19 00:00:00,Dentist,OCC,11308,1000000,3900000,91809.0
3,11028,Yun,Linely,2017-10-11 00:00:00,Dentist,OCC,13381,1000000,3900000,94102.0
4,10244,Wonda,Hallsworth,2017-08-07 00:00:00,Dentist,OCC,13330,1000000,3900000,92319.0


In [11]:
## Option A - use DDL (ALTER TABLE) to add a column :-|

# Obtain a Cursor object to execute SQL statements
cur = con.cursor()

# ALTER TABLE ... ADD COLUMN fails with "duplicate column name" when the column
# already exists, so inspect the schema first to keep this cell re-runnable.
# PRAGMA table_info returns one row per column; the column name is at index 1.
# SQLite compares column names case-insensitively, hence the lower().
existing_columns = {row[1].lower() for row in cur.execute("PRAGMA table_info(premium)")}

# Add a new column to premium table (only if it is not there yet)
addColumn = "ALTER TABLE premium ADD COLUMN Reinsurers varchar(32)"
if "reinsurers" not in existing_columns:
    cur.execute(addColumn)

# Read the table back to show the new, still empty, column.
sql_statement='''
select p.* from premium as p
'''
df = pd.read_sql(sql_statement,con)
df.head()

Unnamed: 0,PolicyNumber,FirstName,LastName,PolicyEffectiveDate,LOB,PolicyType,Premium,Coverage,ExcessCoverage,ExcessPolicy,Reinsurers
0,10880,Kurtis,Dumm,2017-07-08 00:00:00,Phy,CM,232058,1000000,3900000,96289.0,
1,10948,Florencia,Bilyk,2017-06-05 00:00:00,Podiatrist,CM,5132,1000000,3900000,99671.0,
2,10962,Taisha,Whack,2017-09-19 00:00:00,Dentist,OCC,11308,1000000,3900000,91809.0,
3,11028,Yun,Linely,2017-10-11 00:00:00,Dentist,OCC,13381,1000000,3900000,94102.0,
4,10244,Wonda,Hallsworth,2017-08-07 00:00:00,Dentist,OCC,13330,1000000,3900000,92319.0,


In [12]:
## Option B - use to_sql to add a column - much easier and closer to the pandas way of doing things

# Add the new column to the DataFrame that was read from the premium table.
df['Reinsurers2']='ABC Re'

# Rewrite the whole table from the DataFrame. index=False keeps the DataFrame's
# row index from being stored as an extra column, so the only schema change is
# the new column (the initial Excel import also writes with index=False).
df.to_sql('premium', con, index=False, if_exists='replace')

df.head()

Unnamed: 0,PolicyNumber,FirstName,LastName,PolicyEffectiveDate,LOB,PolicyType,Premium,Coverage,ExcessCoverage,ExcessPolicy,Reinsurers,Reinsurers2
0,10880,Kurtis,Dumm,2017-07-08 00:00:00,Phy,CM,232058,1000000,3900000,96289.0,,ABC Re
1,10948,Florencia,Bilyk,2017-06-05 00:00:00,Podiatrist,CM,5132,1000000,3900000,99671.0,,ABC Re
2,10962,Taisha,Whack,2017-09-19 00:00:00,Dentist,OCC,11308,1000000,3900000,91809.0,,ABC Re
3,11028,Yun,Linely,2017-10-11 00:00:00,Dentist,OCC,13381,1000000,3900000,94102.0,,ABC Re
4,10244,Wonda,Hallsworth,2017-08-07 00:00:00,Dentist,OCC,13330,1000000,3900000,92319.0,,ABC Re


In [None]:
# Close the connection to the database file. Run this only after all the cells
# above are done: they all use this same `con` object.
con.close()