In [1]:
import pandas as pd
from sqlalchemy import create_engine, inspect

### Extract CSVs into DataFrames

In [2]:
# Read the license-premise CSV extract into a DataFrame and preview the first rows
premise_file = "../Resources/LicensePremise.csv"
premise_df = pd.read_csv(filepath_or_buffer=premise_file)
premise_df.head(n=5)

Unnamed: 0,License Serial Number,Premises Name,License Certificate Number,License Received Date,County ID Code
0,1311660,CANA ARRIBA GROCERY NO 2 INC,,2018-06-29T00:00:00,0
1,1310023,JOHANA GROCERY & DELI CORP,,2018-04-27T00:00:00,0
2,1310024,TKO BEVERAGES LLC,,2018-04-27T00:00:00,1
3,1310024,TKO BEVERAGES LLC,,2018-04-27T00:00:00,1
4,1311663,181 LEXINGTON AVENUE BBQ LLC,,2018-06-29T00:00:00,2


In [3]:
# Read the per-county license-count CSV extract into a DataFrame and preview the first rows
county_file = "../Resources/CountyLicenseCount.csv"
county_df = pd.read_csv(filepath_or_buffer=county_file)
county_df.head(n=5)

Unnamed: 0,ID,County Name (Licensee),County ID Code,License Count
0,0,ALBANY,5,77
1,1,ALLEGANY,59,4
2,2,BRONX,0,104
3,3,BROOME,35,14
4,4,CATTARAUGUS,41,9


### Transform premise DataFrame

In [4]:
# Keep only the columns needed downstream; .copy() detaches the result from premise_df
premise_cols = ["License Serial Number", "Premises Name", "County ID Code"]
premise_transformed = premise_df.loc[:, premise_cols].copy()

In [5]:
# Rebuild the column subset from the raw frame and drop fully duplicated rows.
# The column list comes from `premise_cols` (defined in the previous cell) rather
# than being spelled out a second time, so the two cells cannot drift apart.
premise_transformed = premise_df[premise_cols].drop_duplicates()

In [6]:
# Map the source CSV headers onto the short snake_case column names used from here on
premise_transformed = premise_transformed.rename(
    {
        "License Serial Number": "id",
        "Premises Name": "premise_name",
        "County ID Code": "county_id",
    },
    axis="columns",
)

In [7]:
# Drop duplicate rows, then promote the "id" column to the index (single chain, no in-place mutation)
premise_transformed = premise_transformed.drop_duplicates().set_index("id")

premise_transformed.head()

Unnamed: 0_level_0,premise_name,county_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1311660,CANA ARRIBA GROCERY NO 2 INC,0
1310023,JOHANA GROCERY & DELI CORP,0
1310024,TKO BEVERAGES LLC,1
1311663,181 LEXINGTON AVENUE BBQ LLC,2
1310029,AZIZ DELI & GRILL CORP,1


### Transform county DataFrame

In [8]:
county_cols = ["ID", "County Name (Licensee)", "County ID Code", "License Count"]

# Subset the columns -> rename the headers -> index by "id", written as one chain
county_transformed = (
    county_df[county_cols]
    .copy()
    .rename(
        columns={
            "ID": "id",
            "County Name (Licensee)": "county_name",
            "County ID Code": "county_id",
            "License Count": "license_count",
        }
    )
    .set_index("id")
)

county_transformed.head()

Unnamed: 0_level_0,county_name,county_id,license_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,ALBANY,5,77
1,ALLEGANY,59,4
2,BRONX,0,104
3,BROOME,35,14
4,CATTARAUGUS,41,9


### Create database connection

In [9]:
# Connection settings and database names are kept out of the notebook in cloudconfig
from cloudconfig import (
    db_user,
    db_pwd,
    db_host,
    db_port,
    db_name,
    cust_db,
    great_lakes_db,
    gw_db,
)
import pymysql

# Register PyMySQL under the MySQLdb module name so the plain "mysql://" URLs
# built in the cells below can use it as their driver
pymysql.install_as_MySQLdb()

In [16]:
# Build the engine for the `cust_db` database and open a connection on it
connection_string = f"{db_user}:{db_pwd}@{db_host}:{db_port}/{cust_db}"
cust_engine = create_engine(f'mysql://{connection_string}')
cust_db_conn = cust_engine.connect()

# Echo the connection target, but mask the password: anything printed here is
# stored in the notebook's saved output and travels with the file.
print(f"mysql://{db_user}:***@{db_host}:{db_port}/{cust_db}")

mysql://root:***@codingbootcamp.cze9dquabwlh.us-east-2.rds.amazonaws.com:3306/customer_db


In [11]:
# Build the engine for the `great_lakes_db` database and open a connection on it
connection_string = f"{db_user}:{db_pwd}@{db_host}:{db_port}/{great_lakes_db}"
gl_engine = create_engine(f'mysql://{connection_string}')
great_lakes_conn = gl_engine.connect()

# Echo the connection target, but mask the password: anything printed here is
# stored in the notebook's saved output and travels with the file.
print(f"mysql://{db_user}:***@{db_host}:{db_port}/{great_lakes_db}")

mysql://root:***@codingbootcamp.cze9dquabwlh.us-east-2.rds.amazonaws.com:3306/great_lakes_lottery


In [13]:
# Build the engine for the `gw_db` database and open a connection on it
connection_string = f"{db_user}:{db_pwd}@{db_host}:{db_port}/{gw_db}"
engine = create_engine(f'mysql://{connection_string}')
gw_db_conn = engine.connect()

# Echo the connection target, but mask the password: anything printed here is
# stored in the notebook's saved output and travels with the file.
print(f"mysql://{db_user}:***@{db_host}:{db_port}/{gw_db}")

mysql://root:***@codingbootcamp.cze9dquabwlh.us-east-2.rds.amazonaws.com:3306/gwsis


In [14]:
# Pull every row of sba_loan_detail through the gw_db connection and preview the first rows
sba_table = pd.read_sql(sql='SELECT * FROM sba_loan_detail', con=gw_db_conn)
sba_table.head(n=5)

Unnamed: 0,index,BorrName,BorrStreet,BorrCity,BorrState,BorrZip,BankName,BankStreet,BankCity,BankState,...,ProjectState,SBADistrictOffice,CongressionalDistrict,BusinessType,LoanStatus,PaidInFullDate,ChargeOffDate,GrossChargeOffAmount,RevolverStatus,JobsSupported
0,0,"Ladle & The Tramp, LLC",1915 N Stillwater Road,Schaumburg,IL,60004,Twin City Bank,729 Vandercook Way,Longview,WA,...,IL,ILLINOIS DISTRICT OFFICE,8.0,CORPORATION,EXEMPT,,,0,0,1
1,1,"HRG 2, Inc.",2298 Wellington Circle,Hudson,OH,44236,"PNC Bank, National Association",222 Delaware Ave,WILMINGTON,DE,...,OH,CLEVELAND DISTRICT OFFICE,13.0,CORPORATION,CANCLD,,,0,0,0
2,2,Lee Boys Enterprises LLC.,9061 Haversack Lane,Mechanicsville,VA,23116,Twin City Bank,729 Vandercook Way,Longview,WA,...,VA,RICHMOND DISTRICT OFFICE,1.0,CORPORATION,CANCLD,,,0,0,13
3,3,"KATJAM Enterprises, LLC",8168 Nancy Dr,Mentor,OH,44060,The Huntington National Bank,17 S High St,COLUMBUS,OH,...,OH,CLEVELAND DISTRICT OFFICE,14.0,CORPORATION,EXEMPT,,,0,0,0
4,4,"KATJAM Enterprises, LLC",8168 Nancy Dr,Mentor,OH,44060,The Huntington National Bank,17 S High St,COLUMBUS,OH,...,OH,CLEVELAND DISTRICT OFFICE,14.0,CORPORATION,PIF,9/30/2015,,0,1,15


In [17]:
# Write the first five SBA loan rows to the `sba_nick` table over the cust_db connection.
#   index=False         - the frame already carries an "index" column (visible in the
#                         preview above), so also writing the DataFrame index would
#                         only add a redundant extra column to the table.
#   if_exists='replace' - the default ('fail') raises ValueError once the table exists,
#                         so the cell could not be re-run after a kernel restart.
sba_table.iloc[:5].to_sql(name='sba_nick', con=cust_db_conn, if_exists='replace', index=False)

### Load DataFrames into database

In [None]:
# Append the premise records (indexed by "id") to the `christina` table.
# NOTE(review): this cell originally passed `con=conn`, but no `conn` is defined
# anywhere in this notebook, so it raised NameError on a fresh kernel. `engine` is
# used because the confirmation cells below query through it - confirm that this
# is the intended target database.
# NOTE(review): if_exists='append' adds the rows again on every re-run.
premise_transformed.to_sql(name='christina', con=engine, if_exists='append', index=True)

In [None]:
# Append the county records (indexed by "id") to the `Jannette` table.
# NOTE(review): this cell originally passed `con=conn`, but no `conn` is defined
# anywhere in this notebook, so it raised NameError on a fresh kernel. `engine` is
# used because the confirmation cells below query through it - confirm that this
# is the intended target database.
# NOTE(review): if_exists='append' adds the rows again on every re-run.
county_transformed.to_sql(name='Jannette', con=engine, if_exists='append', index=True)

In [None]:
# Confirm tables
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names and works on both.
inspect(engine).get_table_names()

In [None]:
# Read a table back through `engine` to check the load.
# NOTE(review): the load cells above write to 'christina' and 'Jannette', not
# 'premise' - confirm that a 'premise' table exists in this database.
pd.read_sql(sql='select * from premise', con=engine)