### Load SQL Magics

In [4]:
# Load the `sql` IPython extension so the %sql / %%sql magics used in later cells are available.
%load_ext sql

### Import SQLAlchemy and pandas to connect to the local PostgreSQL server

In [5]:
import pandas as pd
from sqlalchemy import create_engine, inspect

In [6]:
# Connection URL format for the %sql magic: dialect+driver://username:password@host:port/database
# NOTE(review): the username and password are hard-coded in this URL; confirm that is
# acceptable before sharing the notebook outside a local development setup.
%sql postgresql://jovyan:postgres@localhost:8765/rsm-docker

'Connected: jovyan@rsm-docker'

### Create engine

In [7]:
# Build a SQLAlchemy engine for the same local PostgreSQL database that the
# %sql magic connects to; pandas uses this engine to write tables.
engine = create_engine(
    "postgresql://jovyan:postgres@localhost:8765/rsm-docker"
)

In [8]:
# Echo the engine; its repr shows the connection target with the password masked.
engine

Engine(postgresql://jovyan:***@localhost:8765/rsm-docker)

### Get files as data frames

In [9]:
# Read every source workbook under data/ into its own DataFrame.
# The tuple order on the left matches the table-name order on the right.
accounts, orders, region, sales_reps, web_events = (
    pd.read_excel(f"data/{table}.xlsx")
    for table in ("accounts", "orders", "region", "sales_reps", "web_events")
)

### Data ingestion to tables in a database (here - rsm-docker)

In [10]:
# Map each target table name to the DataFrame that populates it.
frames_by_table = {
    "accounts": accounts,
    "orders": orders,
    "region": region,
    "sales_reps": sales_reps,
    "web_events": web_events,
}

# Write each DataFrame to its table; if_exists="replace" drops and recreates
# a table that is already present, so this cell can be re-run safely.
for table_name, frame in frames_by_table.items():
    frame.to_sql(table_name, engine, if_exists="replace")

In [11]:
# List the tables now present in the database to confirm the ingestion worked.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API is the supported replacement.
inspect(engine).get_table_names()

['accounts', 'orders', 'region', 'sales_reps', 'web_events']

## Queries

### 1. Write a query that uses UNION ALL on two instances (and selecting all columns) of the accounts table.

In [12]:
# UNION ALL appends every row from both tables, keeping repeated entries.

In [16]:
%%sql

SELECT *
FROM accounts AS first_copy

-- UNION ALL keeps duplicate rows, so stacking accounts on itself repeats every row
UNION ALL

SELECT *
FROM accounts AS second_copy;

 * postgresql://jovyan:***@localhost:8765/rsm-docker
100 rows affected.


index,id,name,website,lat,long,primary_poc,sales_rep_id
0,1001,Walmart,www.walmart.com,40.23849561,-75.10329704,Tamara Tuma,321500
1,1011,Exxon Mobil,www.exxonmobil.com,41.1691563,-73.84937379,Sung Shields,321510
2,1021,Apple,www.apple.com,42.29049481,-76.08400942,Jodee Lupo,321520
3,1031,Berkshire Hathaway,www.berkshirehathaway.com,40.94902131,-75.76389759,Serafina Banda,321530
4,1041,McKesson,www.mckesson.com,42.21709326,-75.28499823,Angeles Crusoe,321540
5,1051,UnitedHealth Group,www.unitedhealthgroup.com,40.08792542,-75.57569396,Savanna Gayman,321550
6,1061,CVS Health,www.cvshealth.com,41.46779585,-73.76763638,Anabel Haskell,321560
7,1071,General Motors,www.gm.com,40.80551762,-76.7101814,Barrie Omeara,321570
8,1081,Ford Motor,www.ford.com,41.113942,-75.85422452,Kym Hagerman,321580
9,1091,AT&T,www.att.com,42.4974627,-74.90271225,Jamel Mosqueda,321590


### 2. Add a WHERE clause to each of the tables that you unioned in the query above, filtering the first table where name equals Walmart and filtering the second table where name equals Disney.

In [15]:
%%sql

SELECT *
FROM accounts AS walmart_side
WHERE name = 'Walmart'

-- UNION ALL matches the query from exercise 1 and skips the duplicate-elimination
-- pass that plain UNION performs; the two filters select different names, so the
-- branches cannot return the same row
UNION ALL

SELECT *
FROM accounts AS disney_side
WHERE name = 'Disney';

 * postgresql://jovyan:***@localhost:8765/rsm-docker
2 rows affected.


index,id,name,website,lat,long,primary_poc,sales_rep_id
52,1521,Disney,www.disney.com,41.87879976,-74.81102607,Timika Mistretta,321600
0,1001,Walmart,www.walmart.com,40.23849561,-75.10329704,Tamara Tuma,321500


### 3. Perform the union in your first query (under the Appending Data via UNION header) in a common table expression and name it double_accounts. Then COUNT the number of times each name appears in the double_accounts table. If you do this correctly, your query results should have a count of 2 for each name.

In [17]:
# Performing operations on the combined dataset, limiting rows to 100

In [19]:
%%sql

WITH double_accounts AS (
    SELECT *
    FROM accounts AS first_copy

    UNION ALL

    SELECT *
    FROM accounts AS second_copy
)
-- Count how often each name occurs in the doubled table, showing at most 100 names
SELECT name, COUNT(*)
FROM double_accounts
GROUP BY name
ORDER BY COUNT(*) DESC
LIMIT 100;

 * postgresql://jovyan:***@localhost:8765/rsm-docker
100 rows affected.


name,count
Microsoft,2
Monsanto,2
Dean Foods,2
KKR,2
Performance Food Group,2
Paccar,2
USAA,2
CST Brands,2
Ally Financial,2
Amgen,2
