### Load SQL Magics

In [1]:
# Load the `sql` IPython extension, which provides the %sql and %%sql magics used below.
%load_ext sql

### Import SQLAlchemy and pandas to connect to the local PostgreSQL server

In [2]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [3]:
# Connection string format: dialect+driver://username:password@host:port/database
# Connects the %sql magic to the `rsm-docker` database on localhost:8765 as user `jovyan`.
# NOTE(review): the password is written in plain text here; presumably this is a
# local throwaway database - TODO confirm before sharing this notebook.
%sql postgresql://jovyan:postgres@localhost:8765/rsm-docker

'Connected: jovyan@rsm-docker'

### Create engine

In [4]:
# Build a SQLAlchemy engine for the same connection URL that the %sql magic uses.
# NOTE(review): the URL (including the plain-text password) is duplicated from the
# %sql cell; the two must be kept in sync by hand.
engine = create_engine("postgresql://jovyan:postgres@localhost:8765/rsm-docker")

In [5]:
# Show the engine's repr; the password portion of the URL is masked in the output.
engine

Engine(postgresql://jovyan:***@localhost:8765/rsm-docker)

### Load the Excel files as data frames

In [6]:
# Read each Excel workbook under data/ into its own DataFrame.
# The order of the paths matches the order of the names being unpacked below.
excel_paths = [
    "data/accounts.xlsx",
    "data/orders.xlsx",
    "data/region.xlsx",
    "data/sales_reps.xlsx",
    "data/web_events.xlsx",
]
accounts, orders, region, sales_reps, web_events = (
    pd.read_excel(path) for path in excel_paths
)

### Ingest the data frames into tables in the database (here, `rsm-docker`)

In [7]:
# Write every DataFrame to a table of the same name through the engine.
# if_exists="replace" drops and recreates a table that already exists, so this
# cell can be re-run without raising on existing tables.
frames_by_table = {
    "accounts": accounts,
    "orders": orders,
    "region": region,
    "sales_reps": sales_reps,
    "web_events": web_events,
}
for table_name, frame in frames_by_table.items():
    frame.to_sql(table_name, engine, if_exists="replace")

In [8]:
# List the tables now present in the database's default schema.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is the supported replacement and returns the same list of names.
inspect(engine).get_table_names()

['web_events', 'orders', 'region', 'sales_reps', 'ACCOUNTS', 'accounts']

## Queries

### 1. Use DISTINCT to test if there are any accounts associated with more than one region.

In [10]:
%%sql

/* Every distinct pair of account name and region name, reached from accounts
   through the sales rep of each account to the region of that rep. */
SELECT DISTINCT
    acct.name,
    reg.name AS reg_name
FROM accounts AS acct
INNER JOIN sales_reps AS rep
    ON rep.id = acct.sales_rep_id
INNER JOIN region AS reg
    ON reg.id = rep.region_id;

 * postgresql://jovyan:***@localhost:8765/rsm-docker
351 rows affected.


name,reg_name
Sempra Energy,West
Texas Instruments,Southeast
Southern,Southeast
Voya Financial,West
Gap,Southeast
FirstEnergy,Southeast
Johnson Controls,Northeast
Group 1 Automotive,West
Oracle,Northeast
Northrop Grumman,Midwest


In [11]:
%%sql

/* Row count of the same accounts to sales_reps to region join, without DISTINCT.
   NOTE(review) this counts joined rows rather than distinct accounts, so an
   account name listed under two regions would raise both counts equally.
   Comparing against COUNT(DISTINCT acct.name) would be a stricter check. */
SELECT COUNT(*)
FROM accounts AS acct
INNER JOIN sales_reps AS rep
    ON rep.id = acct.sales_rep_id
INNER JOIN region AS reg
    ON reg.id = rep.region_id;

 * postgresql://jovyan:***@localhost:8765/rsm-docker
1 rows affected.


count
351


* The row count is the same (351) with and without DISTINCT, which implies that no account is associated with more than one region.


### 2. Have any sales reps worked on more than one account?

In [12]:
%%sql

/* Number of accounts handled by each sales rep, smallest counts first.
   Grouping includes rep.id so that two reps who share a name are counted
   separately instead of being merged into one row. */
SELECT
    rep.name,
    COUNT(*) AS no_accounts
FROM accounts AS acct
INNER JOIN sales_reps AS rep
    ON rep.id = acct.sales_rep_id
GROUP BY rep.id, rep.name
ORDER BY no_accounts;

 * postgresql://jovyan:***@localhost:8765/rsm-docker
50 rows affected.


name,no_accounts
Ernestine Pickron,3
Cordell Rieder,3
Nakesha Renn,3
Silvana Virden,3
Retha Sears,3
Ayesha Monica,3
Akilah Drinkard,3
Shawanda Selke,3
Kathleen Lalonde,4
Julie Starr,4


In [13]:
%%sql

/* One row per distinct sales rep id and name in the sales_reps table. */
SELECT DISTINCT
    rep.id,
    rep.name
FROM sales_reps AS rep;

 * postgresql://jovyan:***@localhost:8765/rsm-docker
50 rows affected.


id,name
321870,Derrick Boggess
321500,Samuel Racine
321580,Sibyl Lauria
321550,Lavera Oles
321860,Saran Ram
321820,Dorotha Seawell
321700,Debroah Wardle
321740,Charles Bidwell
321780,Julie Starr
321600,Ernestine Pickron


* All 50 sales reps appear in the grouped result and the smallest account count is 3, so every sales rep has worked on more than one account.