**Querying postgreSQL in Jupyter notebook**


# Setup and custom tables

In [1]:
import pandas as pd
import sqlalchemy
import sqlalchemy_utils
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

In [2]:
# Define a database name
# Set your postgres username
dbname = "baseball"
username = "lacar"  # change this to your username

# Working with PostgreSQL in Python
# Connect to make queries using psycopg2
con = None
con = psycopg2.connect(database=dbname, user=username)

# Here, we're using postgres, but sqlalchemy can connect to other things too.
engine = create_engine("postgres://%s@localhost/%s" % (username, dbname))
print(engine.url)

postgres://lacar@localhost/baseball


## Using CTE

In [3]:
# Just use a date difference less than 30 days

sql_query = """
WITH posts (user_id, post, time)
AS (VALUES
(1, 'A', CAST('2-14-20' AS date)),
(2, 'B', CAST('2-14-20' AS date)),
(3, 'C', CAST('2-15-20' AS date)),
(1, 'B', CAST('2-15-20' AS date)),
(2, 'A', CAST('2-16-20' AS date)),
(3, 'B', CAST('2-17-20' AS date)),
(1, 'D', CAST('2-18-20' AS date)))

SELECT *
FROM posts
"""
df_query = pd.read_sql_query(sql_query,con)    
df_query

Unnamed: 0,user_id,post,time
0,1,A,2020-02-14
1,2,B,2020-02-14
2,3,C,2020-02-15
3,1,B,2020-02-15
4,2,A,2020-02-16
5,3,B,2020-02-17
6,1,D,2020-02-18


## Using pandas

### Dates

In [4]:
# Generate random date ranges
# From https://towardsdatascience.com/mastering-dates-and-timestamps-in-pandas-and-python-in-general-5b8c6edcc50c

import random
import time
from dateutil.parser import parse
def str_time_prop(start, end, format, prop):
    stime = time.mktime(time.strptime(start, format))
    etime = time.mktime(time.strptime(end, format))
    ptime = stime + prop * (etime - stime)
    return time.strftime(format, time.localtime(ptime))

selected_format = '%Y-%m-%d %H:%M:%S'

def random_date(start, end, prop):
    return parse(str_time_prop(start, end, selected_format, prop)).strftime(selected_format)

def make_date(begin_dt, end_dt):
    return random_date(begin_dt, end_dt, random.random())

# e.g. make_date("2020-01-01 13:40:00", "2020-01-14 14:50:00")

# Generate dates (my function)
def generate_dates(n_dates, begin_dt, end_dt):
    return sorted([make_date(begin_dt, end_dt) for x in range(n_dates)])

### Names

In [5]:
# Generate names (my function)
def generate_name_list(n_names):
    import names   # needed to pip install
    name_list = list()
    for i in range(n_names):
        name_list.append(names.get_first_name())
    return name_list

### Random values within a range

In [6]:
def generate_values(n_vals, lowest, highest):
    import random
    random_vals = random.sample(range(lowest, highest), n_vals)
    return random_vals

In [7]:
# Also try random.random()

In [8]:
generate_values(10, 1, 20)

[6, 4, 2, 13, 17, 11, 8, 10, 3, 9]

### Multipurpose 3-digit codes

In [9]:
# Generate 3-digit codes (e.g. city ids) (my function)
def generate_codes(n_codes):
    # 3 digits between 110 and 999 without repeating
    import random
    code_ids = random.sample(range(110, 1000), n_codes)
    return code_ids

### Multipurpose custom values

In [10]:
# Generate random list following input of a set of values to choose
def generate_custom_vals(list2consider, n_items):
    custom_list = np.random.choice(list2consider, size=n_items, replace=True).tolist()
    return custom_list

### Example

In [11]:
my_date_list = generate_dates(10, "2020-01-01 13:40:00", "2020-01-14 14:50:00")
my_name_list = generate_name_list(10)
my_city_codes = generate_codes(10)

# Custom list 1
my_list2consider = ['desktop-browser','mobile-browser','ios-native','android-native']
my_list2consider4table = generate_custom_vals(my_list2consider, 10)

# Custom list 2
my_list2consider = ['US', 'Canada', 'Mexico']
my_countries4table = generate_custom_vals(my_list2consider, 10)

In [12]:
col_1 = range(1, 11)
col_2 = pd.to_datetime(my_date_list)
col_3 = my_list2consider4table
col_4 = my_countries4table
table1 = pd.DataFrame([col_1, col_2, col_3, col_4]).T
table1.columns = ['user_id', 'join_ts', 'join_client', 'country']

table1

# Temp table created here that I'll just over-write with each new problem
# table1.to_sql('user_summary', engine, if_exists='replace')


Unnamed: 0,user_id,join_ts,join_client,country
0,1,2020-01-04 09:36:15,desktop-browser,Mexico
1,2,2020-01-04 13:49:17,ios-native,Canada
2,3,2020-01-07 02:19:36,mobile-browser,Mexico
3,4,2020-01-07 20:11:17,mobile-browser,US
4,5,2020-01-08 07:05:53,desktop-browser,Mexico
5,6,2020-01-10 17:25:59,mobile-browser,Canada
6,7,2020-01-11 22:10:44,desktop-browser,Canada
7,8,2020-01-11 23:08:14,desktop-browser,US
8,9,2020-01-12 08:17:45,mobile-browser,Mexico
9,10,2020-01-14 13:55:38,mobile-browser,US


# 1205. Monthly Transactions II

https://leetcode.com/problems/monthly-transactions-ii/

Write an SQL query to find for each month and country, the number of approved transactions and their total amount, the number of chargebacks and their total amount.

Note: In your query, given the month and country, ignore rows with all zeros.

The query result format is in the following example:

Transactions table:
+------+---------+----------+--------+------------+
| id   | country | state    | amount | trans_date |
+------+---------+----------+--------+------------+
| 101  | US      | approved | 1000   | 2019-05-18 |
| 102  | US      | declined | 2000   | 2019-05-19 |
| 103  | US      | approved | 3000   | 2019-06-10 |
| 104  | US      | approved | 4000   | 2019-06-13 |
| 105  | US      | approved | 5000   | 2019-06-15 |
+------+---------+----------+--------+------------+

Chargebacks table:
+------------+------------+
| trans_id   | trans_date |
+------------+------------+
| 102        | 2019-05-29 |
| 101        | 2019-06-30 |
| 105        | 2019-09-18 |
+------------+------------+

Result table:
+----------+---------+----------------+-----------------+-------------------+--------------------+
| month    | country | approved_count | approved_amount | chargeback_count  | chargeback_amount  |
+----------+---------+----------------+-----------------+-------------------+--------------------+
| 2019-05  | US      | 1              | 1000            | 1                 | 2000               |
| 2019-06  | US      | 3              | 12000           | 1                 | 1000               |
| 2019-09  | US      | 0              | 0               | 1                 | 5000               |
+----------+---------+----------------+-----------------+-------------------+--------------------+

In [14]:
# Checking created tables
sql_query = """
WITH Transactions (id, country, state, amount, trans_date)
AS (VALUES
(101, 'US', 'approved', 1000, CAST('2019-05-18' AS date )),
(102, 'US', 'declined', 2000, CAST('2019-05-19' AS date )),
(103, 'US', 'approved', 3000, CAST('2019-06-10' AS date )),
(104, 'US', 'approved', 4000, CAST('2019-06-13' AS date )),
(105, 'US', 'approved', 5000, CAST('2019-06-15' AS date ))),

Chargebacks (trans_id, trans_date)
AS (VALUES
(102, CAST('2019-05-29' AS date )),
(101, CAST('2019-06-30' AS date )),
(105, CAST('2019-09-18' AS date )))
 
SELECT *
FROM Chargebacks;
"""
pd.read_sql_query(sql_query,con)


Unnamed: 0,trans_id,trans_date
0,102,2019-05-29
1,101,2019-06-30
2,105,2019-09-18


In [20]:
# Checking created tables
sql_query = """
WITH Transactions (id, country, state, amount, trans_date)
AS (VALUES
(101, 'US', 'approved', 1000, CAST('2019-05-18' AS date )),
(102, 'US', 'declined', 2000, CAST('2019-05-19' AS date )),
(103, 'US', 'approved', 3000, CAST('2019-06-10' AS date )),
(104, 'US', 'approved', 4000, CAST('2019-06-13' AS date )),
(105, 'US', 'approved', 5000, CAST('2019-06-15' AS date ))),

Chargebacks (trans_id, trans_date)
AS (VALUES
(102, CAST('2019-05-29' AS date )),
(101, CAST('2019-06-30' AS date )),
(105, CAST('2019-09-18' AS date )))
 
-- month    | country | approved_count | approved_amount | chargeback_count  | chargeback_amount 
-- extract date
-- group by country
-- use a case when for approved count
-- need to join on trans_id for the chargeback amount, but the count/amount is assigned based on trans_date
-- then will need to re-join on month


SELECT EXTRACT(MONTH FROM trans_date) AS month,
       COUNT(amount),
       SUM(CASE WHEN state='approved' THEN amount
                ELSE 0 END)
FROM Transactions
GROUP BY EXTRACT(MONTH FROM trans_date);


"""
pd.read_sql_query(sql_query,con)


Unnamed: 0,month,count,sum
0,6.0,3,12000
1,5.0,2,1000


# 7/9/20

You have a database consisting of a single table table of 9 students listed in alphabetical order. Each student has a class ID that is also alphabetical. Write a SQL query so that each consecutive pair of students are assigned the other students ID (Ex: Students 1 and 2 swap IDs, Students 3 and 4 swap IDs, etcâ€¦) 
	Table is Student, Columns are Name and ID


In [3]:
# Just use a date difference less than 30 days

sql_query = """
WITH Student (id, name)
AS (VALUES
(1, 'A'),
(2, 'B'),
(3, 'C'),
(4, 'D'),
(5, 'E'),
(6, 'F'),
(7, 'G'),
(8, 'H'),
(9, 'i'))

SELECT *
FROM Student
"""
df_query = pd.read_sql_query(sql_query,con)    
df_query

Unnamed: 0,id,name
0,1,A
1,2,B
2,3,C
3,4,D
4,5,E
5,6,F
6,7,G
7,8,H
8,9,i


SELECT name,
       CASE WHEN id IN (1,3,5,7) THEN id+1
            WHEN id IN (2,4,6,8) THEN id-1
            ELSE id END AS new_id 
FROM Student

In [7]:
# Just use a date difference less than 30 days

sql_query = """
WITH Student (id, name)
AS (VALUES
(1, 'A'),
(2, 'B'),
(3, 'C'),
(4, 'D'),
(5, 'E'),
(6, 'F'),
(7, 'G'),
(8, 'H'),
(9, 'i'))

SELECT name,
       CASE WHEN id IN (1,3,5,7) THEN id+1
            WHEN id IN (2,4,6,8) THEN id-1
            ELSE id END AS new_id 
FROM Student
"""
df_query = pd.read_sql_query(sql_query,con)    
df_query

Unnamed: 0,name,new_id
0,A,2
1,B,1
2,C,4
3,D,3
4,E,6
5,F,5
6,G,8
7,H,7
8,i,9


SELECT name,
       CASE WHEN id IN (1,3,5,7) THEN id+1
            WHEN id IN (2,4,6,8) THEN id-1
            ELSE id END AS new_id 
FROM Student

SELECT name,
       CASE WHEN id < 9 AND id % 2 <> 0 THEN id+1
            WHEN id < 9 AND id % 2 = 0 THEN id-1
            ELSE id END AS new_id 
FROM Student

In [8]:
# Just use a date difference less than 30 days

sql_query = """
WITH Student (id, name)
AS (VALUES
(1, 'A'),
(2, 'B'),
(3, 'C'),
(4, 'D'),
(5, 'E'),
(6, 'F'),
(7, 'G'),
(8, 'H'),
(9, 'i'))

SELECT name,
       CASE WHEN id < 9 AND id % 2 <> 0 THEN id+1
            WHEN id < 9 AND id % 2 = 0 THEN id-1
            ELSE id END AS new_id 
FROM Student
"""
df_query = pd.read_sql_query(sql_query,con)    
df_query

Unnamed: 0,name,new_id
0,A,2
1,B,1
2,C,4
3,D,3
4,E,6
5,F,5
6,G,8
7,H,7
8,i,9


In [None]:
# Mike's solution 

t1 AS (
SELECT *, row_number() OVER (ORDER BY name) AS row_number
FROM students )

SELECT name, CASE WHEN row_number = max(row_number) THEN id 
ELSE WHEN mod(row_number) = 0 THEN lag(id, 1) 
ELSE lead(id, 1) END AS new_id
FROM t1


# --

# --