In [1]:
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
display_table('Transactions')

+-----+---------+----------+--------+------------+
|  id | country |  state   | amount | trans_date |
+-----+---------+----------+--------+------------+
| 121 |    US   | approved |  1000  | 2018-12-18 |
| 122 |    US   | declined |  2000  | 2018-12-19 |
| 123 |    US   | approved |  2000  | 2019-01-01 |
| 124 |    DE   | approved |  2000  | 2019-01-07 |
+-----+---------+----------+--------+------------+


### Write an SQL query to find for each month and country, the number of transactions and their total amount, the number of approved transactions and their total amount.

```
Output: 
+----------+---------+-------------+----------------+--------------------+-----------------------+
| month    | country | trans_count | approved_count | trans_total_amount | approved_total_amount |
+----------+---------+-------------+----------------+--------------------+-----------------------+
| 2018-12  | US      | 2           | 1              | 3000               | 1000                  |
| 2019-01  | US      | 1           | 1              | 2000               | 2000                  |
| 2019-01  | DE      | 1           | 1              | 2000               | 2000                  |
+----------+---------+-------------+----------------+--------------------+-----------------------+
```


In [3]:
%%sql 

SELECT 
    SUBSTRING(trans_date, 1, 7) AS month 
FROM Transactions;

month
2018-12
2018-12
2019-01
2019-01


In [4]:
%%sql 

SELECT 
    SUBSTRING(trans_date, 1, 7) AS month,
    country,
    COUNT(*) AS trans_count,
    SUM(amount) AS trans_total_amount
FROM Transactions
GROUP BY country, month

month,country,trans_count,trans_total_amount
2018-12,US,2,3000
2019-01,US,1,2000
2019-01,DE,1,2000


In [5]:
%%sql 

SELECT 
    SUBSTRING(trans_date, 1, 7) AS month,
    country,
    COUNT(*) AS trans_count,
    SUM(CASE WHEN state = 'approved' THEN 1 ELSE 0 END) AS approved_count,
    SUM(amount) AS trans_total_amount,
    SUM(CASE WHEN state = 'approved' THEN amount ELSE 0 END) AS approved_total_amount
FROM Transactions
GROUP BY country, month;

month,country,trans_count,approved_count,trans_total_amount,approved_total_amount
2018-12,US,2,1,3000,1000
2019-01,US,1,1,2000,2000
2019-01,DE,1,1,2000,2000


## Using CTE

In [6]:
%%sql 

WITH monthly_stats AS (
    SELECT
        SUBSTRING(trans_date, 1, 7) AS month,
        country,
        COUNT(*) AS trans_count,
        SUM(amount) AS trans_total_amount
    FROM Transactions
    GROUP BY month, country
),
approved_stats AS (
    SELECT
        SUBSTRING(trans_date, 1, 7) AS month,
        country,
        COUNT(*) AS approved_count,
        SUM(amount) AS approved_total_amount
    FROM Transactions
    WHERE state = 'approved'
    GROUP BY month, country
)
SELECT
    monthly_stats.month,
    monthly_stats.country,
    monthly_stats.trans_count,
    approved_stats.approved_count,
    monthly_stats.trans_total_amount,
    approved_stats.approved_total_amount
FROM monthly_stats
JOIN approved_stats ON monthly_stats.month = approved_stats.month
    AND monthly_stats.country = approved_stats.country;


month,country,trans_count,approved_count,trans_total_amount,approved_total_amount
2018-12,US,2,1,3000,1000
2019-01,US,1,1,2000,2000
2019-01,DE,1,1,2000,2000


# Using Pandas

In [7]:
import pandas as pd 

In [8]:
transactions_query = %sql SELECT * FROM Transactions # type: ignore 
transactions_query

id,country,state,amount,trans_date
121,US,approved,1000,2018-12-18
122,US,declined,2000,2018-12-19
123,US,approved,2000,2019-01-01
124,DE,approved,2000,2019-01-07


In [9]:
transactions_query[0]

(121, 'US', 'approved', 1000, datetime.date(2018, 12, 18))

In [10]:
type(transactions_query)

sql.run.ResultSet

In [11]:
transactions_df = transactions_query.DataFrame()
display(transactions_df)

Unnamed: 0,id,country,state,amount,trans_date
0,121,US,approved,1000,2018-12-18
1,122,US,declined,2000,2018-12-19
2,123,US,approved,2000,2019-01-01
3,124,DE,approved,2000,2019-01-07


In [12]:
transactions_df['trans_date'].astype(str)

0    2018-12-18
1    2018-12-19
2    2019-01-01
3    2019-01-07
Name: trans_date, dtype: object

In [13]:
transactions_df['trans_date'].astype(str).apply(lambda x: x[:7])

0    2018-12
1    2018-12
2    2019-01
3    2019-01
Name: trans_date, dtype: object

In [14]:
transactions_df['month'] = transactions_df['trans_date'].astype(str).apply(lambda x: x[:7])
transactions_df

Unnamed: 0,id,country,state,amount,trans_date,month
0,121,US,approved,1000,2018-12-18,2018-12
1,122,US,declined,2000,2018-12-19,2018-12
2,123,US,approved,2000,2019-01-01,2019-01
3,124,DE,approved,2000,2019-01-07,2019-01


In [15]:
transactions_df.drop('trans_date', axis=1, inplace=True)
transactions_df

Unnamed: 0,id,country,state,amount,month
0,121,US,approved,1000,2018-12
1,122,US,declined,2000,2018-12
2,123,US,approved,2000,2019-01
3,124,DE,approved,2000,2019-01


In [16]:
transactions_df.groupby(['country', 'state']).agg({'amount': 'sum', 'month': 'size'})

Unnamed: 0_level_0,Unnamed: 1_level_0,amount,month
country,state,Unnamed: 2_level_1,Unnamed: 3_level_1
DE,approved,2000,1
US,approved,3000,2
US,declined,2000,1


In [17]:
transactions_df.groupby(['country', 'state']).agg({'amount': 'sum', 'month': 'size'}).reset_index()

Unnamed: 0,country,state,amount,month
0,DE,approved,2000,1
1,US,approved,3000,2
2,US,declined,2000,1


In [18]:
grouped_df_1 = transactions_df \
            .groupby(['country', 'state']) \
            .agg({'amount': 'sum', 'month': 'size'}) \
            .reset_index()
grouped_df_1

Unnamed: 0,country,state,amount,month
0,DE,approved,2000,1
1,US,approved,3000,2
2,US,declined,2000,1


In [19]:
grouped_df_1.rename(columns={'amount':'trans_total_amount', 'month': 'trans_count'}, inplace=True)

grouped_df_1

Unnamed: 0,country,state,trans_total_amount,trans_count
0,DE,approved,2000,1
1,US,approved,3000,2
2,US,declined,2000,1


In [20]:
filtered_df = transactions_df.query("state == 'approved'")
filtered_df

Unnamed: 0,id,country,state,amount,month
0,121,US,approved,1000,2018-12
2,123,US,approved,2000,2019-01
3,124,DE,approved,2000,2019-01


In [21]:
filtered_df.groupby(['country', 'month']).agg({'amount': 'sum', 'month': 'count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,amount,month
country,month,Unnamed: 2_level_1,Unnamed: 3_level_1
DE,2019-01,2000,1
US,2018-12,1000,1
US,2019-01,2000,1


In [22]:
grouped_df_2 = filtered_df.groupby(['country', 'month']) \
                .agg({'amount': 'sum', 'month': 'size'}) \

grouped_df_2

Unnamed: 0_level_0,Unnamed: 1_level_0,amount,month
country,month,Unnamed: 2_level_1,Unnamed: 3_level_1
DE,2019-01,2000,1
US,2018-12,1000,1
US,2019-01,2000,1


In [23]:
grouped_df_2.rename(columns=({'month': 'approved_count', 'amount': 'approved_total_amount'}))

Unnamed: 0_level_0,Unnamed: 1_level_0,approved_total_amount,approved_count
country,month,Unnamed: 2_level_1,Unnamed: 3_level_1
DE,2019-01,2000,1
US,2018-12,1000,1
US,2019-01,2000,1


In [24]:
grouped_df_2 = grouped_df_2 \
                .rename(columns=({'month': 'approved_count', 'amount': 'approved_total_amount'})) \
                .reset_index()
                
grouped_df_2

Unnamed: 0,country,month,approved_total_amount,approved_count
0,DE,2019-01,2000,1
1,US,2018-12,1000,1
2,US,2019-01,2000,1


In [28]:
concatted_df = pd.concat([grouped_df_1, grouped_df_2], axis=1)
concatted_df 

Unnamed: 0,country,state,trans_total_amount,trans_count,country.1,month,approved_total_amount,approved_count
0,DE,approved,2000,1,DE,2019-01,2000,1
1,US,approved,3000,2,US,2018-12,1000,1
2,US,declined,2000,1,US,2019-01,2000,1


In [31]:
columns_in_order = ['month', 'country', 'trans_count', 'approved_count', 'trans_total_amount', 'approved_total_amount']
final_df = concatted_df[columns_in_order]
final_df

Unnamed: 0,month,country,country.1,trans_count,approved_count,trans_total_amount,approved_total_amount
0,2019-01,DE,DE,1,1,2000,2000
1,2018-12,US,US,2,1,3000,1000
2,2019-01,US,US,1,1,2000,2000
