# Lab | SQL Rolling calculations

In this lab, you will be using the [Sakila](https://dev.mysql.com/doc/sakila/en/) database of movie rentals.

### Instructions

1. Get number of monthly active customers.
2. Active users in the previous month.
3. Percentage change in the number of active customers.
4. Retained customers every month.

In [2]:
import pymysql
from sqlalchemy import create_engine
import pandas as pd
import getpass

In [3]:
password = getpass.getpass()
connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'
engine = create_engine(connection_string)
%load_ext sql
%sql {connection_string}

 ·············


'Connected: root@bank'

1. Get number of monthly active customers.

In [16]:
%%sql

with customer_activity as (
  select customer_id, convert(rental_date, date) as Activity_date,
  date_format(convert(rental_date,date), '%Y') as Activity_year,
  date_format(convert(rental_date,date), '%M') as Activity_Month,
  date_format(convert(rental_date,date), '%m') as Activity_Month_Number
  from sakila.rental
)
select count(distinct customer_id) as Active_users, Activity_year, Activity_Month
from customer_activity
group by Activity_year, Activity_Month_Number
order by Activity_year, Activity_Month_Number


 * mysql+pymysql://root:***@localhost/bank
6 rows affected.


Active_users,Activity_year,Activity_Month
520,2005,May
590,2005,June
599,2005,July
599,2005,August
158,2006,February
1,2022,October


2. Active users in the previous month.


In [8]:
%%sql
with customer_activity as (
  select customer_id, convert(rental_date, date) as Activity_date,
  date_format(convert(rental_date,date), '%Y') as Activity_year,
  date_format(convert(rental_date,date), '%M') as Activity_Month,
  date_format(convert(rental_date,date), '%m') as Activity_Month_Number
  from sakila.rental
),
monthly_active_users as (
  select count(distinct customer_id) as Active_users, Activity_year, Activity_Month_Number
  from customer_activity
  group by Activity_year, Activity_Month_Number
  order by Activity_year, Activity_Month_Number
),
cte_activity as (
  select Active_users, lag(Active_users,1) over (partition by Activity_year) as last_month, Activity_year, Activity_Month_Number  from monthly_active_users
)
select * from cte_activity
where last_month is not null

 * mysql+pymysql://root:***@localhost/bank
3 rows affected.


Active_users,last_month,Activity_year,Activity_Month_Number
590,520,2005,6
599,590,2005,7
599,599,2005,8


3. Percentage change in the number of active customers.


In [20]:
%%sql
with customer_activity as (
  select customer_id, convert(rental_date,date) as Activity_date,
  date_format(convert(rental_date,date), '%Y') as Activity_year,
  date_format(convert(rental_date,date), '%M') as Activity_Month,
  date_format(convert(rental_date,date), '%m') as Activity_Month_Number
  from sakila.rental
),
monthly_active_users as (
  select count(distinct customer_id) as Active_users, Activity_year, Activity_Month_Number
  from customer_activity
  group by Activity_year, Activity_Month_Number
  order by Activity_year, Activity_Month_Number
),
cte_activity as (
  select Active_users, lag(Active_users,1) over (partition by Activity_year) as last_month, Activity_year, Activity_Month_Number
  from monthly_active_users
)
select (Active_users-last_month)/Active_users*100 as percentage_change, Activity_year, Activity_Month_Number
from cte_activity
where last_month is not null;

 * mysql+pymysql://root:***@localhost/bank
3 rows affected.


percentage_change,Activity_year,Activity_Month_Number
11.8644,2005,6
1.5025,2005,7
0.0,2005,8


4. Retained customers every month.

In [21]:
%%sql
with customer_activity as (
  select customer_id, convert(rental_date, date) as Activity_date,
  date_format(convert(rental_date,date), '%M') as Activity_Month,
  date_format(convert(rental_date,date), '%Y') as Activity_year,
  convert(date_format(convert(rental_date,date), '%m'), UNSIGNED) as month_number
  from sakila.rental
),
distinct_users as (
  select distinct customer_id , Activity_month, Activity_year, month_number
  from customer_activity
)    
select count(distinct d1.customer_id) as Retained_customers, d1.Activity_month, d1.Activity_year
from distinct_users d1
join distinct_users d2
on d1.customer_id = d2.customer_id and d1.month_number = d2.month_number + 1
group by d1.Activity_month, d1.Activity_year
order by d1.Activity_year, d1.month_number;

 * mysql+pymysql://root:***@localhost/bank
3 rows affected.


Retained_customers,Activity_month,Activity_year
512,June,2005
590,July,2005
599,August,2005
