# PART 1 – DATA

a- Create an SQL query that provides one of our main business KPIs - the number of Monthly
Active Users (MAU) per day per offer and per country
    
    A user is defined as Active if he streamed at least one track longer than 30 seconds
    
    MAU are based on a rolling 30 days period

b- Create an SQL query that provides the list of the 10 best streamers of Metronomy in 2016 in
France

c- Create an SQL query that provides the number of days between the inscription and the 100th
stream of all the Brazilian users registered in January 2018

# Import Libraries

In [1]:
import sqlite3
import pandas as pd

# Connection to the database

In [2]:
con = sqlite3.connect('music.db')

# 10 best streamers of Metronomy in 2016 in France (streamed on French territory)

## Top streamers as time listened

In [3]:
metronomy_2016_FR_top_10_time = pd.read_sql_query("\
                        SELECT U.User_id, U.Email, U.Country, U.Gender, \
                        SUM(Streams_duration) as Total_listen FROM STREAMS as ST \
                        INNER JOIN SONGS as SG \
                        ON SG.Sng_id = ST.Sng_id \
                        INNER JOIN ARTISTS as A \
                        ON A.Artist_id = SG.Artist_id \
                        INNER JOIN USERS as U \
                        ON U.User_id = ST.User_id \
                        WHERE A.Artist_name = 'Metronomy' \
                        AND ST.Country = 'FR' \
                        AND strftime('%Y', ST.Stream_date)= '2016' \
                        GROUP by U.User_id, U.Email, U.Country, U.Gender \
                        ORDER BY Total_listen DESC LIMIT 10", con)

In [4]:
metronomy_2016_FR_top_10_time

Unnamed: 0,User_id,Email,Country,Gender,Total_listen
0,51,email51@gmail.com,FR,M,587
1,5,email5@gmail.com,BR,M,492
2,45,email45@gmail.com,GB,F,477
3,39,email39@gmail.com,FR,F,444
4,4,email4@gmail.com,DE,F,320
5,70,email70@gmail.com,BR,F,315
6,75,email75@gmail.com,DE,F,301
7,65,email65@gmail.com,BR,F,294
8,28,email28@gmail.com,DE,M,292
9,55,email55@gmail.com,GB,M,292


## Top 10 streamers as number of listens

In [5]:
metronomy_2016_FR_top_10_count = pd.read_sql_query("\
                        SELECT U.User_id, U.Email, U.Country, U.Gender, \
                        COUNT(Streams_duration) as Total_listen FROM STREAMS as ST \
                        INNER JOIN SONGS as SG \
                        ON SG.Sng_id = ST.Sng_id \
                        INNER JOIN ARTISTS as A \
                        ON A.Artist_id = SG.Artist_id \
                        INNER JOIN USERS as U \
                        ON U.User_id = ST.User_id \
                        WHERE A.Artist_name = 'Metronomy' \
                        AND ST.Country = 'FR' \
                        AND strftime('%Y', ST.Stream_date)= '2016' \
                        GROUP by U.User_id, U.Email, U.Country, U.Gender \
                        ORDER BY Total_listen DESC LIMIT 10", con)

In [6]:
metronomy_2016_FR_top_10_count

Unnamed: 0,User_id,Email,Country,Gender,Total_listen
0,5,email5@gmail.com,BR,M,2
1,22,email22@gmail.com,FR,M,2
2,39,email39@gmail.com,FR,F,2
3,43,email43@gmail.com,FR,M,2
4,45,email45@gmail.com,GB,F,2
5,51,email51@gmail.com,FR,M,2
6,82,email82@gmail.com,DE,F,2
7,1,email1@gmail.com,FR,M,1
8,2,email2@gmail.com,BR,F,1
9,4,email4@gmail.com,DE,F,1


In [29]:
df = pd.read_sql_query("""\
                        SELECT R.Stream_date - U.Inscription_date FROM 
                        (SELECT User_id, Stream_date, \
                        RANK() OVER(PARTITION BY User_id ORDER BY Stream_date ASC) RANK FROM STREAMS) R \
                        INNER JOIN USERS U
                        ON R.User_ID = U.User_id
                        WHERE R.RANK = 100
""",con)

In [30]:
df

Unnamed: 0,R.Stream_date - U.Inscription_date
0,1
1,6
2,1
3,3
4,0
5,2
6,7
7,4
8,8
9,5
