# GOALS
Create a GBQ view containing the daily state of each user's engagement.
- generate dates index
- generate users and dates index
- fill last_login_date for each date/user key
- fill last_consumption_start_date for each date/user key
- fill last_consumption_completion_date for each date/user key

# PACKAGES

In [1]:
import pandas as pd
from google.oauth2 import service_account
import pandas_gbq
import logging

# PARAMETERS

In [2]:
# Route pandas_gbq's log messages to the notebook output.
logger = logging.getLogger("pandas_gbq")
logger.setLevel(logging.DEBUG)
# Re-running this cell must not stack another StreamHandler on the same
# logger, otherwise every message is printed once per execution of the cell.
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
    logger.addHandler(logging.StreamHandler())

# Project id passed to every BigQuery read in this notebook.
project_id = "analytics-dev-308300"

# Service-account credentials loaded from a key file; the path is relative,
# so it depends on the directory the notebook is run from.
credentials = service_account.Credentials.from_service_account_file(
    "../keys/gcp_key.json",
)

# FUNCTIONS

## generate base (users and dates) data frame

In [85]:
def get_data():
    """
    ()-->(list, df)
    Read the group 1818 rows of dtm_engagement.dim_users from BigQuery.

    Two queries are run with the notebook-level `credentials` and
    `project_id`.

    Returns a tuple (users_list, login_df):
    - users_list: list of the distinct user_id values
    - login_df: dataframe of the distinct
      (extraction_date, user_id, last_login) rows
    """
    # pandas.read_gbq is deprecated since pandas 2.2 in favour of
    # pandas_gbq.read_gbq, which this notebook already imports. The query is
    # passed positionally because the name of that first parameter differs
    # between pandas-gbq versions.
    users_query = """
        SELECT DISTINCT
            user_id
        FROM
            dtm_engagement.dim_users
        WHERE
            group_id=1818
    """
    users_df = pandas_gbq.read_gbq(
        users_query, project_id=project_id, credentials=credentials
    )
    users_list = users_df["user_id"].tolist()

    login_query = """
        SELECT DISTINCT
            extraction_date, user_id, last_login
        FROM
            dtm_engagement.dim_users
        WHERE group_id=1818
    """
    login_df = pandas_gbq.read_gbq(
        login_query, project_id=project_id, credentials=credentials
    )

    return users_list, login_df

In [86]:
def create_base_df(
    ls_users,
    start_date="2021-08-16",
    end_date=None,
):
    """
    (iterable, date-like, date-like) --> df
    Create a dataframe with one row for each combination of user and date.

    ls_users: iterable of user ids.
    start_date: first day of the range (included).
    end_date: last day of the range (included, as pd.date_range includes both
        bounds by default). None means "today", resolved each time the
        function is called.

    Returns a dataframe with the columns action_date ("YYYY-MM-DD" string)
    and user_id, ordered by date and then by the order of ls_users. The two
    columns are present even when there are no rows.
    """
    # A default of pd.Timestamp.today() in the signature would be evaluated
    # only once, when the function is defined, and would go stale in a kernel
    # that stays open over several days.
    if end_date is None:
        end_date = pd.Timestamp.today().strftime("%Y-%m-%d")

    dates_index = (
        pd.date_range(start=start_date, end=end_date)
        .strftime("%Y-%m-%d")
        .to_list()
    )

    rows = [
        {"action_date": action_date, "user_id": user}
        for action_date in dates_index
        for user in ls_users
    ]
    # Explicit columns keep the schema stable when rows is empty.
    return pd.DataFrame(rows, columns=["action_date", "user_id"])

## fill last_login_date for each date/user key

In [None]:
def get_last_login(actions_df, login_df):
    """
    (df,df)-->df
    Add the last_login column to each date/user row of actions_df.

    actions_df: dataframe with the columns action_date and user_id.
    login_df: dataframe with the columns extraction_date, user_id and
        last_login.

    Returns actions_df left-joined with login_df on
    (action_date == extraction_date, user_id). Rows without a match keep a
    missing last_login. The extraction_date column is dropped because it
    duplicates action_date after the join.
    """
    merged = actions_df.merge(
        login_df,
        how="left",
        left_on=["action_date", "user_id"],
        right_on=["extraction_date", "user_id"],
    )
    return merged.drop(columns=["extraction_date"])

# DATA WRANGLING

In [81]:
# get_data() returns (users_list, login_df); only the users list is needed
# to build the date/user base frame.
df_actions = create_base_df(get_data()[0])

Requesting query... 
Query running...
Job ID: 75adad65-e0b5-49f4-a90a-f3afae01f140
Query done.
Processed: 71.6 KB Billed: 10.0 MB
Standard price: $0.00 USD

Got 30 rows.



In [72]:
# Select extraction_date, user_id and last_login for group 1818.
query = """
SELECT
    extraction_date, user_id, last_login
FROM
    dtm_engagement.dim_users
WHERE group_id=1818
"""
# pandas.read_gbq is deprecated since pandas 2.2 in favour of
# pandas_gbq.read_gbq, which this notebook already imports.
df_login = pandas_gbq.read_gbq(query, project_id=project_id, credentials=credentials)

Requesting query... 
Query running...
Job ID: 80e67ad9-2f25-4a96-9730-6cf4889b175c
Query done.
Processed: 71.6 KB Billed: 10.0 MB
Standard price: $0.00 USD

Got 30 rows.



In [73]:
def get_last_login(login_df, extraction_date, user_id):
    """
    (df, date like str, int)-->None
    Display the rows of login_df whose extraction_date and user_id equal the
    given values. Nothing is returned: the matching rows are only rendered
    with display().
    """
    same_date = login_df["extraction_date"] == extraction_date
    same_user = login_df["user_id"] == user_id
    display(login_df[same_date & same_user])

In [74]:
# Spot check: show the login row of one user for one extraction date.
get_last_login(login_df=df_login, extraction_date="2021-08-22", user_id=20118)

Unnamed: 0,extraction_date,user_id,last_login
5,2021-08-22,20118,2021-08-20T20:01:05+00:00


In [83]:
# Left join: keep every date/user row of df_actions and attach last_login
# where df_login has a row for the same date and user.
df_merge = pd.merge(
    df_actions,
    df_login,
    how="left",
    left_on=["action_date", "user_id"],
    right_on=["extraction_date", "user_id"],
)
# extraction_date duplicates action_date on matched rows.
df_merge = df_merge.drop("extraction_date", axis=1)
df_merge

Unnamed: 0,action_date,user_id,last_login
0,2021-08-16,20029,
1,2021-08-16,20030,
2,2021-08-16,20082,
3,2021-08-16,20100,
4,2021-08-16,20108,
...,...,...,...
205,2021-08-22,20168,2021-08-16T16:14:30+00:00
206,2021-08-22,20164,2021-08-17T17:58:53+00:00
207,2021-08-22,20167,2021-08-17T19:10:57+00:00
208,2021-08-22,20161,2021-08-18T12:28:59+00:00
