In [2]:
import boto3
import datetime as dt
import json
import numpy as np
import pandas as pd
import snowflake.connector
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

In [3]:
pd.options.display.float_format = '{:,.4f}'.format

In [4]:
from abc import ABCMeta, abstractmethod

class Credentials(metaclass=ABCMeta):
    """Base type for credential providers that are handed to connectors.

    ``SnowflakeConnector.__init__`` calls ``get_keys()`` on the credentials
    object it receives, so that method is declared here to make the
    contract explicit.
    """

    def get_keys(self):
        """Return the raw secret payload for this credential source.

        Subclasses must override this. It is deliberately not marked with
        ``@abstractmethod`` so that code which already instantiates
        ``Credentials`` (or a subclass without ``get_keys``) keeps working
        until the method is actually called.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError(
            "{} must implement get_keys()".format(type(self).__name__)
        )
    
    
class SSMPSCredentials(Credentials):
    """Credentials backed by a secret stored in AWS Secrets Manager."""

    def __init__(self, secretid: str, region_name: str = 'us-east-1'):
        """Remember which secret to fetch and from which region.

        Args:
            secretid: value passed as ``SecretId`` to ``get_secret_value``.
            region_name: AWS region used for the Secrets Manager client.
                Defaults to 'us-east-1', the value that used to be
                hard-coded in ``get_keys``.
        """
        self._secretid = secretid
        self._region_name = region_name
        # Not populated by this class; kept so the attribute still exists
        # for any code that reads it.
        self._secrets = {}

    def get_keys(self):
        """Fetch the secret from AWS Secrets Manager.

        A new boto3 client is created on every call; nothing is cached.

        Returns:
            The response of ``get_secret_value`` for ``self._secretid``,
            returned unchanged.
        """
        secrets_client = boto3.client(
            service_name='secretsmanager',
            region_name=self._region_name,
        )
        return secrets_client.get_secret_value(SecretId=self._secretid)
    
    
class BaseConnector(metaclass=ABCMeta):
    """Abstract interface for connector classes.

    Cannot be instantiated directly; a subclass has to provide ``connect``.
    """

    @abstractmethod
    def connect(self):
        """Establish the connection. Concrete subclasses must override this."""
        raise NotImplementedError()
    

class SnowflakeConnector(BaseConnector):
    """Opens Snowflake connections using secrets taken from a credentials object."""

    def __init__(self, credentials: Credentials):
        """Fetch the secret once and keep its decoded JSON payload.

        The object returned by ``credentials.get_keys()`` is queried for its
        'SecretString' entry, which is parsed as JSON. A missing entry is
        treated as an empty JSON object.
        """
        secret_response = credentials.get_keys()
        secret_string = secret_response.get('SecretString', "{}")
        self._secrets = json.loads(secret_string)

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        """Open a new Snowflake connection.

        Args:
            dbname: database to connect to.
            schema: schema to use; defaults to 'DEFAULT'.

        Returns:
            Whatever ``snowflake.connector.connect`` returns.

        Raises:
            KeyError: if the stored secret lacks 'login_name',
                'login_password', 'account' or 'warehouse'.
        """
        secrets = self._secrets
        connect_kwargs = dict(
            user=secrets['login_name'],
            password=secrets['login_password'],
            account=secrets['account'],
            warehouse=secrets['warehouse'],
            database=dbname,
            schema=schema,
        )
        return snowflake.connector.connect(**connect_kwargs)
    
## Credentials
# Identifier of the secret that SSMPSCredentials looks up.
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection
# `ctx` is the module-level connection that run_query() reads as a global.
conn = SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx = conn.connect(dbname="MAX_DEV", schema="WORKSPACE")

def run_query(query):
    """Run ``query`` on the module-level connection ``ctx`` and return a DataFrame.

    Args:
        query: SQL text passed unchanged to ``cursor.execute``.

    Returns:
        pandas.DataFrame built from ``cursor.fetchall()``, with column names
        taken from ``cursor.description`` and converted to lower case.
    """
    cursor = ctx.cursor()
    try:
        cursor.execute(query)
        df = pd.DataFrame(
            cursor.fetchall(),
            columns=[desc[0] for desc in cursor.description],
        )
    finally:
        # Release the cursor even when execute/fetch fails; previously it
        # was never closed.
        cursor.close()
    df.columns = df.columns.str.lower()
    return df



In [5]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Retail Hours

In [6]:
# churn_genpop = run_query('''
# with pvc as (
# select
#     wholesale_user_activation.user_id,
#     hbo_uuid,
#     affiliate_code
# from max_prod.bi_analytics.wholesale_user_activation
#     left join max_prod.bi_analytics.dimension_user
#         on wholesale_user_activation.user_id = dimension_user.user_id
#     left join max_prod.core.geo_map
#         on wholesale_user_activation.country_code = geo_map.country_iso_code
# where
#     wholesale_user_activation.country_code = 'US'
#     and geo_map.territory = 'HBO MAX DOMESTIC'
#     and geo_map.region = 'NORTH AMERICA'
#     and affiliate_code = 'amazon_hbonow'
#     and convert_timezone('UTC', 'America/Los_Angeles', wholesale_user_activation.first_auth_time_utc::timestamp) >= '2020-11-17'
# )

# , subs as 
# (
# select 
#       c.hbo_uuid
#     , c.hurley_user_id
#     , up.hurley_profile_id
#     , is_cancel
#     , sub_month
#     , min(cycle_start_date) AS cycle_start_date
#     , min(cycle_expire_date) AS cycle_expire_date
# from max_dev.workspace.user_retain_churn_list_test as c 
# left join max_prod.identity.user_profile_dim_current up 
#     on up.hurley_user_id = c.hurley_user_id
# where cycle_expire_date between '2022-10-01' and '2022-10-31'
#         and up.is_primary_profile=1
# GROUP BY 1, 2, 3, 4, 5
# limit 3000000
# )

# , streaming_subset as
# (
# select
#       ss.hbo_uuid
#     , ss.hurley_user_id
#     , ss.hurley_profile_id
#     , ss.is_cancel
#     , ss.sub_month
#     , ss.cycle_start_date
#     , ss.cycle_expire_date
#     , hb.viewable_id
#     , hb.stream_elapsed_play_seconds/3600 as hours_viewed
# from subs ss
# left join max_prod.viewership.max_user_stream_heartbeat hb
#     on hb.hurley_profile_id = ss.hurley_profile_id
# where 1=1
#     and hb.request_date between '2022-08-01' and '2022-10-31'
#     and hb.request_date >= ss.cycle_start_date
#     and hb.request_date <= ss.cycle_expire_date
#     and hb.viewable_id IS NOT NULL 
#     and hb.stream_elapsed_play_seconds >= 900
#     and hb.video_type = 'main'
#     and hb.channel = 'HBO MAX SUBSCRIPTION'
#     and hb.country_iso_code in ('US')
#     and hb.is_primary_profile=1
# )

# select
#       s.hbo_uuid
#     , s.is_cancel
#     , s.sub_month
#     , rad.title_id
#     , sum(s.hours_viewed) as hours_viewed
# from streaming_subset s
# left join max_prod.catalog.reporting_asset_dim rad
#      on s.viewable_id = rad.viewable_id
# left join pvc 
#     on pvc.hbo_uuid = s.hbo_uuid
# left join  max_prod.bi_analytics.fact_common_receipt a
#     on s.hurley_user_id=a.hurley_user_id
#     and a.subscription_start_date = s.cycle_start_date
# where rad.asset_type!='PROMOTION'
# and affiliate_code is NULL
# and (a.signup_offer is NULL or a.signup_offer='no_free_trial')
# group by 1,2,3,4
# ''')

In [None]:
# FIXME(review): the right-hand side of this assignment is missing, so this cell
# cannot run and `churn_genpop` is never defined on a fresh kernel. The preview
# output that follows includes an `Unnamed: 0` column, so the frame was
# presumably loaded from a previously saved CSV rather than from the
# commented-out query - confirm and restore the load statement.
churn_genpop = 

In [6]:
# Preview the first rows of the frame before it is exported.
churn_genpop.head()

Unnamed: 0,hbo_uuid,is_cancel,sub_month,title_id,hours_viewed
0,a4a8e153a6fa655baedde4671b135a9632710b068d6eda...,False,28,GXuOrbQniO6UJwwEAAAOx,1.682777778
1,4a9c51d3e0c2fcda54d2653d914ad0f7c02d8299bd1b32...,False,9,GXkRjxwjR68PDwwEAABKJ,1.450833333
2,a4a8e153a6fa655baedde4671b135a9632710b068d6eda...,False,28,GYGLWjQCNPQS0wgEAAAD0,4.2275
3,105da4c3639990b65591c2dbee46e8acdcd495145bb8d4...,False,9,GYdeRFQy2mcNNwwEAAAAT,1.840833334
4,4777299cc64dd4069ac9ca7d3ae40c37446ee3ec89c5ba...,False,6,GX8VcRQca5sLDbAEAAAD-,1.005


In [7]:
import boto3
import io

def to_s3(filename, output_bucket, content):
    """Upload ``content`` to S3 as object ``filename`` in bucket ``output_bucket``.

    A fresh boto3 S3 client is created on every call.
    """
    s3_client = boto3.client('s3')
    s3_client.put_object(Body=content, Bucket=output_bucket, Key=filename)
    
# Destination bucket; write_to_sf() reads this module-level name when uploading.
output_bucket = 'hbo-outbound-datascience-content-dev'
# Resource-style handles for the same bucket. to_s3() does not use them: it
# creates its own boto3 client.
s3 = boto3.resource('s3')
bucket = s3.Bucket(output_bucket)

def write_to_sf(df, file_name, prefix='title_hours_viewed_retention', bucket=None):
    """Serialize ``df`` to CSV in memory and upload it to S3 via ``to_s3``.

    Despite the name, nothing is written to Snowflake here: the CSV is only
    placed in S3.

    Args:
        df: DataFrame to export; its index is not written.
        file_name: object name without extension; '.csv' is appended.
        prefix: key prefix inside the bucket. Defaults to the value that
            used to be hard-coded.
        bucket: target bucket. When None, the module-level
            ``output_bucket`` is used, as before.

    Returns:
        The S3 object key that was written, '<prefix>/<file_name>.csv'.
    """
    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer, index=False)
    key = '{}/{}.csv'.format(prefix, file_name)
    # Resolve the global at call time so later reassignment of
    # `output_bucket` is still honoured.
    target_bucket = output_bucket if bucket is None else bucket
    to_s3(key, target_bucket, csv_buffer.getvalue())
    return key


Boto3 will no longer support Python 3.6 starting May 30, 2022. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.7 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/



In [8]:
# Stamp the export name with today's date (YYYY-MM-DD); a re-run on the same
# day produces the same name.
filename = 'churn_user_stream_title_hours_'+pd.Timestamp.today().strftime('%Y-%m-%d')
# Upload the frame as title_hours_viewed_retention/<filename>.csv in `output_bucket`.
write_to_sf(churn_genpop,  filename)

In [9]:
# Show "<bucket>/<key>" for the object written by the export cell (no s3:// scheme).
output_bucket+'/title_hours_viewed_retention/{}.csv'.format(filename)

'hbo-outbound-datascience-content-dev/title_hours_viewed_retention/churn_user_stream_title_hours_2023-10-19.csv'