In [1]:
# !pip install snowflake --user
# !pip install snowflake-connector-python --user
# !pip install matplotlib
# !pip install seaborn

# Data

In [2]:
import os
import sys
path=!pwd
sys.path.append(os.path.join(path[0], '..'))
from utils import *
import snowflake.connector

In [3]:
# Let the notebook render up to 1000 rows of a DataFrame before truncating.
pd.options.display.max_rows = 1000

In [4]:
class SnowflakeConnector(BaseConnector):
    """Open Snowflake connections using secrets taken from a credentials object.

    The secrets are parsed once, at construction time, and reused by every
    call to :meth:`connect`.
    """

    def __init__(self, credentials: Credentials):
        """Read and decode the secret payload.

        Args:
            credentials: Object whose ``get_keys()`` result exposes a
                ``'SecretString'`` entry holding a JSON document. When that
                entry is absent, an empty secrets dict is stored instead.
        """
        secret_payload = credentials.get_keys().get('SecretString', "{}")
        self._secrets = json.loads(secret_payload)

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        """Create a Snowflake connection bound to ``dbname`` / ``schema``.

        Args:
            dbname: Database to select for the session.
            schema: Schema to select for the session.

        Returns:
            The connection object produced by ``snowflake.connector.connect``.

        Raises:
            KeyError: If the stored secrets lack ``login_name``,
                ``login_password``, ``account`` or ``warehouse``.
        """
        secrets = self._secrets
        return snowflake.connector.connect(
            user=secrets['login_name'],
            password=secrets['login_password'],
            account=secrets['account'],
            warehouse=secrets['warehouse'],
            database=dbname,
            schema=schema,
        )
    
## Credentials
# Identifier handed to SSMPSCredentials to look up the Snowflake secrets.
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection
# `ctx` is the module-level connection used by the query cells below.
conn = SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx = conn.connect(dbname="MAX_PROD", schema="DATASCIENCE_STAGE")

In [5]:
def run_query(query, connection=None):
    """Execute a SQL statement and return the full result set as a DataFrame.

    Args:
        query: SQL text to execute.
        connection: Optional DB-API style connection exposing ``cursor()``.
            Defaults to the notebook-level ``ctx`` connection, which keeps
            existing ``run_query(sql)`` calls working unchanged.

    Returns:
        pandas.DataFrame with one row per fetched record and lower-cased
        column names taken from the cursor description.

    Raises:
        Whatever the underlying cursor raises on ``execute`` / ``fetchall``;
        the cursor is closed before the exception propagates.
    """
    active_connection = ctx if connection is None else connection
    cursor = active_connection.cursor()
    try:
        cursor.execute(query)
        rows = cursor.fetchall()
        # First item of each description entry is the column name.
        column_names = [desc[0].lower() for desc in cursor.description]
    finally:
        # Always release the cursor, including when the query fails;
        # the original never closed it.
        cursor.close()
    return pd.DataFrame(rows, columns=column_names)

# Input Data

In [8]:
# Peek at five rows of the source table to inspect its columns.
run_query(query='''select *
            from max_prod.content_analytics.new_title_release_days_on_platform l
            limit 5
            ''')

Unnamed: 0,title_level,title,title_id,first_release_date,days_on_hbo_max,hours,cumulative_hours,total_streams,cumulative_streams,daily_viewing_subs,...,cumulative_viewing_profiles,first_views,cumulative_first_views,retail_first_views,cumulative_retail_first_views,wholesale_first_views,cumulative_wholesale_first_views,total_run_time,number_available_episodes,record_timestamp
0,Series and Movies,Young Sheldon,GXyL-rA27qcJ6pQEAAADP,2020-09-01 07:01:00,574,104681,46854724,371390,162802965,48856,...,2955149,37,89758,27,52814,10,36944,1575.616667,83,2022-04-12 14:57:08.549000+00:00
1,Series and Movies,Young Sheldon,GXyL-rA27qcJ6pQEAAADP,2020-09-01 07:01:00,167,76994,10205014,237431,33983535,34257,...,834742,151,37459,92,18334,59,19125,1248.5,65,2022-03-16 00:42:17.288000+00:00
2,Series and Movies,Young Sheldon,GXyL-rA27qcJ6pQEAAADP,2020-09-01 07:01:00,237,118372,15486152,415378,51488224,52404,...,1209485,183,46672,116,24023,67,22649,1248.5,65,2022-03-16 00:42:17.288000+00:00
3,Series and Movies,Young Sheldon,GXyL-rA27qcJ6pQEAAADP,2020-09-01 07:01:00,296,63957,20530636,232003,69745520,36507,...,1578875,72,55083,49,29504,23,25579,1248.5,65,2022-03-16 00:42:17.288000+00:00
4,Series and Movies,Young Sheldon,GXyL-rA27qcJ6pQEAAADP,2020-09-01 07:01:00,10,106698,1150287,368359,3886086,48692,...,162227,409,4529,193,1947,216,2582,1248.5,65,2022-03-16 00:42:17.288000+00:00


In [11]:
# Top 100 titles first released in 2021, ranked by cumulative viewing subs
# at day 28 on the platform.
# NOTE(review): the preview output above shows title_level 'Series and Movies',
# while this filter uses 'Seasons and Movies' — confirm that value exists.
viewership_2021 = run_query(query='''select title, CUMULATIVE_VIEWING_SUBS
            from max_prod.content_analytics.new_title_release_days_on_platform l
            where  days_on_hbo_max = 28 
            and title_level = 'Seasons and Movies'
            and year(first_release_date) = 2021
            order by CUMULATIVE_VIEWING_SUBS desc
            limit 100
            ''')

In [12]:
# Top 100 titles first released in 2022, ranked by cumulative viewing subs
# at day 28 on the platform.
# NOTE(review): the preview output above shows title_level 'Series and Movies',
# while this filter uses 'Seasons and Movies' — confirm that value exists.
viewership_2022 = run_query(query='''select title, CUMULATIVE_VIEWING_SUBS
            from max_prod.content_analytics.new_title_release_days_on_platform l
            where  days_on_hbo_max = 28 
            and title_level = 'Seasons and Movies'
            and year(first_release_date) = 2022
            order by CUMULATIVE_VIEWING_SUBS desc
            limit 100
            ''')

In [14]:
# Persist the 2021 ranking next to the notebook; the DataFrame index is
# written as the first (unnamed) column because `index` defaults to True.
viewership_2021.to_csv(path_or_buf='viewership_2021.csv')

In [15]:
# Persist the 2022 ranking next to the notebook; the DataFrame index is
# written as the first (unnamed) column because `index` defaults to True.
viewership_2022.to_csv(path_or_buf='viewership_2022.csv')