In [1]:
# general
import boto3
import datetime as dt
import json
import numpy as np
import pandas as pd
import snowflake.connector

In [2]:
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

### 0.2 : Connection to Snowflake

In [3]:
from abc import ABCMeta, abstractmethod

class Credentials(metaclass=ABCMeta):
    pass
    
    
class SSMPSCredentials(Credentials):
    def __init__(self, secretid: str):
        self._secretid = secretid
        self._secrets = {}
        
    def get_keys(self):
        """
        credential fetching 
        """
        _aws_sm_args = {'service_name': 'secretsmanager', 'region_name': 'us-east-1'}
        secrets_client = boto3.client(**_aws_sm_args)
        get_secret_value_response = secrets_client.get_secret_value(SecretId=self._secretid)
        return get_secret_value_response
    
    
class BaseConnector(metaclass=ABCMeta):
    @abstractmethod
    def connect(self):
        raise NotImplementedError
    

class SnowflakeConnector(BaseConnector):
    def __init__(self, credentials: Credentials):
        keys = credentials.get_keys()
        self._secrets = json.loads(keys.get('SecretString', "{}"))

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        ctx = snowflake.connector.connect(
            user=self._secrets['login_name'],
            password=self._secrets['login_password'],
            account=self._secrets['account'],
            warehouse=self._secrets['warehouse'],
            database=dbname,
            schema=schema
        )

        return ctx
    
## Credentials
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection 
conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx=conn.connect("MAX_DEV","WORKSPACE")

def run_query(query):
    cursor = ctx.cursor()
    cursor.execute(query)
    df = pd.DataFrame(cursor.fetchall(), columns = [desc[0] for desc in cursor.description])
    df.columns= df.columns.str.lower()
    return df



In [4]:
revenue = pd.read_csv('Revenue.csv', encoding = "ISO-8859-1")
revenue.head()

Unnamed: 0,Region,Country,Company,Business Line,Business model,Distribution/Technology,Digital/Analogue,Year,Quarter,Value
0,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q4,23637
1,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q3,23514
2,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q2,23390
3,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q1,23269
4,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2026,Q4,23149


In [5]:
subscriber = pd.read_csv('Customers(RGUs).csv', encoding = "ISO-8859-1")
subscriber.head()

Unnamed: 0,Region,Country,Company,Business Line,Business model,Distribution/Technology,Digital/Analogue,Year,Quarter,Value
0,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q3,1349
1,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q4,1349
2,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q1,1349
3,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q2,1349
4,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2026,Q4,1349


# AVOD/FAST

In [13]:
revenue_2022 = revenue[(revenue['Company'].isin(['PlutoTV', 'Tubi', 'Roku TV']))
                      &(revenue['Year'] == 2022)
                      &(revenue['Quarter'] == 'Q4')]

In [14]:
subscriber_2022 = subscriber[(subscriber['Company'].isin(['PlutoTV', 'Tubi', 'Roku TV']))
                      &(subscriber['Year'] == 2022)
                      &(subscriber['Quarter'] == 'Q4')]

In [15]:
revenue_2022['Value'] = revenue_2022['Value'].str.replace(',', '').astype(int)
subscriber_2022['Value'] = subscriber_2022['Value'].str.replace(',', '').astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [16]:
title_popularity = run_query('''
SELECT m.imdb_id, t.TITLE_TYPE, SUM(m.PAGE_VIEWS) AS total_page_views
FROM max_prod.ckg.wikipedia_page_views_media m
join "ENTERPRISE_DATA"."CATALOG"."IMDB_TITLE" t
ON m.imdb_id = t.title_id
WHERE m.DATE >= '2022-10-01'
AND m.DATE < '2023-01-01'
AND m.LANGUAGE = 'en'
GROUP BY 1, 2
''')

In [17]:
title_availabity = pd.read_csv('fast titles.csv', encoding= 'unicode_escape')

In [18]:
title_availabity = title_availabity.rename(columns = {'IMDb ID':'imdb_id', 'Platform':'platform', 'Title':'title', 'First Release Year':'production_release_date',
                                                     'Primary Genre':'primary_genre', 'Scripted':'scripted', 'Original':'is_original'})
title_availabity = title_availabity[title_availabity['platform'].isin(['Pluto TV (on-demand)', 'Tubi', 'Roku', ])]
title_availabity = title_availabity[['platform', 'title', 'imdb_id', 'production_release_date', 'primary_genre', 'scripted', 'is_original']]\
                    .drop_duplicates()

In [22]:
title_availabity.head()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original
0,Pluto TV (on-demand),ÁBoom! (Colombia),-,2017,Entertainment,Unscripted,Not An Original
3,Pluto TV (on-demand),ÁBoom! (Colombia): Season 1,-,2017,Entertainment,Unscripted,Not An Original
6,Pluto TV (on-demand),Áboom! Mxico,-,[Not given],Comedy,Unscripted,Not An Original
9,Pluto TV (on-demand),Áboom! Mxico: Season 1,-,[Not given],Comedy,Unscripted,Not An Original
12,Tubi,Áboom! Spain,-,2015,Children & Family,Scripted,Not An Original


In [26]:
title_availabity_multiplatform = title_availabity.groupby(['imdb_id']).count().reset_index()
title_availabity_multiplatform =title_availabity_multiplatform[title_availabity_multiplatform['title']>1]

In [27]:
title_availabity_multiplatform

Unnamed: 0,imdb_id,platform,title,production_release_date,primary_genre,scripted,is_original
0,-,17552,17552,17552,17552,17552,17552
31,tt0013427,2,2,2,2,2,2
32,tt0013442,3,3,3,3,3,3
33,tt0014142,3,3,3,3,3,3
47,tt0016430,2,2,2,2,2,2
...,...,...,...,...,...,...,...
39842,tt9896768,2,2,2,2,2,2
39847,tt9903028,6,6,6,6,6,6
39849,tt9904552,2,2,2,2,2,2
39854,tt9906264,3,3,3,3,3,3


In [34]:
title_popularity.head()

Unnamed: 0,imdb_id,title_type,total_page_views
0,tt2886926,movie,120
1,tt0088031,movie,26737
2,tt0094056,movie,15968
3,tt0354318,tvSeries,6225
4,tt0043146,movie,398


## Actual Revenues

In [36]:
title_availabity = title_availabity.merge(title_popularity, on = ['imdb_id'], how = 'left')

In [37]:
title_availabity.tail()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_type,total_page_views
85036,Pluto TV (on-demand),___,-,2009,Comedy,Scripted,Not An Original,,
85037,Tubi,___________,tt0155658,1997,Children & Family,Scripted,Not An Original,tvSeries,8266.0
85038,Tubi,___________: Season 1,tt0155658,1997,Children & Family,Scripted,Not An Original,tvSeries,8266.0
85039,Pluto TV (on-demand),___(_),tt3963816,2016,Crime & Thriller,Scripted,Not An Original,movie,34712.0
85040,Pluto TV (on-demand),____,tt0079142,1979,Comedy,Scripted,Not An Original,movie,5725.0


In [39]:
title_availabity=title_availabity[title_availabity['title_type'] == 'movie']

In [40]:
title_availabity.head()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_type,total_page_views
28,Pluto TV (on-demand),Àquin Diablos Es Juliette?,tt0126627,1997,Entertainment,Unscripted,Not An Original,movie,257.0
29,Tubi,Àquin Diablos Es Juliette?,tt0126627,1997,Entertainment,Unscripted,Not An Original,movie,257.0
33,Tubi,Àquin Te Quiere a Ti?,tt0180337,1942,Comedy,Scripted,Not An Original,movie,43.0
40,Pluto TV (on-demand),.45,tt0259822,2006,Crime & Thriller,Scripted,Not An Original,movie,3582.0
41,Roku,.45,tt0259822,2006,Crime & Thriller,Scripted,Not An Original,movie,3582.0


In [44]:
title_availabity_sum = title_availabity.groupby(['platform']).sum().reset_index()

In [45]:
title_availabity_sum.rename(columns = {'total_page_views': 'platform_total_page_views'}, inplace = True)

In [46]:
title_availabity = title_availabity.merge(title_availabity_sum, on = ['platform'])

In [48]:
title_availabity.head()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_type,total_page_views,platform_total_page_views
0,Pluto TV (on-demand),Àquin Diablos Es Juliette?,tt0126627,1997,Entertainment,Unscripted,Not An Original,movie,257.0,80252968.0
1,Pluto TV (on-demand),.45,tt0259822,2006,Crime & Thriller,Scripted,Not An Original,movie,3582.0,80252968.0
2,Pluto TV (on-demand),'neath the Arizona Skies,tt0024805,1934,Action & Adventure,Scripted,Not An Original,movie,710.0,80252968.0
3,Pluto TV (on-demand),1 Buck,tt4685428,2017,Crime & Thriller,Scripted,Not An Original,movie,133.0,80252968.0
4,Pluto TV (on-demand),1 Mile to You,tt2184233,2017,Romance,Scripted,Not An Original,movie,3569.0,80252968.0


In [49]:
title_availabity.platform.unique()

array(['Pluto TV (on-demand)', 'Tubi', 'Roku'], dtype=object)

In [52]:
revenue_2022 = revenue_2022[['Company', 'Value']].rename(columns= {'Company':'platform', 'Value':'Revenue'})
revenue_2022['platform']=revenue_2022['platform'].replace('PlutoTV','Pluto TV (on-demand)').replace('Roku TV','Roku')

In [53]:
revenue_2022.head()

Unnamed: 0,platform,Revenue
5859,Pluto TV (on-demand),223462
5942,Roku,255558
6884,Tubi,150496


In [54]:
title_availabity = title_availabity.merge(revenue_2022, on= ['platform'])

In [55]:
title_availabity.platform.unique()

array(['Pluto TV (on-demand)', 'Tubi', 'Roku'], dtype=object)

In [64]:
title_availabity['title_revenue'] = title_availabity['Revenue'] * title_availabity['total_page_views'] *1000 /title_availabity['platform_total_page_views']

In [65]:
title_availabity.head()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_type,total_page_views,platform_total_page_views,Revenue,title_revenue
0,Pluto TV (on-demand),Àquin Diablos Es Juliette?,tt0126627,1997,Entertainment,Unscripted,Not An Original,movie,257.0,80252968.0,223462,715.608848
1,Pluto TV (on-demand),.45,tt0259822,2006,Crime & Thriller,Scripted,Not An Original,movie,3582.0,80252968.0,223462,9973.972352
2,Pluto TV (on-demand),'neath the Arizona Skies,tt0024805,1934,Action & Adventure,Scripted,Not An Original,movie,710.0,80252968.0,223462,1976.973861
3,Pluto TV (on-demand),1 Buck,tt4685428,2017,Crime & Thriller,Scripted,Not An Original,movie,133.0,80252968.0,223462,370.33454
4,Pluto TV (on-demand),1 Mile to You,tt2184233,2017,Romance,Scripted,Not An Original,movie,3569.0,80252968.0,223462,9937.774239


In [66]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).head(10)\
[['platform', 'title', 'production_release_date', 'primary_genre', 'scripted', 'is_original', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,scripted,is_original,title_revenue
17860,Roku,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,6105926.0
18793,Roku,Weird: the Al Yankovic Story,2022,Comedy,Scripted,Roku Original,3449668.0
17938,Roku,The Blair Witch Project,1999,Horror,Scripted,Not An Original,3324627.0
2845,Pluto TV (on-demand),The Godfather,1972,Crime & Thriller,Scripted,Not An Original,2897367.0
3300,Pluto TV (on-demand),Titanic,1997,Romance,Scripted,Not An Original,2744015.0
2695,Pluto TV (on-demand),The Blair Witch Project,1999,Horror,Scripted,Not An Original,2472905.0
1324,Pluto TV (on-demand),Hereditary,2018,Horror,Scripted,Not An Original,2229859.0
16602,Roku,John Wick,2014,Crime & Thriller,Scripted,Not An Original,2070000.0
1512,Pluto TV (on-demand),Jeepers Creepers,2001,Horror,Scripted,Not An Original,1971675.0
1104,Pluto TV (on-demand),Forrest Gump,1994,Romance,Scripted,Not An Original,1875989.0


In [59]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).groupby(['platform']).first().reset_index()\
[['platform', 'title', 'production_release_date', 'primary_genre', 'scripted', 'is_original', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,scripted,is_original,title_revenue
0,Pluto TV (on-demand),The Godfather,1972,Crime & Thriller,Scripted,Not An Original,2897367000.0
1,Roku,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,6105926000.0
2,Tubi,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,1869951000.0


In [69]:
title_availabity[title_availabity['imdb_id'] == 'tt0133093']

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_type,total_page_views,platform_total_page_views,Revenue,title_revenue
13024,Tubi,The Matrix,tt0133093,1999,Sci-Fi & Fantasy,Scripted,Not An Original,movie,537983.0,131270799.0,150496,616773.038519


## HBO Titles

In [70]:
fast_titles = pd.read_csv('FAST Title Analysis - February Wiki Data.csv')

In [71]:
fast_titles.head()

Unnamed: 0,Title,imdb_id,PRODUCT ID,page_views
0,"Matrix, The",tt0133093,2003656,117987
1,Edge of Tomorrow,tt1631867,2054390,105698
2,"Departed, The",tt0407887,2025893,100895
3,Me Before You,tt2674426,2068798,87504
4,"Notebook, The",tt0332280,2016856,87169


In [72]:
bbo_titles = title_availabity[title_availabity['imdb_id'].isin(fast_titles.imdb_id)]

In [73]:
bbo_titles

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_type,total_page_views,platform_total_page_views,Revenue,title_revenue
109,Pluto TV (on-demand),A Star Is Born,tt1517451,2018,Romance,Scripted,Not An Original,movie,312695.0,80252968.0,223462,870689.917537
683,Pluto TV (on-demand),Clash of the Titans,tt0800320,2010,Sci-Fi & Fantasy,Scripted,Not An Original,movie,175774.0,80252968.0,223462,489437.469627
1860,Pluto TV (on-demand),Mitchell,tt0073396,1975,Crime & Thriller,Scripted,Not An Original,movie,5708.0,80252968.0,223462,15893.756054
5429,Tubi,Cast a Dark Shadow,tt0050233,1955,Crime & Thriller,Scripted,Not An Original,movie,6714.0,131270799.0,150496,7697.295603
5747,Tubi,Cop Out,tt1385867,2010,Crime & Thriller,Scripted,Not An Original,movie,36696.0,131270799.0,150496,42070.294826
6294,Tubi,Dog Day Afternoon,tt0072890,1975,Crime & Thriller,Scripted,Not An Original,movie,119887.0,131270799.0,150496,137444.992256
6871,Tubi,Fit to Kill,tt0106922,1993,Romance,Scripted,Not An Original,movie,2001.0,131270799.0,150496,2294.055481
7006,Tubi,Freddie Steps Out,tt0038535,1946,Comedy,Scripted,Not An Original,movie,306.0,131270799.0,150496,350.815081
7015,Tubi,Free Willy,tt0106965,1993,Children & Family,Scripted,Not An Original,movie,69190.0,131270799.0,150496,79323.187787
7184,Tubi,Ghosts of Girlfriends Past,tt0821640,2009,Romance,Scripted,Not An Original,movie,43315.0,131270799.0,150496,49658.677251


## Potentials

In [137]:
title_popularity = title_popularity[title_popularity['imdb_id'].isin(title_availabity.imdb_id)]

In [139]:
title_revenue = title_popularity.copy()
title_revenue['sum_page_views'] = title_popularity.total_page_views.sum()
title_revenue['revenue'] = revenue_2022.Value.sum()
# title_revenue['total_subs'] = subscriber_2022.Value.sum()

In [140]:
title_revenue['title_revenue'] = title_revenue['total_page_views']/title_revenue['sum_page_views'] * title_revenue['revenue']

In [141]:
# title_revenue['title_revenue_per_sub'] = title_revenue['title_revenue']/title_revenue['total_subs']

In [142]:
title_revenue.head()

Unnamed: 0,imdb_id,total_page_views,sum_page_views,revenue,title_revenue
20,tt2531334,7307,223244273,629516,20.6
27,tt2168288,990,223244273,629516,2.79
34,tt0385278,7936,223244273,629516,22.38
53,tt3993894,4883,223244273,629516,13.77
83,tt1950235,67065,223244273,629516,189.11


In [143]:
title_availabity = title_availabity.merge(title_revenue[['imdb_id', 'title_revenue',]],
                                          on = ['imdb_id'], how = 'left')

In [144]:
title_availabity.head()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_revenue
0,Pluto TV (on-demand),ÁBoom! (Colombia),-,2017,Entertainment,Unscripted,Not An Original,
1,Pluto TV (on-demand),ÁBoom! (Colombia): Season 1,-,2017,Entertainment,Unscripted,Not An Original,
2,Pluto TV (on-demand),Áboom! Mxico,-,[Not given],Comedy,Unscripted,Not An Original,
3,Pluto TV (on-demand),Áboom! Mxico: Season 1,-,[Not given],Comedy,Unscripted,Not An Original,
4,Tubi,Áboom! Spain,-,2015,Children & Family,Scripted,Not An Original,


In [145]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).groupby(['platform']).first().reset_index()\
[['platform', 'title', 'production_release_date', 'primary_genre', 'scripted', 'is_original', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,scripted,is_original,title_revenue
0,Pluto TV (on-demand),The Godfather,1972,Crime & Thriller,Scripted,Not An Original,2934.18
1,Roku,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39
2,Tubi,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39


In [146]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).head(10)\
[['platform', 'title', 'production_release_date', 'primary_genre', 'scripted', 'is_original', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,scripted,is_original,title_revenue
64960,Roku,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39
64961,Tubi,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39
69455,Pluto TV (on-demand),The Godfather,1972,Crime & Thriller,Scripted,Not An Original,2934.18
66376,Roku,The Blair Witch Project,1999,Horror,Scripted,Not An Original,2504.33
66377,Tubi,The Blair Witch Project,1999,Horror,Scripted,Not An Original,2504.33
66375,Pluto TV (on-demand),The Blair Witch Project,1999,Horror,Scripted,Not An Original,2504.33
32293,Tubi,Hellraiser,1987,Crime & Thriller,Scripted,Not An Original,2410.62
65316,Pluto TV (on-demand),The Addams Family (1964),1964,Horror,Scripted,Not An Original,2311.37
65317,Roku,The Addams Family (1964),1964,Horror,Scripted,Not An Original,2311.37
65318,Tubi,The Addams Family (1964),1964,Horror,Scripted,Not An Original,2311.37


In [147]:
title_revenue_sub = title_availabity.groupby(['platform']).sum()[['title_revenue']].reset_index()\
                .merge(subscriber_2022.groupby(['Company']).sum()[['Value']],
                       left_on = ['platform'], right_on = ['Company'], how = 'left')
# title_revenue_sub['title_revenue_per_sub'] = title_revenue_sub['title_revenue']/title_revenue_sub['Value']

In [148]:
title_revenue_sub.rename(columns = {'Value':'Subscribers'})

Unnamed: 0,platform,title_revenue,Subscribers
0,Pluto TV (on-demand),555759.93,
1,Roku,655894.04,
2,Tubi,663633.83,
