In [73]:
# general
import boto3
import datetime as dt
import json
import numpy as np
import pandas as pd
import snowflake.connector

In [74]:
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

### 0.2 : Connection to Snowflake

In [75]:
from abc import ABCMeta, abstractmethod

class Credentials(metaclass=ABCMeta):
    pass
    
    
class SSMPSCredentials(Credentials):
    def __init__(self, secretid: str):
        self._secretid = secretid
        self._secrets = {}
        
    def get_keys(self):
        """
        credential fetching 
        """
        _aws_sm_args = {'service_name': 'secretsmanager', 'region_name': 'us-east-1'}
        secrets_client = boto3.client(**_aws_sm_args)
        get_secret_value_response = secrets_client.get_secret_value(SecretId=self._secretid)
        return get_secret_value_response
    
    
class BaseConnector(metaclass=ABCMeta):
    @abstractmethod
    def connect(self):
        raise NotImplementedError
    

class SnowflakeConnector(BaseConnector):
    def __init__(self, credentials: Credentials):
        keys = credentials.get_keys()
        self._secrets = json.loads(keys.get('SecretString', "{}"))

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        ctx = snowflake.connector.connect(
            user=self._secrets['login_name'],
            password=self._secrets['login_password'],
            account=self._secrets['account'],
            warehouse=self._secrets['warehouse'],
            database=dbname,
            schema=schema
        )

        return ctx
    
## Credentials
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection 
conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx=conn.connect("MAX_DEV","WORKSPACE")

def run_query(query):
    cursor = ctx.cursor()
    cursor.execute(query)
    df = pd.DataFrame(cursor.fetchall(), columns = [desc[0] for desc in cursor.description])
    df.columns= df.columns.str.lower()
    return df



In [4]:
# run_query('''
# copy into max_dev.WORKSPACE.MARKET_DEMAND_LANGURAGE_COUNTRY_MAPPING
#     from(
#         select
#               $1, $2, $3, $4, $5, $6
#         from @HBO_OUTBOUND_DATASCIENCE_CONTENT_DEV/market_share_of_demand/Language_Country_Mapping_all.csv
#         )
#     file_format = (type = csv null_if=('') skip_header = 1  FIELD_OPTIONALLY_ENCLOSED_BY='"')
#     on_error = 'CONTINUE';

# ''')

In [5]:
ampere_future = pd.read_csv('Ampere Future Slate.csv', encoding = "ISO-8859-1")

In [6]:
# ampere_temp['Month Announced'] = pd.to_datetime(ampere_temp['Month Announced'])

In [7]:
ampere_temp = ampere_future[ampere_future['All Commissioners'].isin(['Amazon Prime Video', 'Apple TV+', 'Discovery+', 'Disney+', 'Hulu', 'Netflix', 'Paramount+', 'Peacock', 'HBO', 'HBO Max'])]

In [8]:
ampere_temp['All Commissioners'].unique()

array(['Netflix', 'Discovery+', 'Disney+', 'Peacock', 'Hulu',
       'Paramount+', 'HBO Max', 'HBO'], dtype=object)

In [9]:
ampere_temp.head()

Unnamed: 0,Month Announced,Title Type,Upcoming,Status,Fate,Title,All Commissioners,All Commissioner Parent Companies,All Commissioner Types,All Commissioner Countries,Co-Commission,Platform,In-House,Primary Production Company,All Parent Producers,All Producers,Country Of Origin,Region of Origin,One Episode Special,Season Number,Scripted,Primary Genre,Secondary Genre,Language,Ampere Description,Adaptations,Detailed Status,Production ID,Show ID,IMdb ID,Announced Quarter,Development Order Month,Development Order Quarter,Pilot Order Month,Pilot Order Quarter,Series Order Month,Series Order Quarter,Released Month,Released Quarter
0,2023-03-01,TV: Renewal,Upcoming,Renewed Series,TBD,Community Squad / Divisin Palermo / Palermo ...,Netflix,Netflix,SVoD,Global,Single Commissioner,VoD,External Production,Other Indie Producer,Other Indie Producer,K&S Films,Argentina,Central and South America,Series,2,Scripted,Comedy,Drama,Spanish,,Not an adaptation,Renewed Series- Series Production,RM53537321P,AG53515340P,tt26451138,2023 Q1,-,-,-,-,2023-03-01,2023 Q1,-,-
9,2023-03-01,TV: First-Run,Upcoming,Series Order,TBD,Unicorn Academy: Season 1,Netflix,Netflix,SVoD,Global,Single Commissioner,VoD,External Production,Other Indie Producer,Other Indie Producer,Spin Master Entertainment,Canada,North America,Series,1,Scripted,Children & Family,Animation,English,"Based on the book series of the same name, ani...",Book adaptation,Series Order- Series Production,RM53537665UA,RM53537665UA,,2023 Q1,-,-,-,-,2023-03-01,2023 Q1,-,-
17,2023-03-01,TV: Renewal,Upcoming,Renewed Series,TBD,Stars in the Shirt / Stjerner i tr¿jen: Season 3,Discovery+,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,No Known Producer,No known Producer,No known Producer,,Denmark,Western Europe,Series,3,Unscripted,Reality,Miscellaneous,Danish,,Not an adaptation,Renewed Series- Series Production,ML53537650SI,V1278SI,,2023 Q1,-,-,-,-,2023-03-01,2023 Q1,-,-
26,2023-03-01,Movie,Upcoming,Movie,N/A (Movie),60 Minutes / 60 Minuten,Netflix,Netflix,SVoD,Global,Single Commissioner,VoD,External Production,LEONINE,LEONINE,W&B Television,Germany,Western Europe,Movie,N/A (Movie),Scripted,Crime & Thriller,Drama,German,"The male protagonist, a professional mixed mar...",Not an adaptation,Movie- In Production,IA535377366M,,,2023 Q1,-,-,-,-,2023-03-01,2023 Q1,-,-
27,2023-03-01,TV: First-Run,Upcoming,Series Order,TBD,Criminal / Criminel: Season 1,Netflix,Netflix,SVoD,Global,Single Commissioner,VoD,External Production,LEONINE,LEONINE,Wiedemann & Berg,Germany,Western Europe,Series,1,Scripted,Crime & Thriller,Drama,German,"The male duo of swindlers, after an unsuccessf...",Not an adaptation,Series Order- Series Production,IA53537734C,IA53537734C,,2023 Q1,-,-,-,-,2023-03-01,2023 Q1,-,-


In [10]:
imdb_mapping = run_query('''
SELECT DISTINCT TITLE_ID, ORIGINAL_TITLE AS title_name, TITLE_TYPE, US_REGION_RELEASE_DATE
FROM "ENTERPRISE_DATA"."CATALOG"."IMDB_TITLE"
--WHERE US_REGION_RELEASE_DATE >= '2023-03-01'
''')

In [11]:
ampere_temp = ampere_temp.merge(imdb_mapping, left_on = 'IMdb ID', right_on = 'title_id', how = 'left')

In [12]:
ampere_temp.count()

Month Announced                      1570
Title Type                           1570
Upcoming                             1570
Status                               1570
Fate                                 1570
Title                                1570
All Commissioners                    1570
All Commissioner Parent Companies    1570
All Commissioner Types               1570
All Commissioner Countries           1570
Co-Commission                        1570
Platform                             1570
In-House                             1570
Primary Production Company           1570
All Parent Producers                 1570
All Producers                        1368
Country Of Origin                    1570
Region of Origin                     1570
One Episode Special                  1570
Season Number                        1570
Scripted                             1570
Primary Genre                        1570
Secondary Genre                      1570
Language                          

In [13]:
ampere_temp[(ampere_temp['All Commissioners'] == 'HBO Max')
    &(ampere_temp['title_id'].notnull())]

Unnamed: 0,Month Announced,Title Type,Upcoming,Status,Fate,Title,All Commissioners,All Commissioner Parent Companies,All Commissioner Types,All Commissioner Countries,Co-Commission,Platform,In-House,Primary Production Company,All Parent Producers,All Producers,Country Of Origin,Region of Origin,One Episode Special,Season Number,Scripted,Primary Genre,Secondary Genre,Language,Ampere Description,Adaptations,Detailed Status,Production ID,Show ID,IMdb ID,Announced Quarter,Development Order Month,Development Order Quarter,Pilot Order Month,Pilot Order Quarter,Series Order Month,Series Order Quarter,Released Month,Released Quarter,title_id,title_name,title_type,us_region_release_date
42,2023-02-01,TV: Renewal,Upcoming,Renewed Series,TBD,The Great Bake Off Mexico / El Gran Pastelero:...,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,Internal Production,Warner Bros. Discovery,Warner Bros. Discovery,WarnerMedia Latin America,Mexico,Central and South America,Series,2,Unscripted,Reality,Food,Spanish,,Not an adaptation,Renewed Series- Series Production,AG53536067TG,Z53497451TG,tt14741098,2023 Q1,-,-,-,-,2023-02-01,2023 Q1,-,-,tt14741098,Bake Off México: El gran pastelero,tvSeries,
45,2023-02-01,TV: First-Run,Upcoming,Series Order,TBD,#BringBackAlice: Season 1,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,External Production,Other Indie Producer,Other Indie Producer,Telemark,Poland,Central and Eastern Europe,Series,1,Scripted,Crime & Thriller,Drama,Polish,"2 female teenagers go missing the same day, wh...",Not an adaptation,Series Order- Series Production,HS53536237#,HS53536237#,tt26732848,2023 Q1,-,-,-,-,2023-02-01,2023 Q1,-,-,tt26732848,#BringBackAlice,tvMiniSeries,
90,2023-02-01,TV: Renewal,Upcoming,Renewed Series,TBD,Velma: Season 2,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,Internal Production,Warner Bros. Discovery,Warner Bros. Discovery,Warner Bros. Animation,USA,North America,Series,2,Scripted,Comedy,Animation,English,,Not an adaptation,Renewed Series- Series Production,RM53536152V,F12642VE,tt14153790,2023 Q1,-,-,-,-,2023-02-01,2023 Q1,-,-,tt14153790,Velma,tvSeries,2023-01-12
117,2023-01-01,TV: First-Run,Upcoming,Series Order,TBD,Booster Gold: Season 1,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,External Production,Other Indie Producer,Other Indie Producer,DC Studios,USA,North America,Series,1,Scripted,Sci-Fi & Fantasy,Drama,English,"Adapted from the DC comic book character, Foll...",Comic adaptation,Series Order- Series Production,RM53534159BG,RM53534159BG,tt5716028,2023 Q1,-,-,-,-,2023-01-01,2023 Q1,-,-,tt5716028,Booster Gold,tvSeries,
125,2023-01-01,TV: First-Run,Upcoming,Series Order,TBD,Lanterns: Season 1,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,Internal Production,Warner Bros. Discovery,Warner Bros. Discovery,DC Comics,USA,North America,Series,1,Scripted,Sci-Fi & Fantasy,Crime & Thriller,English,"Based on DC comic book characters, follows a m...",Comic adaptation,Series Order- Series Production,RM53534160L,RM53534160L,tt26545992,2023 Q1,-,-,-,-,2023-01-01,2023 Q1,-,-,tt26545992,Lanterns,tvSeries,
126,2023-01-01,TV: Renewal,Upcoming,Renewed Series,TBD,Looney Tunes Cartoons: Season 6,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,Internal Production,Warner Bros. Discovery,Warner Bros. Discovery,Warner Bros,USA,North America,Series,6,Scripted,Children & Family,Animation,English,,Not an adaptation,Renewed Series- Series Production,RM53533598LT,5157LT,tt8543208,2023 Q1,-,-,-,-,2023-01-01,2023 Q1,-,-,tt8543208,Looney Tunes Cartoons,tvSeries,2019-06-12
129,2023-01-01,TV: First-Run,Upcoming,Series Order,TBD,Paradise Lost: Season 1,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,No Known Producer,No known Producer,No known Producer,,USA,North America,Series,1,Scripted,Sci-Fi & Fantasy,Drama,English,"Based on the Wonder Woman DC Comics, follows t...",Comic adaptation,Series Order- Series Production,RM53534161PL,RM53534161PL,tt26546001,2023 Q1,-,-,-,-,2023-01-01,2023 Q1,-,-,tt26546001,Paradise Lost,tvSeries,
199,2022-12-01,TV: Renewal,Upcoming,Renewed Series,TBD,The Sex Lives of College Girls: Season 3,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,Internal Production,Warner Bros. Discovery,"Lionsgate, Warner Bros. Discovery","3 Arts Entertainment, Kaling International, Wa...",USA,North America,Series,3,Scripted,Comedy,Drama,English,,Not an adaptation,Renewed Series- Series Production,RM53531439TS,F8238CG,tt11212276,2022 Q4,-,-,-,-,2022-12-01,2022 Q4,-,-,tt11212276,The Sex Lives of College Girls,tvSeries,2021-11-18
208,2022-11-01,TV: Renewal,Upcoming,Renewed Series,TBD,My Life is a Circus / Minha Vida  um Circo: S...,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,External Production,Other Indie Producer,Other Indie Producer,bigBonsai,Brazil,Central and South America,Series,2,Unscripted,Documentary,Travel,Portuguese,Documentary series follows circus troupe as th...,Not an adaptation,Renewed Series- Series Production,AG53529574MV,AG53529574MV,tt10571068,2022 Q4,-,-,-,-,2022-11-01,2022 Q4,-,-,tt10571068,Minha Vida é um Circo,tvSeries,
300,2022-10-01,TV: First-Run,Upcoming,Series Order,TBD,How to Be a Bookie: Season 1,HBO Max,Warner Bros. Discovery,SVoD,Global,Single Commissioner,VoD,Internal Production,Warner Bros. Discovery,Warner Bros. Discovery,"Warner Bros. Television, Chuck Lorre Productions",USA,North America,Series,1,Scripted,Comedy,Drama,English,Male bookie in the city of Los Angeles struggl...,Not an adaptation,Series Order- Series Production,FB53527239HT,FB53527239HT,tt22485826,2022 Q4,-,-,-,-,2022-10-01,2022 Q4,-,-,tt22485826,How to Be a Bookie,tvSeries,


In [14]:
revenue = pd.read_csv('Revenue.csv', encoding = "ISO-8859-1")
revenue.head()

Unnamed: 0,Region,Country,Company,Business Line,Business model,Distribution/Technology,Digital/Analogue,Year,Quarter,Value
0,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q4,23637
1,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q3,23514
2,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q2,23390
3,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q1,23269
4,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2026,Q4,23149


In [15]:
subscriber = pd.read_csv('Customers(RGUs).csv', encoding = "ISO-8859-1")
subscriber.head()

Unnamed: 0,Region,Country,Company,Business Line,Business model,Distribution/Technology,Digital/Analogue,Year,Quarter,Value
0,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q3,1349
1,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q4,1349
2,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q1,1349
3,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2027,Q2,1349
4,North America,USA,Acorn TV,Subscription OTT,Subscription,OTT,,2026,Q4,1349


In [16]:
revenue_2022 = revenue[(revenue['Company'].isin(['Amazon', 'Apple TV+', 'Discovery+', 'Disney+', 'Hulu',
                                                'Netflix', 'Paramount+', 'Peacock', 'HBO Max']))
                      &(revenue['Year'] == 2022)
                      &(revenue['Quarter'] == 'Q4')]

In [17]:
subscriber_2022 = subscriber[(subscriber['Company'].isin(['Amazon', 'Apple TV+', 'Discovery+', 'Disney+', 'Hulu',
                                                'Netflix', 'Paramount+', 'Peacock', 'HBO Max']))
                      &(subscriber['Year'] == 2022)
                      &(subscriber['Quarter'] == 'Q4')]

In [18]:
revenue_2022['Value'] = revenue_2022['Value'].str.replace(',', '').astype(int)
subscriber_2022['Value'] = subscriber_2022['Value'].str.replace(',', '').astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


In [31]:
subscriber_2022[['Company', 'Business model', 'Year', 'Quarter', 'Value']].rename(columns = {'Value':'Subscribers(000)'})

Unnamed: 0,Company,Business model,Year,Quarter,Subscribers(000)
66,Amazon,Subscription,2022,Q4,57187
160,Apple TV+,Subscription,2022,Q4,11672
2666,Discovery+,Subscription,2022,Q4,14994
2787,Disney+,Subscription,2022,Q4,42659
3291,HBO Max,Subscription,2022,Q4,39250
3344,Hulu,Subscription,2022,Q4,48000
3870,Netflix,Subscription,2022,Q4,65806
4750,Paramount+,Subscription,2022,Q4,35192
4798,Peacock,Subscription,2022,Q4,30957


In [None]:
revenue_2022[['Company', 'Business model', 'Year', 'Quarter', 'Value']].rename(columns = {'Value':'Revenue(000)'})

In [19]:
title_popularity = run_query('''
SELECT imdb_id, SUM(PAGE_VIEWS) AS total_page_views
FROM max_prod.ckg.wikipedia_page_views_media
WHERE DATE >= '2022-10-01'
AND DATE < '2023-01-01'
AND LANGUAGE = 'en'
GROUP BY 1
''')

In [20]:
title_popularity.head()

Unnamed: 0,imdb_id,total_page_views
0,nm2768666,531
1,nm2924573,3409
2,nm3806038,8100
3,nm0844055,2727
4,tt11199132,2904


In [21]:
title_availabity = run_query('''
SELECT DISTINCT 
       imdb.ORIGINAL_TITLE as title,
       imdb.title_id as IMDB_ID,
       amp_o.PLATFORM,
       imdb.PRODUCTION_RELEASE_DATE as PRODUCTION_RELEASE_DATE,
       imdb.NUMBER_OF_VOTES,
       imdb.IMDB_RATING,
       amp_o.country,
       amp_meta.CONTENT_TYPE,
       amp_meta.PRIMARY_GENRE,
       amp_meta.SECONDARY_GENRE,
       amp_o.IN_HOUSE,
       amp_meta.SCRIPTED,
       CASE WHEN CONTAINS(amp_meta.ORIGINAL, 'Original') THEN 'Yes' Else 'No' END AS IS_ORIGINAL,
       amp_meta.PRIMARY_PRODUCTION_COMPANY,
       amp_meta.PRIMARY_PRODUCTION_PARENT_COMPANY,
       amp_meta.PRIMARY_PRODUCTION_COUNTRY
FROM "ENTERPRISE_DATA"."CATALOG"."IMDB_TITLE" imdb 
JOIN "MAX_PROD"."CKG"."AMPERE_METADATA"amp_meta ON imdb.title_id = amp_meta.imdb_id
JOIN "MAX_PROD"."CKG"."AMPERE_OCCURRENCE" amp_o ON  amp_meta.content_pid = amp_o.content_pid 
WHERE 1=1
and amp_meta.CONTENT_TYPE in ('TVShow', 'Movie')
and amp_o.month IN ('Oct-2022', 'Nov-2022', 'Dec-2022')
and amp_o.PLATFORM in ('Amazon Prime Video', 'Apple TV+', 'Discovery+', 'Disney+', 'Hulu', 'Netflix', 'Paramount+', 'Peacock', 'HBO', 'HBO Max')
and amp_o.COUNTRY = 'USA'
--GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
''')

In [22]:
title_availabity.head()

Unnamed: 0,title,imdb_id,platform,production_release_date,number_of_votes,imdb_rating,country,content_type,primary_genre,secondary_genre,in_house,scripted,is_original,primary_production_company,primary_production_parent_company,primary_production_country
0,Liv and Maddie,tt2794380,Disney+,2015-06-26,9138,6.3,USA,TVShow,Children & Family,Comedy,In-House,Scripted,Yes,Oops Doughnuts Productions,Disney,USA
1,Lessons from the Water: Diving with a Purpose,tt15139822,Discovery+,2021-08-07,0,,USA,Movie,Documentary,Science & Education,Licensed,Unscripted,Yes,No Company Assigned,Independent / Other,No Country Assigned
2,Don't Breathe,tt4160708,Peacock,2016-03-14,282738,7.1,USA,Movie,Crime & Thriller,Horror,Licensed,Scripted,Yes,Ghost House Pictures,Sony,USA
3,Saint Maud,tt7557108,Amazon Prime Video,2019-08-28,40610,6.7,USA,Movie,Crime & Thriller,Horror,Licensed,Scripted,Yes,Film4,Independent / Other,UK
4,Hot Skull,tt11988676,Netflix,2022-12-02,6006,7.2,USA,TVShow,Sci-Fi & Fantasy,Crime & Thriller,Licensed,Scripted,Yes,TIMS Productions,Netflix,Turkey


In [24]:
title_popularity = title_popularity[title_popularity['imdb_id'].isin(title_availabity.imdb_id)]

In [25]:
title_popularity.head()

Unnamed: 0,imdb_id,total_page_views
7,tt6599742,1351
13,tt5963958,32839
52,tt12189308,213
63,tt5180504,1041169
83,tt1950235,67065


In [26]:
title_revenue = title_popularity.copy()
title_revenue['sum_page_views'] = title_popularity.total_page_views.sum()
title_revenue['revenue'] = revenue_2022.Value.sum()
title_revenue['total_subs'] = subscriber_2022.Value.sum()

In [36]:
title_revenue['title_revenue'] = title_revenue['total_page_views']/title_revenue['sum_page_views'] * title_revenue['revenue']

In [37]:
title_revenue['title_revenue_per_sub'] = title_revenue['title_revenue']/title_revenue['total_subs']

In [38]:
title_revenue.head()

Unnamed: 0,imdb_id,total_page_views,sum_page_views,revenue,total_subs,title_revenue,title_revenue_per_sub
7,tt6599742,1351,1020844936,9750346,345717,12.90374,3.7e-05
13,tt5963958,32839,1020844936,9750346,345717,313.653525,0.000907
52,tt12189308,213,1020844936,9750346,345717,2.034416,6e-06
63,tt5180504,1041169,1020844936,9750346,345717,9944.466232,0.028765
83,tt1950235,67065,1020844936,9750346,345717,640.554634,0.001853


In [39]:
revenue_2022.groupby(['Business model']).sum()

Unnamed: 0_level_0,Year,Value
Business model,Unnamed: 1_level_1,Unnamed: 2_level_1
Advertising,8088,982373
Subscription,18198,8767973


In [40]:
title_availabity = title_availabity.merge(title_revenue[['imdb_id', 'title_revenue', 'title_revenue_per_sub']],
                                          on = ['imdb_id'], how = 'left')

In [41]:
title_availabity.head()

Unnamed: 0,title,imdb_id,platform,production_release_date,number_of_votes,imdb_rating,country,content_type,primary_genre,secondary_genre,in_house,scripted,is_original,primary_production_company,primary_production_parent_company,primary_production_country,title_revenue,title_revenue_per_sub
0,Liv and Maddie,tt2794380,Disney+,2015-06-26,9138,6.3,USA,TVShow,Children & Family,Comedy,In-House,Scripted,Yes,Oops Doughnuts Productions,Disney,USA,531.259671,0.001537
1,Lessons from the Water: Diving with a Purpose,tt15139822,Discovery+,2021-08-07,0,,USA,Movie,Documentary,Science & Education,Licensed,Unscripted,Yes,No Company Assigned,Independent / Other,No Country Assigned,,
2,Don't Breathe,tt4160708,Peacock,2016-03-14,282738,7.1,USA,Movie,Crime & Thriller,Horror,Licensed,Scripted,Yes,Ghost House Pictures,Sony,USA,1925.245621,0.005569
3,Saint Maud,tt7557108,Amazon Prime Video,2019-08-28,40610,6.7,USA,Movie,Crime & Thriller,Horror,Licensed,Scripted,Yes,Film4,Independent / Other,UK,1105.87247,0.003199
4,Hot Skull,tt11988676,Netflix,2022-12-02,6006,7.2,USA,TVShow,Sci-Fi & Fantasy,Crime & Thriller,Licensed,Scripted,Yes,TIMS Productions,Netflix,Turkey,,


In [42]:
subscriber_2022['Company'] = subscriber_2022['Company'].replace('Amazon', 'Amazon Prime Video')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [44]:
title_availabity.sort_values(by = ['title_revenue_per_sub'], ascending = False).groupby(['platform']).first().reset_index()\
[['platform', 'title', 'production_release_date', 'primary_genre', 'in_house', 'scripted', 'is_original', 'primary_production_parent_company', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,in_house,scripted,is_original,primary_production_parent_company,title_revenue
0,Amazon Prime Video,The Lord of the Rings: The Rings of Power,2022-09-01,Action & Adventure,In-House,Scripted,Yes,Amazon,39475.309958
1,Apple TV+,Ted Lasso,2020-08-14,Comedy,Licensed,Scripted,Yes,NBCUniversal,9874.541525
2,Discovery+,Sister Wives,2010-09-26,Reality,In-House,Unscripted,Yes,Discovery,2919.38756
3,Disney+,Avatar,2009-11-30,Sci-Fi & Fantasy,In-House,Scripted,Yes,Disney,50316.972933
4,HBO Max,Black Adam,2022-10-19,Sci-Fi & Fantasy,Licensed,Scripted,Yes,WarnerMedia,75711.112635
5,Hulu,Andor,2022-09-20,Sci-Fi & Fantasy,In-House,Scripted,Yes,Disney,45450.438734
6,Netflix,Wednesday,2022-10-31,Sci-Fi & Fantasy,Licensed,Scripted,Yes,MGM,79339.126593
7,Paramount+,xXx: Return of Xander Cage,2016-12-23,Action & Adventure,Licensed,Scripted,Yes,Shanghai Film Group,98477.531322
8,Peacock,xXx,2002-08-09,Crime & Thriller,Licensed,Scripted,Yes,Sony,34957.778463


In [45]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).head(10)\
[['platform', 'title', 'production_release_date', 'primary_genre', 'in_house', 'scripted', 'is_original', 'primary_production_parent_company', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,in_house,scripted,is_original,primary_production_parent_company,title_revenue
20338,Paramount+,xXx: Return of Xander Cage,2016-12-23,Action & Adventure,Licensed,Scripted,Yes,Shanghai Film Group,98477.531322
20652,Netflix,Wednesday,2022-10-31,Sci-Fi & Fantasy,Licensed,Scripted,Yes,MGM,79339.126593
15466,HBO Max,Black Adam,2022-10-19,Sci-Fi & Fantasy,Licensed,Scripted,Yes,WarnerMedia,75711.112635
15861,HBO Max,House of the Dragon,2022-08-21,Sci-Fi & Fantasy,Licensed,Scripted,Yes,WarnerMedia,72763.835423
7429,HBO Max,The White Lotus,2021-07-11,Comedy,Licensed,Scripted,Yes,WarnerMedia,52292.181147
24211,Disney+,Avatar,2009-11-30,Sci-Fi & Fantasy,In-House,Scripted,Yes,Disney,50316.972933
12794,Disney+,Andor,2022-09-20,Sci-Fi & Fantasy,In-House,Scripted,Yes,Disney,45450.438734
2269,Hulu,Andor,2022-09-20,Sci-Fi & Fantasy,In-House,Scripted,Yes,Disney,45450.438734
15258,Amazon Prime Video,The Lord of the Rings: The Rings of Power,2022-09-01,Action & Adventure,In-House,Scripted,Yes,Amazon,39475.309958
17836,HBO Max,xXx,2002-08-09,Crime & Thriller,Licensed,Scripted,Yes,Sony,34957.778463


In [47]:
title_revenue_sub = title_availabity.groupby(['platform']).sum()[['title_revenue']].reset_index()\
                .merge(subscriber_2022.groupby(['Company']).sum()[['Value']],
                       left_on = ['platform'], right_on = ['Company'], how = 'left')
title_revenue_sub['title_revenue_per_sub'] = title_revenue_sub['title_revenue']/title_revenue_sub['Value']

In [48]:
title_revenue_sub.rename(columns = {'Value':'Subscribers'})

Unnamed: 0,platform,title_revenue,Subscribers,title_revenue_per_sub
0,Amazon Prime Video,1741457.0,57187,30.451978
1,Apple TV+,130261.1,11672,11.160137
2,Discovery+,66159.13,14994,4.412373
3,Disney+,1064568.0,42659,24.955296
4,HBO Max,2109718.0,39250,53.75078
5,Hulu,1650400.0,48000,34.383334
6,Netflix,2722945.0,65806,41.378374
7,Paramount+,997838.4,35192,28.354125
8,Peacock,1072167.0,30957,34.634079


In [49]:
content_spend = pd.read_csv('Content Spend.csv')

In [50]:
content_spend=content_spend[content_spend['Scripted']!= 'Totals']
content_spend = content_spend.drop(['Country', 'YearQuarter'], axis = 1)

In [51]:
meta_column = ['Company', 'Spend Type', 'Content Type', 'Primary Genre', 'Scripted']

In [52]:
num_col = [i for i in content_spend.columns if i not in meta_column]

In [53]:
for i in num_col:
    content_spend[i] = content_spend[i].replace('-', np.nan)
    content_spend[i] = content_spend[i].astype(float)

In [54]:
content_spend.head()

Unnamed: 0,Company,Spend Type,Content Type,Primary Genre,Scripted,Q2 2015,Q3 2015,Q4 2015,Q1 2016,Q2 2016,Q3 2016,Q4 2016,Q1 2017,Q2 2017,Q3 2017,Q4 2017,Q1 2018,Q2 2018,Q3 2018,Q4 2018,Q1 2019,Q2 2019,Q3 2019,Q4 2019,Q1 2020,Q2 2020,Q3 2020,Q4 2020,Q1 2021,Q2 2021,Q3 2021,Q4 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Q1 2023,Q2 2023,Q3 2023,Q4 2023,Q1 2024,Q2 2024,Q3 2024,Q4 2024,Q1 2025,Q2 2025,Q3 2025,Q4 2025,Q1 2026,Q2 2026,Q3 2026,Q4 2026,Q1 2027,Q2 2027,Q3 2027,Q4 2027
5,Amazon Prime Video,Acquired Film & TV,Movie,Action & Adventure,Scripted,,33.8,35.7,44.8,59.5,65.5,77.4,95.4,98.0,99.3,100.7,78.6,81.8,74.8,73.9,73.5,69.2,72.5,74.6,69.6,77.2,93.0,96.2,86.9,80.4,75.7,74.9,64.3,66.7,67.7,240.9,222.6,241.8,229.5,257.4,241.0,261.2,241.1,269.2,248.2,271.8,257.2,285.5,266.0,295.7,284.8,326.0,286.7,312.6,294.3,322.4
6,Amazon Prime Video,Acquired Film & TV,Movie,Action & Adventure,Unscripted,,,,,,,,,,0.1,0.1,0.0,0.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,Amazon Prime Video,Acquired Film & TV,Movie,Adult,Scripted,,0.4,0.3,0.4,0.4,0.4,0.5,0.4,0.4,0.3,0.4,0.3,0.6,0.6,0.5,0.4,0.3,0.4,0.6,0.6,0.5,0.5,0.6,0.6,0.8,1.1,0.6,0.6,0.6,0.2,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1
10,Amazon Prime Video,Acquired Film & TV,Movie,Children & Family,Scripted,,12.2,13.3,13.5,12.4,12.2,11.4,11.6,19.3,20.3,24.8,22.4,30.1,27.1,27.8,30.3,30.9,31.2,34.3,34.8,38.7,42.8,50.3,50.6,41.4,34.6,36.9,36.4,40.9,27.9,32.9,30.4,33.0,31.3,35.1,32.9,35.6,32.9,36.7,33.9,37.1,35.1,39.0,36.3,40.3,38.9,44.5,39.1,42.7,40.2,44.0
11,Amazon Prime Video,Acquired Film & TV,Movie,Children & Family,Unscripted,,,,,,,,,,,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.1,0.1,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [55]:
content_spend_22 = content_spend[['Company', 'Spend Type', 'Content Type', 'Primary Genre', 'Scripted', 'Q4 2022']]

In [57]:
content_spend_22 = content_spend_22.groupby(['Company']).sum()

In [58]:
content_spend_22

Unnamed: 0_level_0,Q4 2022
Company,Unnamed: 1_level_1
Amazon Prime Video,1829.4
Apple TV+,536.8
Disney+,645.3
HBO Max,719.7
Netflix,2444.0
Paramount+,359.7


In [61]:
title_revenue_sub = title_revenue_sub.merge(content_spend_22.reset_index(), left_on = ['platform'], right_on = ['Company'], how = 'left')

In [63]:
title_revenue_sub = title_revenue_sub.drop(['Company'], axis = 1).rename(columns = {'Value':'Subscribers', 'Q4 2022':'content_spend'})

In [68]:
title_revenue_sub['ROI'] = title_revenue_sub['title_revenue']/title_revenue_sub['content_spend']/1000

In [69]:
pd.set_option('precision', 2)

In [70]:
title_revenue_sub

Unnamed: 0,platform,title_revenue,Subscribers,title_revenue_per_sub,content_spend,ROI
0,Amazon Prime Video,1740000.0,57187,30.45,1829.4,0.95
1,Apple TV+,130000.0,11672,11.16,536.8,0.24
2,Discovery+,66200.0,14994,4.41,,
3,Disney+,1060000.0,42659,24.96,645.3,1.65
4,HBO Max,2110000.0,39250,53.75,719.7,2.93
5,Hulu,1650000.0,48000,34.38,,
6,Netflix,2720000.0,65806,41.38,2444.0,1.11
7,Paramount+,998000.0,35192,28.35,359.7,2.77
8,Peacock,1070000.0,30957,34.63,,


# AVOD/FAST

In [98]:
revenue_2022 = revenue[(revenue['Company'].isin(['PlutoTV', 'Tubi', 'Roku TV']))
                      &(revenue['Year'] == 2022)
                      &(revenue['Quarter'] == 'Q4')]

In [149]:
revenue_2022

Unnamed: 0,Region,Country,Company,Business Line,Business model,Distribution/Technology,Digital/Analogue,Year,Quarter,Value
5859,North America,USA,PlutoTV,Online Video Advertising,Advertising,-,-,2022,Q4,223462
5942,North America,USA,Roku TV,Online Video Advertising,Advertising,-,-,2022,Q4,255558
6884,North America,USA,Tubi,Online Video Advertising,Advertising,-,-,2022,Q4,150496


In [99]:
subscriber_2022 = subscriber[(subscriber['Company'].isin(['PlutoTV', 'Tubi', 'Roku TV']))
                      &(subscriber['Year'] == 2022)
                      &(subscriber['Quarter'] == 'Q4')]

In [100]:
revenue_2022['Value'] = revenue_2022['Value'].str.replace(',', '').astype(int)
subscriber_2022['Value'] = subscriber_2022['Value'].str.replace(',', '').astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [76]:
title_popularity = run_query('''
SELECT imdb_id, SUM(PAGE_VIEWS) AS total_page_views
FROM max_prod.ckg.wikipedia_page_views_media
WHERE DATE >= '2022-10-01'
AND DATE < '2023-01-01'
AND LANGUAGE = 'en'
GROUP BY 1
''')

In [119]:
title_availabity = pd.read_csv('fast titles.csv', encoding= 'unicode_escape')

In [120]:
title_availabity = title_availabity.rename(columns = {'IMDb ID':'imdb_id', 'Platform':'platform', 'Title':'title', 'First Release Year':'production_release_date',
                                                     'Primary Genre':'primary_genre', 'Scripted':'scripted', 'Original':'is_original'})

In [135]:
title_availabity = title_availabity[title_availabity['platform'].isin(['Pluto TV (on-demand)', 'Tubi', 'Roku', ])]

In [136]:
title_availabity = title_availabity[['platform', 'title', 'imdb_id', 'production_release_date', 'primary_genre', 'scripted', 'is_original']]\
                    .drop_duplicates()

In [137]:
title_popularity = title_popularity[title_popularity['imdb_id'].isin(title_availabity.imdb_id)]

In [139]:
title_revenue = title_popularity.copy()
title_revenue['sum_page_views'] = title_popularity.total_page_views.sum()
title_revenue['revenue'] = revenue_2022.Value.sum()
# title_revenue['total_subs'] = subscriber_2022.Value.sum()

In [140]:
title_revenue['title_revenue'] = title_revenue['total_page_views']/title_revenue['sum_page_views'] * title_revenue['revenue']

In [141]:
# title_revenue['title_revenue_per_sub'] = title_revenue['title_revenue']/title_revenue['total_subs']

In [142]:
title_revenue.head()

Unnamed: 0,imdb_id,total_page_views,sum_page_views,revenue,title_revenue
20,tt2531334,7307,223244273,629516,20.6
27,tt2168288,990,223244273,629516,2.79
34,tt0385278,7936,223244273,629516,22.38
53,tt3993894,4883,223244273,629516,13.77
83,tt1950235,67065,223244273,629516,189.11


In [143]:
title_availabity = title_availabity.merge(title_revenue[['imdb_id', 'title_revenue',]],
                                          on = ['imdb_id'], how = 'left')

In [144]:
title_availabity.head()

Unnamed: 0,platform,title,imdb_id,production_release_date,primary_genre,scripted,is_original,title_revenue
0,Pluto TV (on-demand),ÁBoom! (Colombia),-,2017,Entertainment,Unscripted,Not An Original,
1,Pluto TV (on-demand),ÁBoom! (Colombia): Season 1,-,2017,Entertainment,Unscripted,Not An Original,
2,Pluto TV (on-demand),Áboom! Mxico,-,[Not given],Comedy,Unscripted,Not An Original,
3,Pluto TV (on-demand),Áboom! Mxico: Season 1,-,[Not given],Comedy,Unscripted,Not An Original,
4,Tubi,Áboom! Spain,-,2015,Children & Family,Scripted,Not An Original,


In [145]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).groupby(['platform']).first().reset_index()\
[['platform', 'title', 'production_release_date', 'primary_genre', 'scripted', 'is_original', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,scripted,is_original,title_revenue
0,Pluto TV (on-demand),The Godfather,1972,Crime & Thriller,Scripted,Not An Original,2934.18
1,Roku,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39
2,Tubi,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39


In [146]:
title_availabity.sort_values(by = ['title_revenue'], ascending = False).head(10)\
[['platform', 'title', 'production_release_date', 'primary_genre', 'scripted', 'is_original', 'title_revenue']]

Unnamed: 0,platform,title,production_release_date,primary_genre,scripted,is_original,title_revenue
64960,Roku,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39
64961,Tubi,Terrifier,2018,Crime & Thriller,Scripted,Not An Original,4599.39
69455,Pluto TV (on-demand),The Godfather,1972,Crime & Thriller,Scripted,Not An Original,2934.18
66376,Roku,The Blair Witch Project,1999,Horror,Scripted,Not An Original,2504.33
66377,Tubi,The Blair Witch Project,1999,Horror,Scripted,Not An Original,2504.33
66375,Pluto TV (on-demand),The Blair Witch Project,1999,Horror,Scripted,Not An Original,2504.33
32293,Tubi,Hellraiser,1987,Crime & Thriller,Scripted,Not An Original,2410.62
65316,Pluto TV (on-demand),The Addams Family (1964),1964,Horror,Scripted,Not An Original,2311.37
65317,Roku,The Addams Family (1964),1964,Horror,Scripted,Not An Original,2311.37
65318,Tubi,The Addams Family (1964),1964,Horror,Scripted,Not An Original,2311.37


In [147]:
title_revenue_sub = title_availabity.groupby(['platform']).sum()[['title_revenue']].reset_index()\
                .merge(subscriber_2022.groupby(['Company']).sum()[['Value']],
                       left_on = ['platform'], right_on = ['Company'], how = 'left')
# title_revenue_sub['title_revenue_per_sub'] = title_revenue_sub['title_revenue']/title_revenue_sub['Value']

In [148]:
title_revenue_sub.rename(columns = {'Value':'Subscribers'})

Unnamed: 0,platform,title_revenue,Subscribers
0,Pluto TV (on-demand),555759.93,
1,Roku,655894.04,
2,Tubi,663633.83,
