https://thecleverprogrammer.com/2024/09/30/netflix-content-strategy-analysis-with-python/

In [137]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

In [138]:
# Load the Netflix content dataset into a DataFrame.
# The CSV location can be overridden through the NETFLIX_CONTENT_CSV environment
# variable so the notebook also runs on machines that do not have the original
# author's directory layout; when the variable is unset the original path is used.
NETFLIX_CSV_PATH = os.environ.get(
    "NETFLIX_CONTENT_CSV",
    r"D:\WORK\datascience\practice\Netflix-Content-Strategy-Analysis\netflix_content_2023.csv",
)
netflix_content = pd.read_csv(NETFLIX_CSV_PATH)
netflix_content

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie
...,...,...,...,...,...,...
24807,We Are Black and British: Season 1,No,,100000,English,Show
24808,Whitney Cummings: Can I Touch It?,Yes,2019-07-30,100000,English,Movie
24809,Whitney Cummings: Jokes,No,2022-07-26,100000,English,Movie
24810,"Whose Vote Counts, Explained: Limited Series",Yes,2020-09-28,100000,English,Movie


In [139]:
netflix_content.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24812 entries, 0 to 24811
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Title                24812 non-null  object
 1   Available Globally?  24812 non-null  object
 2   Release Date         8166 non-null   object
 3   Hours Viewed         24812 non-null  object
 4   Language Indicator   24812 non-null  object
 5   Content Type         24812 non-null  object
dtypes: object(6)
memory usage: 1.1+ MB


In [140]:
# 'Hours Viewed' is loaded as an object column (see .info() above): drop the
# comma separators first, then cast the cleaned values to float.
hours_without_commas = netflix_content['Hours Viewed'].replace(',', '', regex=True)
netflix_content['Hours Viewed'] = hours_without_commas.astype('float')
netflix_content

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000.0,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000.0,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000.0,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000.0,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000.0,English,Movie
...,...,...,...,...,...,...
24807,We Are Black and British: Season 1,No,,100000.0,English,Show
24808,Whitney Cummings: Can I Touch It?,Yes,2019-07-30,100000.0,English,Movie
24809,Whitney Cummings: Jokes,No,2022-07-26,100000.0,English,Movie
24810,"Whose Vote Counts, Explained: Limited Series",Yes,2020-09-28,100000.0,English,Movie


In [141]:
# Total hours viewed for each content type (one row per 'Content Type' value).
hours_grouped_by_type = netflix_content.groupby('Content Type')['Hours Viewed']
content_type = hours_grouped_by_type.sum()
content_type

Content Type
Movie    5.063780e+10
Show     1.077641e+11
Name: Hours Viewed, dtype: float64

In [142]:
# Bar chart: total hours viewed per content type, one color per bar.
content_type_bars = go.Bar(
    x=content_type.index,
    y=content_type.values,
    marker_color=['skyblue', 'red'],
)
fig = go.Figure(data=[content_type_bars])

# Titles, axis labels and figure size.
content_type_layout = dict(
    title='Total Viewership Hours by Content Type (2023)',
    xaxis_title='Content Type',
    yaxis_title='Total Hours Viewed (in billions)',
    xaxis_tickangle=0,
    height=500,
    width=800,
)
fig = fig.update_layout(**content_type_layout)

fig.show()

In [143]:
# Total hours viewed for each value of 'Language Indicator'.
hours_grouped_by_language = netflix_content.groupby('Language Indicator')['Hours Viewed']
content_langauge = hours_grouped_by_language.sum()
content_langauge

Language Indicator
English        1.244417e+11
Hindi          9.261000e+08
Japanese       7.102000e+09
Korean         1.537840e+10
Non-English    1.043910e+10
Russian        1.146000e+08
Name: Hours Viewed, dtype: float64

In [144]:
# Bar chart: total hours viewed per language.
# The color list must contain one entry per bar. The original list had four
# colors while the aggregate above has six languages, so the trailing bars had
# no explicit color. Cycle through a palette so the list length always matches
# the number of languages, however many the data contains.
language_palette = ['skyblue', 'red', 'blue', 'orange', 'green', 'purple']
language_bar_colors = [
    language_palette[position % len(language_palette)]
    for position in range(len(content_langauge))
]

fig = go.Figure(data=[go.Bar(x=content_langauge.index,
                             y=content_langauge.values,
                             marker_color=language_bar_colors)])

# Titles, axis labels and figure size; labels are tilted because there are many categories.
fig.update_layout(
    title='Total Viewership Hours by Language (2023)',
    xaxis_title='Language',
    yaxis_title='Total Hours Viewed (in billions)',
    xaxis_tickangle=45,
    height=600,
    width=1000
)
fig.show()

Analyze how viewership varies with release date to identify trends over time, such as seasonality or patterns around specific months.

In [145]:
# Parse the release dates once, store them back, and derive the calendar month.
parsed_release_dates = pd.to_datetime(netflix_content['Release Date'])
netflix_content['Release Date'] = parsed_release_dates
netflix_content['release month'] = parsed_release_dates.dt.month

# Sum hours viewed per release month. Rows with no release date have a missing
# month; with groupby's default settings those rows are not part of any group.
hours_grouped_by_month = netflix_content.groupby('release month')['Hours Viewed']
monthly_viwership = hours_grouped_by_month.sum()
monthly_viwership

release month
1.0     7.271600e+09
2.0     7.103700e+09
3.0     7.437100e+09
4.0     6.865700e+09
5.0     7.094600e+09
6.0     8.522000e+09
7.0     6.524800e+09
8.0     6.817800e+09
9.0     7.262200e+09
10.0    8.123200e+09
11.0    7.749500e+09
12.0    1.005580e+10
Name: Hours Viewed, dtype: float64

In [146]:
# Line chart: total hours viewed per release month.
fig = go.Figure(data=[
    go.Scatter(
        x=monthly_viwership.index,
        y=monthly_viwership.values,
        mode='lines+markers')
])

# Give the figure a title and the same axis wording as the other charts in this
# notebook (the original had no title and a misspelled y-axis label).
# Month numbers 1-12 on the x axis are shown as abbreviated month names.
fig = fig.update_layout(
    title='Total Viewership Hours by Release Month (2023)',
    xaxis_title='Month',
    yaxis_title='Total Hours Viewed (in billions)',
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    ),
)

fig.show()

Analyze the most successful content (both shows and movies) to understand the specific characteristics, such as genre or theme, that may have contributed to high viewership.

In [147]:
# The five rows with the highest 'Hours Viewed'.
top_content = netflix_content.nlargest(n=5, columns='Hours Viewed')
top_content

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type,release month
0,The Night Agent: Season 1,Yes,2023-03-23,812100000.0,English,Show,3.0
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000.0,English,Show,1.0
18227,King the Land: Limited Series // 킹더랜드: 리미티드 시리즈,Yes,2023-06-17,630200000.0,Korean,Movie,6.0
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000.0,Korean,Show,12.0
18214,ONE PIECE: Season 1,Yes,2023-08-31,541900000.0,English,Show,8.0


Monthly viewership trends by content type.

In [156]:
# Hours viewed summed per release month (rows) and content type (columns).
monthly_viwership_bytrends = pd.pivot_table(
    netflix_content,
    index='release month',
    columns='Content Type',
    values='Hours Viewed',
    aggfunc='sum',
)

# One line per content type, all sharing the release-month x axis.
fig = go.Figure()
for type_label, hours_per_month in monthly_viwership_bytrends.items():
    trend_line = go.Scatter(
        x=monthly_viwership_bytrends.index,
        y=hours_per_month,
        mode='lines+markers',
        name=type_label,
    )
    fig.add_trace(trend_line)

# Month numbers 1-12 are shown as abbreviated month names.
month_axis = dict(
    tickmode='array',
    tickvals=list(range(1, 13)),
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)
fig.update_layout(
    title='Viewership Trends by Content Type and Release Month (2023)',
    xaxis_title='Month',
    yaxis_title='Total Hours Viewed (in billions)',
    xaxis=month_axis,
    height=600,
    width=1000,
    legend_title='Content Type',
)

fig.show()