In [208]:
import numpy as np
import pandas as pd

import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.stats import diagnostic

from scipy import stats
import scipy.fft as sf

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Route pandas' built-in `.plot()` calls through plotly.
pd.options.plotting.backend = "plotly" 

In [209]:
def ft(df, metric, d=0.1):
    """Return the one-sided amplitude spectrum of a real-valued column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the signal, one row per sample.
    metric : str
        Name of the column to transform.
    d : float, default 0.1
        Sample spacing forwarded to ``scipy.fft.rfftfreq``.

    Returns
    -------
    freqs : numpy.ndarray
        Frequency of each bin, in the same (ascending) order as ``fft``.
    fft : numpy.ndarray
        ``|rfft(signal)| / N`` where ``N`` is the number of rows in ``df``.
    """
    signal = df[metric].to_numpy()
    N = signal.shape[0]

    # Fourier transform
    fft = np.abs(sf.rfft(signal)) / N
    # Keep the bins in rfft order: reversing them would pair every amplitude
    # with the wrong frequency.
    freqs = sf.rfftfreq(N, d=d)

    return freqs, fft

# ***Timeseries***

### *Sample Analysis*

In [210]:
# Load the sampled timeseries from parquet and show its (rows, columns) shape.
sample_df = pd.read_parquet('data/df_sample_timeseries.parquet')
sample_df.shape

(5390, 18)

In [211]:
def group_by_game(df):
    """Average the numeric columns per (game, date) and group the result by game.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``game_category`` and ``datetime`` columns.

    Returns
    -------
    games : list
        Group names, in the order the group-by reports them.
    grouped_df : pandas.core.groupby.DataFrameGroupBy
        The averaged frame grouped by ``game_category``; iterating it yields
        ``(game, frame)`` pairs.
    """
    averaged = (
        df.groupby(['game_category', 'datetime'])
        # Non-numeric columns (channel ids, names, ...) cannot be averaged;
        # newer pandas raises instead of silently dropping them.
        .mean(numeric_only=True)
        .reset_index()
    )
    # Group by a scalar key so iteration yields plain game names rather than
    # 1-tuples (callers use the name as a dict key and as a trace label).
    grouped_df = averaged.groupby('game_category')
    games = list(grouped_df.groups.keys())
    return games, grouped_df


def timeseries_correlations(df, groups, feature, verbose=False):
    """Pairwise Pearson correlation of ``feature`` between groups.

    Parameters
    ----------
    df : iterable of (name, frame) pairs
        Typically the group-by object returned by ``group_by_game``.
    groups : sequence
        Only its length is used, to size the square result matrix.
    feature : str
        Column correlated between every pair of groups.
    verbose : bool, default False
        When true, print each pair's name and the full ``pearsonr`` result.

    Returns
    -------
    numpy.ndarray
        ``len(groups) x len(groups)`` matrix of correlation coefficients
        rounded to two decimals, in iteration order of ``df``.

    Notes
    -----
    ``scipy.stats.pearsonr`` requires both series to have the same length.
    """
    # Materialise the pairs once instead of re-iterating the group-by for
    # every row of the matrix.
    members = list(df)
    corrs = np.zeros(shape=(len(groups), len(groups)))
    for i, (category_a, category_a_df) in enumerate(members):
        for j, (category_b, category_b_df) in enumerate(members):
            corr = stats.pearsonr(category_a_df[feature], category_b_df[feature])
            corrs[i, j] = round(corr[0], 2)
            if verbose:
                print('-' * 30)
                print(f"{category_a}-{category_b}")
                # Label with the actual feature rather than a fixed metric name.
                print(f"{feature} Corr. : {corr}")
    return corrs

In [212]:
def remove_games(df, games):
    """Return the rows of ``df`` whose ``game_category`` is not in ``games``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose a ``game_category`` column.
    games : str or list-like of str
        One game name or several names to drop.

    Returns
    -------
    pandas.DataFrame
        Filtered frame; ``df`` itself is not modified.
    """
    # isinstance also covers str subclasses such as numpy.str_, which `isin`
    # would otherwise reject as a non list-like argument.
    if isinstance(games, str):
        games = [games]
    return df[~df.game_category.isin(games)]

def select_channels(df, channels):
    """Return the rows of ``df`` whose ``channel_name`` is in ``channels``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose a ``channel_name`` column.
    channels : str or list-like of str
        One channel name or several names to keep.

    Returns
    -------
    pandas.DataFrame
        Filtered frame; ``df`` itself is not modified.
    """
    # isinstance also covers str subclasses such as numpy.str_, which `isin`
    # would otherwise reject as a non list-like argument.
    if isinstance(channels, str):
        channels = [channels]
    return df[df.channel_name.isin(channels)]

In [213]:
# Fortnite Release Date
F_date = '2017-07-21'

sample_df = remove_games(sample_df, 'Gaming News')

# Samples
# "Before" = every game except Fortnite, strictly before the release date.
# The date mask is built from the already-filtered frame (via a callable in
# .loc) so that mask and frame share the same index; masking with
# `sample_df.datetime` forces pandas to reindex the mask and emits
# "Boolean Series key will be reindexed to match DataFrame index."
sample_beforeF = remove_games(sample_df, 'Fortnite').loc[lambda frame: frame.datetime < F_date]
# "With" = every game, from the release date onwards.
sample_withF = sample_df[sample_df.datetime >= F_date]

# From here on `sample_beforeF` / `sample_withF` are the per-game group-by
# objects returned by group_by_game, not plain DataFrames.
games_beforeF, sample_beforeF = group_by_game(sample_beforeF)
games_withF, sample_withF = group_by_game(sample_withF)


Boolean Series key will be reindexed to match DataFrame index.



##### ***Before Fortnite***

In [214]:
# Pair each game with one Plotly qualitative colour; zip stops at the shorter
# input, so only as many colours as there are games get used.
colors = dict(zip(games_withF, px.colors.qualitative.Plotly))

In [215]:
# Column plotted by the timeseries cells, and its label for titles and axes.
metric = 'delta_views'
metric_txt = {'delta_subs': 'Delta Subscriptions'}.get(metric, 'Delta Views')

In [216]:
# Pre-Fortnite sample: one line per game, coloured from the shared `colors` map.
fig = go.Figure()

for (game, game_df) in sample_beforeF:
    fig.add_trace(
        go.Scatter(
            name=game,
            mode='lines',
            x=game_df['datetime'],
            y=game_df[metric],
            line={'color': colors[game]},
        )
    )

# Titles and axis labels
fig.update_layout({
    'title': f"Main Games: Weekly {metric_txt} Timeseries",
    'xaxis_title': 'Date',
    'yaxis_title': metric_txt,
    'showlegend': True,
})

# Export a standalone HTML copy, then render inline.
fig.write_html("plots/dviews_beforeF.html")

fig.show()

In [217]:
# With-Fortnite sample: one line per game, coloured from the shared `colors` map.
fig = go.Figure()

for (game, game_df) in sample_withF:
    fig.add_trace(
        go.Scatter(
            name=game,
            mode='lines',
            x=game_df['datetime'],
            y=game_df[metric],
            line={'color': colors[game]},
        )
    )

max_y = 25_000_000
br_out = pd.to_datetime('2017-09-26')

# Dashed grey vertical marker for the Fortnite Battle-Royale mode date,
# with a rotated label placed at 80% of max_y.
fig.add_vline(x=br_out, line_color='grey', line_dash='dash')
fig.add_annotation(
    text="Battle-Royale Mode Out",
    x=br_out,
    y=max_y*0.8,
    textangle=-90,
    xshift=-10,
    showarrow=False,
    font={'color': 'grey', 'size': 10},
)

# Titles and axis labels
fig.update_layout({
    'title': f"Main Games: Weekly {metric_txt} Timeseries",
    'xaxis_title': 'Date',
    'yaxis_title': metric_txt,
    'showlegend': True,
})

# Export a standalone HTML copy, then render inline.
fig.write_html("plots/dviews_withF.html")

fig.show()

In [218]:
# With-Fortnite sample: week-over-week relative change of the metric per game.
fig = go.Figure()

for (game, game_df) in sample_withF:
    fig.add_trace(go.Scatter(
        x=game_df['datetime'],
        # pct_change() is a fraction (1.0 == +100%); the first point is NaN.
        y=game_df[metric].pct_change(),
        mode='lines',
        name=game,
        line=dict(color=colors[game]),
    ))

max_y = 1
br_out = pd.to_datetime('2017-09-26')

# Add a vertical line for Fortnite Battle-Royale Mode
fig.add_vline(
        x=br_out,
        line_dash='dash',
        line_color='grey',
    )
fig.add_annotation(
        x=br_out, 
        y=max_y*0.8,
        text="Battle-Royale Mode Out",
        font=dict(color='grey', size=10),
        showarrow=False,
        xshift=-10,
        textangle=-90
    )

# Plot Titles & Axis Settings
# The labels name the quantity actually drawn (relative change, not the raw metric).
fig.update_layout(
        title=f"Main Games: Weekly {metric_txt} Relative Change Timeseries",
        xaxis_title='Date',
        yaxis_title=f"{metric_txt} (relative change)",
        showlegend=True
    )

# Dedicated file name: writing to plots/dviews_withF.html would overwrite the
# raw-metric figure exported by the previous cell.
fig.write_html("plots/dviews_pct_change_withF.html")

fig.show()

In [219]:
# Per-game summary statistics of delta_views for the with-Fortnite sample.
sample_withF['delta_views'].describe().T

game_category,COD,FIFA,Fortnite,GTA,LOL,Minecraft,Mobile Games
count,114.0,114.0,114.0,114.0,114.0,114.0,114.0
mean,1304813.0,3054835.0,19892140.0,4061399.0,1473028.0,22004490.0,879616.0
std,623199.7,750682.8,10783920.0,715877.2,376969.3,4221552.0,364275.5
min,431852.7,1961598.0,1772967.0,2357766.0,786062.3,12892360.0,349793.1
25%,829062.8,2560916.0,12597560.0,3688860.0,1225581.0,19123730.0,596659.2
50%,1069899.0,2891925.0,21541530.0,4144359.0,1419759.0,21442680.0,845819.9
75%,1731089.0,3334220.0,26838180.0,4583120.0,1634407.0,23853710.0,1051523.0
max,3346936.0,6698777.0,42411450.0,5459258.0,3214658.0,34672880.0,1972783.0


In [220]:
# Heatmap of the game-by-game correlation matrix for the pre-Fortnite sample.
fig = px.imshow(
    img=timeseries_correlations(df=sample_beforeF, groups=games_beforeF, feature=metric),
    x=[*games_beforeF],
    y=[*games_beforeF],
    title='Main Games: Weekly Delta Views Correlation',
    text_auto=True,
)

# Export a standalone HTML copy, then render inline.
fig.write_html('plots/dviews_corr_beforeF.html')

fig.show()

In [221]:
# Single-row heatmap: Fortnite's correlation with every game (itself included).
# The full matrix is computed, then Fortnite's row is kept as a 1 x n array.
fig = px.imshow(
    img=timeseries_correlations(df=sample_withF, groups=games_withF, feature=metric)[
        games_withF.index('Fortnite')
    ][np.newaxis, :],
    x=[*games_withF],
    y=['Fortnite'],
    title='Fortnite - Main Games: Weekly Delta Views Correlation',
    text_auto=True,
)

# Export a standalone HTML copy, then render inline.
fig.write_html('plots/dviews_corr_withF.html')

fig.show()

In [222]:
# List the distinct channel names present in the sample.
sample_df.channel_name.unique()

array(['PopularMMOs', 'Klaus Gaming', 'Sky Does Everything', 'Castro1021',
       'AA9skillz', 'KjraGaming', 'LispyJimmy', 'Pianta', 'speedyw03',
       'Miniminter', 'Orange Juice Gaming', 'FaZe Clan', 'SSundee',
       'TheGamingRevolution', 'Past Amazing', 'NepentheZ', 'Logdotzip',
       'Ali-A', 'MrDalekJD', 'KingStix', 'I JACK SPARROW COC',
       'MagikarpUsedFly', 'XpertThief', 'Ninja', 'ZwebackHD', 'DanTDM',
       'TheXclusiveAce', 'Lachlan', 'Galadon Gaming', 'LazarBeam', 'Tfue',
       'Trick2G', 'RaidAway', 'DarkViperAU', 'SkinSpotlights'],
      dtype=object)

In [239]:
# Three side-by-side bar charts (subscribers, videos, rank) of each game's
# channels; a dropdown switches which game's bars are visible.
games = sample_df.game_category.unique()
titles = np.tile(['# Subscribers', '# Videos', 'Rank'], len(games))

cols = 3
fig = make_subplots(rows=1, cols=cols, subplot_titles=titles)

for i, game_category in enumerate(games):
    # One row per channel of this game, ordered by subscriber rank.
    subset = (
        sample_df.loc[sample_df['game_category'].eq(game_category)]
        .drop_duplicates(subset=['channel_name'])
        .sort_values(by='subscriber_rank_sb')
    )

    # One bar trace per statistic, placed in columns 1..3 in this fixed order;
    # only Fortnite's traces start out visible.
    for col_idx, stat_col in enumerate(('subscribers_cc', 'videos_cc', 'subscriber_rank_sb'), start=1):
        fig.add_trace(
            go.Bar(
                name=game_category,
                x=subset['channel_name'],
                y=subset[stat_col],
                visible=game_category=='Fortnite',
            ),
            row=1,
            col=col_idx,
        )

# Dropdown: each entry shows only the traces named after its game and
# rewrites the figure title accordingly.
fig.update_layout(
    updatemenus=[
        {
            'active': list(games).index('Fortnite'),
            'type': 'dropdown',
            'x': 1,
            'y': 1.27,
            'buttons': [
                {
                    'label': game,
                    'method': 'update',
                    'args': [
                        {'visible': [game == trace.name for trace in fig.data]},
                        {'title': f'{game} Top 5 YouTubers Statistics'},
                    ],
                }
                for game in games
            ],
        },
    ],
    title='Fortnite Top 5 YouTubers Statistics',
    showlegend=False,
)

# Export a standalone HTML copy, then render inline.
fig.write_html("plots/top5_stats.html")

fig.show()

In [294]:
# Per-game mean of three channel statistics, one bar trace per statistic,
# with a dropdown that selects which statistic is shown.
fig = go.Figure()
metrics = ['subscriber_rank_sb', 'subscribers_cc', 'videos_cc']
metrics_txt = {'subscriber_rank_sb': 'Rank', 'subscribers_cc': '# Subscribers', 'videos_cc': '# Videos'}
# Statistic shown when the figure first renders; drives the initial trace
# visibility, the active dropdown entry and the initial title together.
default_stat = 'subscribers_cc'

# The loop variable is deliberately not called `metric`: that notebook-level
# name is read by later plotting cells and must keep its value when the
# notebook is run top to bottom.
for stat in metrics:
    fig.add_trace(
        go.Bar(
            x=games_withF,
            y=sample_withF[stat].mean(),
            width=0.75,
            name=stat,
            # Only the default statistic starts visible, matching the dropdown.
            visible=(stat == default_stat),
        )
    )


# Update layout to add dropdown button
fig.update_layout(
    updatemenus=[
        dict(
            active=metrics.index(default_stat),
            type='dropdown',
            x=1,
            y=1.27,
            buttons=[
                dict(label=metrics_txt[stat], method='update',
                     args=[{'visible': [stat == trace.name for trace in fig.data]},
                           {'title': f'YouTuber Mean {metrics_txt[stat]} Per Game'}])
                for stat in metrics
            ],
        ),
    ]
)

fig.update_layout(
    title=f'YouTuber Mean {metrics_txt[default_stat]} Per Game',
    showlegend=False
)

fig.write_html("plots/game_mean_stats.html")

fig.show()

In [296]:
# Display the sample frame as the cell output.
sample_df

Unnamed: 0,channel,category,datetime,views,delta_views,subs,delta_subs,videos,delta_videos,activity,category_cc,join_date,channel_name,subscribers_cc,videos_cc,subscriber_rank_sb,weights,game_category
1,UCpGdL9Sn3Q5YWUH2DVUW1Ug,Gaming,2016-10-17,6.756017e+09,4.745762e+07,9.224396e+06,73457.375,3266,11,26,Gaming,2012-04-17,PopularMMOs,16900000,4521,187.0,2.0870,Minecraft
4527,UCN35DM_vPpMz6zPBxeBRWIA,Gaming,2016-10-17,9.580426e+05,4.905162e+04,9.970750e+03,829.500,260,2,5,Gaming,2015-11-03,Klaus Gaming,427000,1400,40344.0,3.3215,Mobile Games
2971,UCKlhpmbHGxBE6uw9B_uLeqQ,Gaming,2016-10-17,3.388057e+09,2.498308e+06,1.208029e+07,0.000,1692,0,14,Gaming,2011-02-28,Sky Does Everything,11600000,1947,400.0,2.0870,Minecraft
1250,UCJk0D9no2q29C2FGaPXrG4g,Gaming,2016-10-17,2.468123e+07,2.452740e+05,4.842880e+05,0.000,141,0,3,Gaming,2014-02-08,Castro1021,1450000,602,9074.0,2.5455,FIFA
941,UCL7vy7MDOq9-tE-r6taQBlw,Gaming,2016-10-17,2.760783e+08,1.830575e+06,1.214710e+06,8924.875,2154,7,16,Gaming,2011-03-08,AA9skillz,1850000,3273,7384.0,2.5350,FIFA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3903,UCuSrv3qgQA7SSi6R9bWag5A,Gaming,2019-09-23,3.966678e+08,1.728653e+06,1.080000e+06,0.000,2023,4,9,Gaming,2009-03-23,Trick2G,1080000,2025,14445.0,2.8430,LOL
1559,UCEe076nFuVobN0bAsXK7ICw,Gaming,2019-09-23,1.571503e+09,3.555959e+05,6.400000e+06,0.000,1869,0,0,Gaming,2006-06-11,speedyw03,6400000,1872,1186.0,2.0870,GTA
3748,UCaMi81Bt9geDCcow-hHrP_Q,Gaming,2019-09-23,1.100189e+08,8.298482e+05,7.745139e+05,3402.875,1889,5,9,Gaming,2013-06-15,RaidAway,773000,1888,20149.0,2.8690,COD
3592,UCWBQKvXstY4yCLQpGvfB1Ug,Gaming,2019-09-23,4.834162e+07,6.241375e+03,1.531202e+05,0.000,840,0,0,Gaming,2013-11-15,I JACK SPARROW COC,153981,841,117055.0,4.4920,Mobile Games


In [303]:
# Year taken from each row's join_date, stored as a new column on sample_df.
sample_df['join_year'] = pd.to_datetime(sample_df['join_date']).dt.year

# Keep one row per channel, then count channels per (join year, game).
df_counts = (
    sample_df
    .drop_duplicates(subset=['channel_name'])
    .groupby(['join_year', 'game_category'])
    .size()
    .reset_index(name='count')
)

all_years = [*range(2005, 2017)]

color_palette = px.colors.qualitative.Plotly

# Bars per join year, coloured by game, with the count printed on each bar.
fig = px.bar(
    df_counts,
    x='join_year',
    y='count',
    color='game_category',
    text='count',
    title='Distribution of Join Dates by Game Category (Per Year)',
    labels={'join_year': 'Join Year', 'count': 'Frequency'},
    color_discrete_sequence=color_palette,
)

fig.update_xaxes(categoryarray=all_years, categoryorder='array')

# Export a standalone HTML copy, then render inline.
fig.write_html("plots/join_dates.html")

fig.show()

## Separate Analyses: Marshmello, PUBG and Top Fortnite YouTubers

In [224]:
# Load the Marshmello / Fortnite-channel timeseries and group it with
# group_by_game; `mf` holds the group names, `mf_df` the group-by object.
mf, mf_df = group_by_game(pd.read_parquet('data/mf_timeseries.parquet'))
mf

['Fortnite Official Channel', 'Marshmello']

In [225]:
# One line per group of the Marshmello dataset, default trace colours.
fig = go.Figure()

for (game, game_df) in mf_df:
    fig.add_trace(
        go.Scatter(
            name=game,
            mode='lines',
            x=game_df['datetime'],
            y=game_df[metric],
        )
    )

date = '2019-02-02'

# Dashed grey vertical marker at the concert date, with a rotated label.
fig.add_vline(x=date, line_color='grey', line_dash='dash')
fig.add_annotation(
    text="Marshmello Concert",
    x=date,
    y=150_000_000,
    textangle=-90,
    xshift=-10,
    showarrow=False,
    font={'color': 'grey', 'size': 10},
)


# Titles and axis labels
fig.update_layout({
    'title': f"Marshmello: Weekly {metric_txt} Timeseries",
    'xaxis_title': 'Date',
    'yaxis_title': metric_txt,
    'showlegend': True,
})

# Export a standalone HTML copy, then render inline.
fig.write_html("plots/marshmello.html")

fig.show()

In [226]:
# Load the PUBG timeseries and group it with group_by_game;
# `pubg` holds the group names, `pubg_df` the group-by object.
pubg, pubg_df = group_by_game(pd.read_parquet('data/pubg_timeseries.parquet'))

In [227]:
# Weekly delta_subs per group of the PUBG dataset.
fig = go.Figure()

# This cell always plots delta_subs, so it carries its own label instead of
# reusing the notebook-level metric_txt (which describes `metric`).
pubg_metric_txt = 'Delta Subscriptions'

for (game, game_df) in pubg_df:
    # Show the underlying frame of each group next to the figure.
    display(game_df)
    fig.add_trace(go.Scatter(
        x=game_df['datetime'],
        y=game_df['delta_subs'],
        mode='lines',
        name=game,
    ))

fig.update_layout(
        title=f"PUBG vs Fortnite: Weekly {pubg_metric_txt} Timeseries",
        xaxis_title='Date',
        yaxis_title=pubg_metric_txt,
        showlegend=True
    )

# NOTE: this figure is not exported. The export that used to be commented out
# here targeted plots/dviews_beforeF.html, which is the file written by the
# pre-Fortnite timeseries cell; pick a dedicated file name before enabling it.

fig.show()

Unnamed: 0,game_category,datetime,views,delta_views,subs,delta_subs,videos,delta_videos,activity,subscribers_cc,videos_cc,subscriber_rank_sb,weights
0,Fortnite,2016-10-10 00:00:00,7.047610e+06,9.470625e+03,5.225712e+04,0.000000,278.0,0.0,0.0,22400000.0,929.0,104.0,2.087
1,Fortnite,2016-10-17 00:00:00,7.061097e+06,1.348738e+04,5.252300e+04,265.875000,278.0,0.0,0.0,22400000.0,929.0,104.0,2.087
2,Fortnite,2016-10-24 00:00:00,7.073612e+06,1.251538e+04,5.278850e+04,265.500000,278.0,0.0,0.0,22400000.0,929.0,104.0,2.087
3,Fortnite,2016-10-30 23:00:00,7.086616e+06,1.300379e+04,5.316472e+04,376.215026,278.0,0.0,0.0,22400000.0,929.0,104.0,2.087
4,Fortnite,2016-11-06 23:00:00,7.112401e+06,2.578447e+04,5.414773e+04,983.019349,282.0,4.0,4.0,22400000.0,929.0,104.0,2.087
...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,Fortnite,2019-08-26 00:00:00,1.925888e+09,6.314353e+06,2.237446e+07,26070.750000,919.0,2.0,6.0,22400000.0,929.0,104.0,2.087
151,Fortnite,2019-09-02 00:00:00,1.935571e+09,9.682280e+06,2.241391e+07,39453.750000,923.0,4.0,7.0,22400000.0,929.0,104.0,2.087
152,Fortnite,2019-09-09 00:00:00,1.938905e+09,3.334153e+06,2.241518e+07,1270.500000,924.0,1.0,6.0,22400000.0,929.0,104.0,2.087
153,Fortnite,2019-09-16 00:00:00,1.941802e+09,2.897669e+06,2.240384e+07,0.000000,925.0,1.0,3.0,22400000.0,929.0,104.0,2.087


Unnamed: 0,game_category,datetime,views,delta_views,subs,delta_subs,videos,delta_videos,activity,subscribers_cc,videos_cc,subscriber_rank_sb,weights
155,PUBG,2016-10-10 00:00:00,6.480921e+06,5599.125000,164082.375000,0.000000,31.0,0.0,0.0,256000.0,58.0,65935.0,3.999
156,PUBG,2016-10-17 00:00:00,6.495188e+06,14267.125000,165174.750000,1092.375000,31.0,0.0,0.0,256000.0,58.0,65935.0,3.999
157,PUBG,2016-10-24 00:00:00,6.512070e+06,16881.439119,166712.658031,1537.908031,31.0,0.0,0.0,256000.0,58.0,65935.0,3.999
158,PUBG,2016-10-30 23:00:00,6.526600e+06,14529.823834,167676.264249,963.606218,31.0,0.0,0.0,256000.0,58.0,65935.0,3.999
159,PUBG,2016-11-06 23:00:00,6.541634e+06,15034.726630,168805.770833,1129.506585,31.0,0.0,0.0,256000.0,58.0,65935.0,3.999
...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,PUBG,2019-08-26 00:00:00,7.726160e+06,8521.625000,256783.750000,0.000000,54.0,0.0,0.0,256000.0,58.0,65935.0,3.999
306,PUBG,2019-09-02 00:00:00,7.734498e+06,8337.875000,256641.000000,0.000000,54.0,0.0,0.0,256000.0,58.0,65935.0,3.999
307,PUBG,2019-09-09 00:00:00,7.743163e+06,8664.875000,256503.500000,0.000000,54.0,0.0,0.0,256000.0,58.0,65935.0,3.999
308,PUBG,2019-09-16 00:00:00,7.751091e+06,7928.375000,256304.500000,0.000000,54.0,0.0,0.0,256000.0,58.0,65935.0,3.999


In [228]:
# Load the Fortnite YouTubers timeseries and group it with group_by_game;
# `fortnite` holds the group names, `fortnite_df` the group-by object.
fortnite, fortnite_df = group_by_game(pd.read_parquet('data/fortnite_timeseries.parquet'))
fortnite

['Ali-A', 'Lachlan', 'LazarBeam', 'Ninja', 'Tfue']

In [229]:
# Weekly delta_views, one line per group of the Fortnite YouTubers dataset.
fig = go.Figure()

for (game, game_df) in fortnite_df:
    fig.add_trace(
        go.Scatter(
            name=game,
            mode='lines',
            x=game_df['datetime'],
            y=game_df['delta_views'],
        )
    )

# Titles and axis labels
fig.update_layout({
    'title': f"Top 5 Fortnite YouTubers: Weekly {metric_txt} Timeseries",
    'xaxis_title': 'Date',
    'yaxis_title': metric_txt,
    'showlegend': True,
})

# Export a standalone HTML copy, then render inline.
fig.write_html("plots/fortnite_timeseries.html")

fig.show()