In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Switch matplotlib to the 'modern-lux.mplstyle' style sheet for the figures below.
plt.style.use(style = 'modern-lux.mplstyle')

In [None]:
# Read the view records from the CSV file into the `feast` DataFrame.
feast = pd.read_csv(filepath_or_buffer = 'Data/mr_feast_views.csv')

In [None]:
# Remove exact duplicate rows and the 'License' column, then renumber the index.
# `feast` is reassigned from itself, so errors = 'ignore' is needed to keep the
# cell re-runnable: without it a second run raises KeyError because 'License'
# has already been dropped.
feast = (
    feast
    .drop_duplicates()
    .drop(columns = 'License', errors = 'ignore')
    .reset_index(drop = True)
)

Build a visualization that compares, for each category, the distribution of each video's maximum views.

In [None]:
# Collapse feast to one row per video_id, keeping the largest value found in
# each column for that video (each column's maximum is taken independently).
vids = feast.groupby(by = 'video_id').max()

# preview the first five rows
vids.head(n = 5)

In [None]:
# manual version: histogram of the 'views' column for a single category
fig, ax = plt.subplots()

# keep only the rows whose category is 'Advert' and bin their views into 8 bars;
# the other categories (Reaction, Vlog, Challenge) can be drawn the same way
vids.loc[vids.category == 'Advert', 'views'].plot.hist(bins = 8, ax = ax);

In [None]:
# manual version on one fig: a fixed 2 x 2 grid, one panel per named category
fig, axs = plt.subplots(nrows = 2, ncols = 2)

# axs.flat walks the grid row by row, so the names below land on
# [0,0], [0,1], [1,0], [1,1] in that order
for panel, name in zip(axs.flat, ('Reaction', 'Advert', 'Challenge', 'Vlog')):
    vids.loc[vids.category == name, 'views'].plot.hist(bins = 8, ax = panel)
    panel.set_title(name)

In [56]:
# show each category next to its position in the order unique() returns them
for position, name in enumerate(vids.category.unique()):
    print(f'{position}, {name}')

0, Advert
1, Reaction
2, Challenge
3, Vlog


In [None]:
# looped version on one fig

import math

# fetch the categories once so the grid size and the loop use the same list
categories = vids.category.unique()
no_of_cats = len(categories)

# two plots per row; ceil so an odd number of categories still gets a slot,
# and at least one row so plt.subplots is never asked for an empty grid
ncols = 2
nrows = max(1, math.ceil(no_of_cats / ncols))

# squeeze = False keeps axs two-dimensional even when there is only one row,
# so axs[row, col] works for any number of categories
fig, axs = plt.subplots(nrows = nrows, ncols = ncols, squeeze = False)

for index, cat in enumerate(categories):

    # fill the grid in reading order: left to right, then down to the next row
    row, col = divmod(index, ncols)

    # create plot for category on appropriate ax and set title
    vids[vids.category == cat]['views'].plot(kind = 'hist', bins = 8, ax = axs[row, col])
    axs[row, col].set_title(cat)

# hide any grid slot that received no category (happens when the count is odd)
for unused_ax in axs.ravel()[no_of_cats:]:
    unused_ax.set_visible(False)


In [None]:
# looped version on one ax

fig, ax = plt.subplots()

# one shared set of bin edges for every category, so bars from different
# categories cover the same view ranges and their heights are comparable
bin_edges = np.histogram_bin_edges(vids['views'].dropna(), bins = 8)

for cat in vids.category.unique():
    # alpha keeps overlapping histograms visible through one another
    vids[vids.category == cat]['views'].plot(kind = 'hist', bins = bin_edges, alpha = 0.6, label = cat, ax = ax)

# build the legend once, after every labelled histogram has been drawn
ax.legend()

ax.ticklabel_format(style = 'plain')
ax.set_xlabel("Views");

For each video, apply a label ‘greater than 100000’ or ‘less than 100000’ depending on the total daily views.

In [None]:
# Label every row of feast by whether its views_change exceeds the threshold.
# The label text states 100k so that it agrees with the 100000 cut-off.
# Rows where the comparison is False (including missing values) get 'Under 100k'.
views_threshold = 100000
feast['label'] = np.where(feast.views_change > views_threshold, 'Over 100k', 'Under 100k')

Build a visualization that compares, for each category, how many videos fall into the two labels you just made.

In [None]:
fig, ax = plt.subplots()

# Count the non-null 'views' entries for every (category, label) pair, then move
# 'label' into the columns: one row per category, one column per label.
# Plotting the table this way ties each colour to a label (with a legend) instead
# of relying on colours alternating correctly along a flat list of bars, and
# fill_value = 0 gives a zero-length bar when a category has no rows for a label.
# NOTE(review): this counts rows of feast; vids is built by grouping feast on
# video_id, so a video may contribute several rows here - confirm whether
# distinct videos should be counted instead.
label_counts = (
    feast
    .groupby(['category', 'label'])['views']
    .count()
    .unstack('label', fill_value = 0)
)
label_counts.plot(kind = 'barh', color = ['#336699', '#d2aa87'], ax = ax)

ax.set_ylabel("Category")
ax.set_xlabel("No. of Videos");

Build a visualization that compares the relationship between maximum daily views and minimum daily views for each video.

In [None]:
# Collapse feast to one row per video_id, keeping the smallest value found in
# each column for that video.
vids_min = feast.groupby(by = 'video_id').min()

In [None]:
# Dot plot of each video's largest views_change (x) against its smallest (y).
fig, ax = plt.subplots()
ax.plot(vids.views_change, vids_min.views_change, '.')

# axis labels and title in one call
ax.set(
    xlabel = 'Max Views',
    ylabel = 'Min Views',
    title = 'Max vs min views per video',
);

In [None]:
# Same max-vs-min dot plot as before, with a fitted straight line drawn over it.
x = vids.views_change
y = vids_min.views_change

# degree-1 polynomial fit: a is the slope, b the intercept
a, b = np.polyfit(x, y, deg = 1)

fig, ax = plt.subplots()

# raw points
ax.plot(x, y, '.')

# fitted line evaluated at the same x positions
ax.plot(x, a * x + b, color = '#a6c3c1')

ax.set(
    xlabel = 'Max Views',
    ylabel = 'Min Views',
    title = 'Max vs min views per video',
);