In [597]:
import numpy as np
import pandas as pd
from highcharts import Highchart
from IPython.display import HTML

In [598]:
def get_show(title):
    if ':' in title:
        return title.split(':')[0]
    else:
        return title


def convert_hours(time):
    return time.days * 24 + time.seconds/3600


def split_date(date):
    day_map = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday',
               3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
    return date.day, date.month, date.year, day_map[date.dayofweek]

In [599]:
view_act = pd.read_csv('Content_Interaction/ViewingActivity.csv')
search_act = pd.read_csv('Content_Interaction/SearchHistory.csv')

In [600]:
view_act['Show'] = view_act['Title'].apply(get_show)
view_act['Duration'] = pd.to_timedelta(view_act['Duration'])
view_act['Start Time'] = pd.to_datetime(view_act['Start Time'])
view_act['day'], view_act['month'], view_act['year'], view_act['dayofweek'] = list(
    zip(*view_act['Start Time'].map(split_date)))
view_act['Start Time'] = view_act['Start Time'].dt.tz_localize(
    'utc').dt.tz_convert('US/Mountain')
view_act['Start Hour'] = view_act['Start Time'].dt.hour

In [601]:
view_act.head()

Unnamed: 0,Profile Name,Start Time,Duration,Attributes,Title,Supplemental Video Type,Device Type,Bookmark,Latest Bookmark,Country,Show,day,month,year,dayofweek,Start Hour
0,Tony,2020-10-05 19:31:43-06:00,00:07:13,Autoplayed: user action: Unspecified;,Pokémon The Series: Indigo League: Season 1: S...,,Sony PS4,00:08:09,00:08:09,US (United States),Pokémon The Series,6,10,2020,Tuesday,19
1,Tony,2020-10-05 17:45:09-06:00,00:43:11,,Gilmore Girls: Season 4: Nag Hammadi Is Where ...,,Sony PS4,00:43:11,00:43:11,US (United States),Gilmore Girls,5,10,2020,Monday,17
2,Tony,2020-10-05 17:28:29-06:00,00:16:08,Autoplayed: user action: Unspecified;,Gilmore Girls: Season 4: A Family Matter (Epis...,,Sony PS4,00:43:08,00:43:08,US (United States),Gilmore Girls,5,10,2020,Monday,17
3,HijackerJo,2020-10-05 00:59:46-06:00,00:00:53,Autoplayed: user action: None;,Hart of Dixie: Season 1_hook_primary_16x9,HOOK,Roku Bryan-2 Set Top Box,00:00:53,00:00:53,US (United States),Hart of Dixie,5,10,2020,Monday,0
4,HijackerJo,2020-10-05 00:58:44-06:00,00:00:54,Autoplayed: user action: None;,Hart of Dixie: Season 1_hook_primary_16x9,HOOK,Roku Bryan-2 Set Top Box,00:00:54,Not latest view,US (United States),Hart of Dixie,5,10,2020,Monday,0


In [602]:
view_act.groupby('Profile Name')[
    'Device Type'].value_counts().unstack(level=0).head(10)

Profile Name,Band,HijackerJo,Kids,Sarah,Tony
Device Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Apple iPad 2 WiFi,,,,2.0,
Apple iPad 4 WiFi,55.0,,16.0,,
Apple iPad 6th Gen 9.7 (Wi-Fi) iPad,,,21.0,,
Apple iPad Air 3rd Gen (Wi-Fi),,10.0,3.0,,
Apple iPad mini,1237.0,,,,
Chrome PC (Cadmium),62.0,1.0,,,
DefaultWidevineAndroidPhone,16.0,,,,
DefaultWidevineAndroidPhone (samsung_SM-G928V),1.0,,,,
Edge (Cadmium),,,,,11.0
FireTV Stick 2016,12.0,,10.0,,


In [603]:
total_act_by_profile

Profile Name
Tony         182 days 21:56:32
Sarah         89 days 07:43:58
HijackerJo    64 days 22:45:31
Band          49 days 15:18:35
Kids          13 days 01:38:51
Name: Duration, dtype: timedelta64[ns]

In [604]:
total_act_by_profile = view_act.groupby(
    'Profile Name').Duration.sum().sort_values(ascending=False)

chart = Highchart()

options = {
    'chart': {
        'type': 'column'
    },
    'title': {
        'text': 'Cumulative Watch Time (2014-2020)'
    },
    'subtitle':{
        'text': "Click column to see favorite shows"
    },
    'legend': {
        'enabled': False
    },
    'yAxis': {
        'title': {
            'text': 'Days'
        }
    },
    'xAxis':{
        'type': 'category'
    }
}

chart.set_dict_options(options)
#chart.set_options(
#    'xAxis', {'categories': total_act_by_profile.index.to_list()})
data = list()
for profile in total_act_by_profile.index:
    data.append({'y':total_act_by_profile[profile].days, 'name':profile, 'drilldown':profile, 'valueSuffix':'test'})
#chart.add_data_set({'y':total_act_by_profile[profile].days, 'name':profile})#, 'drilldown':profile})
#print(data)
chart.add_data_set(data, 'column', 'watch time', colorByPoint=True)

fav_shows = view_act.groupby(['Profile Name', 'Show'])[
    'Duration'].sum().apply(convert_hours).unstack(level=0)
dd_data = list()
for profile in fav_shows.columns:
    top10 = fav_shows[profile].sort_values(ascending=False)[:10]
    data = [list(x) for x in zip(top10.index, (top10/24).to_list())]
    #print(profile)
    #print(data)
    chart.add_drilldown_data_set(data, 'column', profile, name=profile)

chart.add_JSsource("https://code.highcharts.com/6/modules/drilldown.js")
chart

In [605]:
watch_time_month = view_act.groupby(
    ['Profile Name', 'year', 'month']).Duration.sum().apply(convert_hours).unstack(level=0)

chart = Highchart()

options = {
    'plotOptions':{
        'line':{
            'marker':{
                'enabled':False
            }
        }
    },
    'title':{
        'text': 'Watch Time'
    },
    'yAxis':{
        'title':{
            'text':'Hours'
        }
    },
    'tooltip':{
        'valueDecimals': 1,
        'valueSuffix': ' Hours'
    }
}
chart.set_dict_options(options)
chart.set_options('xAxis', {'categories': watch_time_month.index.to_list()})
for profile in watch_time_month.columns:
    chart.add_data_set(watch_time_month[profile].to_list(), series_type='line', name=profile)
chart

In [664]:
dayofweek_mean = view_act.groupby(
    ['Profile Name', 'dayofweek']).Duration.sum().unstack(level=0)
#days = list(dayofweek_mean['dayofweek'].unique())
# dayofweek_mean
#tony_dayofweek_mean = dayofweek_mean[dayofweek_mean['Profile Name'] == 'Tony']
days = ['Monday', 'Tuesday', 'Wednesday',
        'Thursday', 'Friday', 'Saturday', 'Sunday']
dayofweek_mean = dayofweek_mean/dayofweek_mean.max()
data = list()
for ix in range(len(dayofweek_mean.columns)):
    for iy in reversed(range(len(dayofweek_mean.index))):
        print( ix, iy, dayofweek_mean.iat[iy, ix])
        data.append([ix, iy, dayofweek_mean.iat[iy, ix]])
print(data)

0 6 0.8286118178405658
0 5 0.9493433870977352
0 4 0.8889507897825789
0 3 1.0
0 2 0.9978534954805727
0 1 0.8030057021646776
0 0 0.9206776459652866
1 6 0.737274934531828
1 5 0.7701272407202095
1 4 0.5910534116964502
1 3 0.9848211198931845
1 2 1.0
1 1 0.8345254699998808
1 0 0.6569577069648598
2 6 0.7935155119533084
2 5 0.3408094878736058
2 4 0.47975587972610895
2 3 0.2975968761466937
2 2 0.5322355075223107
2 1 0.4648185714186808
2 0 1.0
3 6 0.8683454677353836
3 5 0.9063862058831229
3 4 0.9924702241957809
3 3 0.8906831270020497
3 2 1.0
3 1 0.9595599692255462
3 0 0.9001164598568194
4 6 0.4729196333163316
4 5 0.5630459693183995
4 4 0.4448933044688418
4 3 1.0
4 2 0.7734101835033892
4 1 0.7860054144000886
4 0 0.5189780954513039
[[0, 6, 0.8286118178405658], [0, 5, 0.9493433870977352], [0, 4, 0.8889507897825789], [0, 3, 1.0], [0, 2, 0.9978534954805727], [0, 1, 0.8030057021646776], [0, 0, 0.9206776459652866], [1, 6, 0.737274934531828], [1, 5, 0.7701272407202095], [1, 4, 0.5910534116964502], [1, 3

In [665]:
dayofweek_mean/dayofweek_mean.max()


Profile Name,Band,HijackerJo,Kids,Sarah,Tony
dayofweek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Friday,0.920678,0.656958,1.0,0.900116,0.518978
Monday,0.803006,0.834525,0.464819,0.95956,0.786005
Saturday,0.997853,1.0,0.532236,1.0,0.77341
Sunday,1.0,0.984821,0.297597,0.890683,1.0
Thursday,0.888951,0.591053,0.479756,0.99247,0.444893
Tuesday,0.949343,0.770127,0.340809,0.906386,0.563046
Wednesday,0.828612,0.737275,0.793516,0.868345,0.47292


In [666]:
dayofweek_mean.applymap(convert_hours)

AttributeError: 'float' object has no attribute 'days'

In [667]:
chart = Highchart()


options = {
    'chart': {
        'type': 'heatmap'
    },
    'xAxis':{
        'categories': dayofweek_mean.columns.to_list()
    },
    'yAxis':{
        'categories': dayofweek_mean.index.to_list()
    },
    'colorAxis': {
        'min': 0,
        'minColor': '#FFFFFF',
        'maxColor': '#7CB5EC'
    }
}
chart.set_dict_options(options)
#chart.set_options('xAxis', {'categories': tony_dayofweek_mean['dayofweek'].to_list()})
chart.add_data_set(data, 'heatmap')
chart

In [668]:
chart = Highchart()

options = {
    'title': {
        'text': 'Time of Day'
    },
    'chart': {
        'polar': True,
        'type': 'line'
    },
    'yAxis': {
        'gridLineInterpolation': 'polygon',
        'lineWidth': 0,
        'min': 0
    },
    'xAxis': {
        'tickmarkPlacement': 'on',
        'lineWidth': 0
    },
    'tooltip': {
        'shared': True
    }
}

chart.set_dict_options(options)
chart.set_options('xAxis', {'categories': ['12AM', '1AM', '2AM', '3AM', '4AM', '5AM', '6AM', '7AM', '8AM',
                                           '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM', '5PM',
                                           '6PM', '7PM', '8PM', '9PM', '10PM', '11PM']})
sum_pf_start = view_act.groupby(['Profile Name', 'Start Hour'])[
    'Start Time'].count().unstack(level=0)
normalized = sum_pf_start / sum_pf_start.max()
for profile in normalized.columns:
    chart.add_data_set(normalized[profile].to_list(), name=profile)

chart