### Get the latest stream views by a user

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
# Read the raw content-view log; row 0 of the CSV supplies the column names.
raw_views_csv = "data/content_views_raw.csv"
stream_views_df = pd.read_csv(raw_views_csv, header=0)
stream_views_df.head()

Unnamed: 0,USERID,CARDID,CARDTITLE,STREAMID,STREAMNAME,TIMESTAMP,MODULEID,MODULENAME,ORGANIZATION
0,1019,3021,Best in Class,579,About HMD,2017-07-09 10:23:36,473,HMD,28
1,1054,3021,Best in Class,579,About HMD,2017-07-09 14:41:25,473,HMD,28
2,977,3021,Best in Class,579,About HMD,2017-07-09 13:35:38,473,HMD,28
3,1337,3021,Best in Class,579,About HMD,2017-07-10 05:53:05,473,HMD,28
4,1346,3021,Best in Class,579,About HMD,2017-07-10 11:23:59,473,HMD,28


In [11]:
# Drop the trailing ":SS" so every view is bucketed to the minute.
# Keeping the first 16 characters ("YYYY-MM-DD HH:MM") rather than chopping the
# last three makes this cell safe to re-run: a second pass is a no-op instead of
# also eating ":MM". The vectorised .str accessor also lets missing values pass
# through instead of raising inside a lambda.
# NOTE(review): assumes every raw value follows the "YYYY-MM-DD HH:MM:SS" layout
# seen in the preview of the loaded frame - confirm against the full file.
stream_views_df['TIMESTAMP'] = stream_views_df['TIMESTAMP'].str[:16]
stream_views_df['TIMESTAMP'].head()


0    2017-07-09 10:23
1    2017-07-09 14:41
2    2017-07-09 13:35
3    2017-07-10 05:53
4    2017-07-10 11:23
Name: TIMESTAMP, dtype: object

In [13]:
# Convert the minute-resolution strings into datetime values.
minute_resolution_format = "%Y-%m-%d %H:%M"
stream_views_df["TIMESTAMP"] = pd.to_datetime(
    stream_views_df["TIMESTAMP"],
    format=minute_resolution_format,
)
stream_views_df.head()

Unnamed: 0,USERID,CARDID,CARDTITLE,STREAMID,STREAMNAME,TIMESTAMP,MODULEID,MODULENAME,ORGANIZATION
0,1019,3021,Best in Class,579,About HMD,2017-07-09 10:23:00,473,HMD,28
1,1054,3021,Best in Class,579,About HMD,2017-07-09 14:41:00,473,HMD,28
2,977,3021,Best in Class,579,About HMD,2017-07-09 13:35:00,473,HMD,28
3,1337,3021,Best in Class,579,About HMD,2017-07-10 05:53:00,473,HMD,28
4,1346,3021,Best in Class,579,About HMD,2017-07-10 11:23:00,473,HMD,28


In [15]:
# Reorder the log so the most recent views come first, then preview the top rows.
stream_views_df = stream_views_df.sort_values("TIMESTAMP", ascending=False)
stream_views_df.head(n=5)

Unnamed: 0,USERID,CARDID,CARDTITLE,STREAMID,STREAMNAME,TIMESTAMP,MODULEID,MODULENAME,ORGANIZATION
48857,2391,3074,Nokia 3310 Quiz,584,Nokia 3310 & 3310 3G,2017-08-09 23:58:00,487,Nokia 3310 & 3310 3G,28
237503,2698,4703,Nokia 3310 كيفية بيع هاتف,741,نوكيا 3310,2017-08-09 23:58:00,637,نوكيا 3310,28
237644,2698,4704,Nokia 3310,741,نوكيا 3310,2017-08-09 23:58:00,637,نوكيا 3310,28
145630,1668,3171,Nokia 5: Chipset,597,Quality & Performance,2017-08-09 23:58:00,489,Quality & Performance,28
44133,2391,3070,Nokia 3310 Feature Phone LOYALISTS,584,Nokia 3310 & 3310 3G,2017-08-09 23:58:00,487,Nokia 3310 & 3310 3G,28


In [16]:
# Reorder the log chronologically (oldest views first), then preview the top rows.
stream_views_df = stream_views_df.sort_values("TIMESTAMP", ascending=True)
stream_views_df.head(n=5)

Unnamed: 0,USERID,CARDID,CARDTITLE,STREAMID,STREAMNAME,TIMESTAMP,MODULEID,MODULENAME,ORGANIZATION
192029,1246,3210,Nokia 6,580,Nokia 6,2017-07-09 00:23:00,474,Nokia 6,28
225344,1246,3277,A new incentive scheme is here!,605,Incentive Scheme,2017-07-09 00:23:00,497,Incentive Scheme,28
7786,1246,3029,Nokia 6 Colours,580,Nokia 6,2017-07-09 00:24:00,474,Nokia 6,28
13291,1246,3034,Nokia 6 Sales Pitch,580,Nokia 6,2017-07-09 00:24:00,474,Nokia 6,28
27640,1246,3059,Nokia 6 Key Selling Points,580,Nokia 6,2017-07-09 00:24:00,474,Nokia 6,28


In [24]:
from collections import Counter
def get_latest_streams_for_user(stream_views_df, userid, max_views_to_consider = 25, max_num_unique_streams = 5):
    """
    Rank the streams a user viewed most often among their most recent views.

    Parameters
    ----------
    stream_views_df : pandas.DataFrame
        View log with at least the columns "USERID", "STREAMID" and
        "TIMESTAMP". The frame passed in is not modified.
    userid
        Value matched against the "USERID" column.
    max_views_to_consider : int, default 25
        How many of the user's newest views (ordered by "TIMESTAMP") are counted.
    max_num_unique_streams : int, default 5
        Upper bound on the number of distinct streams returned.

    Returns
    -------
    list of (stream_id, view_count) tuples
        Sorted by view_count, highest first. Streams with equal counts are
        listed in the order they are first met when walking the user's views
        from newest to oldest. Stream ids are plain Python values rather than
        NumPy scalars. The list is empty when the user has no views.
    """

    # Select the user's rows first so that only those rows get sorted, instead
    # of sorting the entire view log on every call.
    user_views = stream_views_df.loc[stream_views_df["USERID"] == userid]

    # Newest first. A stable sort keeps rows that share a timestamp in their
    # input order, which makes the ranking reproducible between runs.
    user_views = user_views.sort_values(by="TIMESTAMP", ascending=False, kind="mergesort")

    # Slicing already copes with users who have fewer views than the cap;
    # tolist() hands Counter plain Python values as keys.
    recent_stream_ids = user_views["STREAMID"].iloc[:max_views_to_consider].tolist()

    # most_common() orders by count and simply returns every entry when asked
    # for more streams than were seen.
    return Counter(recent_stream_ids).most_common(max_num_unique_streams)
    
    

In [26]:
# Example: user 1246's ten most-viewed streams among their 100 newest views.
get_latest_streams_for_user(
    stream_views_df,
    userid=1246,
    max_views_to_consider=100,
    max_num_unique_streams=10,
)

[(587, 12), (597, 12), (580, 7), (589, 6), (590, 6), (605, 1)]