In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sys

In [3]:
# Read the per-(user, stream) view table; the first CSV row supplies the column names.
user_views_per_stream_df = pd.read_csv(
    "data/user_views_per_stream.csv",
    header=0,
)
user_views_per_stream_df.head(n=5)

Unnamed: 0,USERID,STREAMID,num_views,day_of_week,time_labeled,num_cards,views_per_card
0,245,163,1029,Wednesday,Afternoon,15,68.6
1,245,167,258,Thursday,Afternoon,14,18.428571
2,245,171,123,Wednesday,Afternoon,6,20.5
3,245,172,83,Wednesday,Afternoon,4,20.75
4,245,173,167,Monday,Afternoon,8,20.875


In [4]:
# Distinct USERID values present in the view table (np.unique returns them sorted).
unique_users = np.unique(user_views_per_stream_df["USERID"].to_numpy())
unique_users.shape

(219,)

In [5]:
# Distinct STREAMID values present in the view table (np.unique returns them sorted),
# plus their count, which is the value displayed by this cell.
unique_streams = np.unique(user_views_per_stream_df["STREAMID"].to_numpy())
num_unique_streams = len(unique_streams)
num_unique_streams

153

## Predict the nudge day and time by combining user-level and stream-level viewing statistics

In [6]:
# Load the two aggregated view tables: one row per stream and one row per user.
# Each carries a day_of_week / time_labeled label together with a confidence column for each.
stream_views_grouped_stream_df = pd.read_csv(
    "../../nudge_framework/generated/stream_views_grouped_stream.csv",
    header=0,
)
stream_views_grouped_user_df = pd.read_csv(
    "../../nudge_framework/generated/stream_views_grouped_user.csv",
    header=0,
)
stream_views_grouped_stream_df.head(n=5)

Unnamed: 0,STREAMID,time_concatenated,total_num_views,day_of_week,time_labeled,day_of_week_conf,time_labelled_conf
0,163,1345.566799,1759,Wednesday,Afternoon,0.312109,0.371802
1,167,1528.027076,554,Thursday,Afternoon,0.442238,0.323105
2,171,1278.251046,239,Wednesday,Afternoon,0.41841,0.209205
3,172,1339.262032,187,Thursday,Afternoon,0.331551,0.251337
4,173,1314.365729,391,Thursday,Afternoon,0.309463,0.355499


In [7]:
# Preview the first rows of the per-user aggregate table.
stream_views_grouped_user_df.head(n=5)

Unnamed: 0,USERID,time_concatenated,total_num_views,day_of_week,time_labeled,day_of_week_conf,time_labelled_conf
0,245,1567.50135,4075,Thursday,Afternoon,0.31411,0.275092
1,246,1704.980989,263,Thursday,Evening,0.802281,0.068441
2,247,1256.211226,677,Wednesday,Afternoon,0.39291,0.267356
3,248,1389.285714,21,Wednesday,Afternoon,0.714286,0.714286
4,249,1401.19907,1075,Thursday,Afternoon,0.282791,0.214884


In [9]:
# Load the table grouped by (USERID, STREAMID); its day_of_week / time_labeled
# columns are used later as the reference values that predictions are compared against.
actual_user_views_df = pd.read_csv(
    "../../nudge_framework/generated/stream_views_grouped_by_userid_streamid.csv",
    header=0,
)
actual_user_views_df.head(n=5)

Unnamed: 0,USERID,STREAMID,time_concatenated,total_num_views,day_of_week,time_labeled
0,245,163,1414.703596,1029,Wednesday,Afternoon
1,245,167,1633.251938,258,Thursday,Evening
2,245,171,1419.065041,123,Wednesday,Afternoon
3,245,172,1486.048193,83,Wednesday,Afternoon
4,245,173,1482.48503,167,Monday,Afternoon


In [8]:
def predict_day_time_for_nudge(userid, streamid, choose_best=True):
    """Recommend a day of week and a time of day for nudging a user about a stream.

    The recommendation is built from two notebook-level frames:
    ``stream_views_grouped_user_df`` (filtered on ``USERID``) and
    ``stream_views_grouped_stream_df`` (filtered on ``STREAMID``). Only the
    first matching row of each frame is used.

    Parameters
    ----------
    userid
        Value compared against the ``USERID`` column.
    streamid
        Value compared against the ``STREAMID`` column.
    choose_best : bool, default True
        When both the user and the stream are known:
        * True  -> for day and for time independently, return the value from
          whichever side has the strictly higher confidence
          (``day_of_week_conf`` / ``time_labelled_conf``); ties go to the user.
        * False -> return both candidates as ``(stream_value, user_value)``
          tuples, for day and for time alike.

    Returns
    -------
    tuple
        ``(day, time, source_df)``. ``source_df`` is the filtered stream frame
        when only the stream is known, the filtered user frame when only the
        user is known, and ``None`` when both are known.

    Raises
    ------
    ValueError
        If neither the user nor the stream has a matching row.
    """
    user_df = stream_views_grouped_user_df[stream_views_grouped_user_df["USERID"] == userid]
    stream_df = stream_views_grouped_stream_df[stream_views_grouped_stream_df["STREAMID"] == streamid]

    if user_df.empty and stream_df.empty:
        raise ValueError("Both user and stream do not exist yet.")
    if user_df.empty:
        # Unknown user: fall back to the stream-level statistics alone.
        return stream_df["day_of_week"].values[0], stream_df["time_labeled"].values[0], stream_df
    if stream_df.empty:
        # Unknown stream: fall back to the user-level statistics alone.
        return user_df["day_of_week"].values[0], user_df["time_labeled"].values[0], user_df

    user_day = user_df["day_of_week"].values[0]
    user_time = user_df["time_labeled"].values[0]
    stream_day = stream_df["day_of_week"].values[0]
    stream_time = stream_df["time_labeled"].values[0]

    if not choose_best:
        # Both tuples are ordered (stream, user). The time tuple previously had
        # its two sources swapped, which made it inconsistent with the day tuple.
        return (stream_day, user_day), (stream_time, user_time), None

    # Confidence columns are only read on this path, as in the original code.
    user_day_conf = user_df["day_of_week_conf"].values[0]
    stream_day_conf = stream_df["day_of_week_conf"].values[0]
    day_to_recommend = stream_day if stream_day_conf > user_day_conf else user_day

    user_time_conf = user_df["time_labelled_conf"].values[0]
    stream_time_conf = stream_df["time_labelled_conf"].values[0]
    time_to_recommend = stream_time if stream_time_conf > user_time_conf else user_time

    return day_to_recommend, time_to_recommend, None

In [10]:
# Evaluate predict_day_time_for_nudge against actual_user_views_df.
# For every sample size, draw that many random users and streams NUM_TRIALS times,
# predict day/time for each sampled (user, stream) pair, and record the mean share
# of pairs whose prediction equals the day_of_week / time_labeled stored for that pair.
SAMPLE_SIZES = [10, 15, 20, 25, 30]
NUM_TRIALS = 10
rng = np.random.RandomState(42)  # fixed seed so re-running the cell reproduces the numbers

nudge_predictions_time_of_day = []
nudge_predictions_day = []

for itr in SAMPLE_SIZES:
    time_of_day_ratios = []
    day_ratios = []
    for jtr in range(NUM_TRIALS):
        correct_time_of_day = 0
        correct_day = 0
        evaluated_pairs = 0
        selected_users = rng.choice(unique_users, itr, replace=False)
        selected_streams = rng.choice(unique_streams, itr, replace=False)
        for userid in selected_users:
            # Filter by user once instead of re-scanning the full frame for every stream.
            user_rows = actual_user_views_df[actual_user_views_df["USERID"] == userid]
            for streamid in selected_streams:
                pair_rows = user_rows[user_rows["STREAMID"] == streamid]
                if pair_rows.empty:
                    # No recorded row for this pair, so there is nothing to compare
                    # against; indexing .values[0] here would raise IndexError.
                    continue
                evaluated_pairs += 1
                actual_day = pair_rows["day_of_week"].values[0]
                actual_time = pair_rows["time_labeled"].values[0]
                predicted_day, predicted_time, _source = predict_day_time_for_nudge(userid, streamid)
                if predicted_day == actual_day:
                    correct_day += 1
                if predicted_time == actual_time:
                    correct_time_of_day += 1

        # Score against the pairs that were actually evaluated; a trial in which
        # no sampled pair had a recorded row contributes nothing to the average.
        if evaluated_pairs:
            time_of_day_ratios.append(correct_time_of_day / float(evaluated_pairs))
            day_ratios.append(correct_day / float(evaluated_pairs))

    mean_time_of_day_ratio = sum(time_of_day_ratios) / len(time_of_day_ratios) if time_of_day_ratios else float("nan")
    mean_day_ratio = sum(day_ratios) / len(day_ratios) if day_ratios else float("nan")
    nudge_predictions_time_of_day.append((itr, mean_time_of_day_ratio))
    nudge_predictions_day.append((itr, mean_day_ratio))

ValueError: too many values to unpack (expected 2)