In [None]:
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from pathlib import Path

# Output directory for the figures saved by the plotting cells in this notebook
# (path is relative to the notebook's working directory).
fig_dir = Path('../figures')

def visualize_cumulative_rewards(results_dict,
                                fig_dir,
                                fig_name,
                                dataset):
    """Plot cumulative rewards for every policy and save the figure to disk.

    Parameters
    ----------
    results_dict : dict
        Must contain a ``'policy_feedback'`` mapping of policy name -> feedback.
        For ``'zozo'`` each feedback entry must expose ``.get('reward')``
        returning an object with ``cumsum()``; for ``'deezer'`` it must be
        convertible to a DataFrame with ``'batches'`` and ``'reward'`` columns.
    fig_dir : pathlib.Path or str
        Directory the figure is written to.
    fig_name : str
        File-name suffix; the upper-cased dataset name is prepended.
    dataset : str
        Either ``'zozo'`` or ``'deezer'`` (case-insensitive).

    Returns
    -------
    None
        For an unsupported ``dataset`` a message is printed and the function
        returns before any figure is created.
    """
    dataset_key = dataset.lower()

    # Resolve the dataset-specific settings once, up front, so that an invalid
    # dataset never leaves an empty figure behind and `colors` is always bound
    # even when there are no policies to plot.
    if dataset_key == 'zozo':
        colors = ['#333333', '#db5f57', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF']
        x_label = "Number of Rounds"
    elif dataset_key == 'deezer':
        colors = ['#db5f57', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF']
        x_label = "Number of Batches"
    else:
        print('Please Set `Dataset` to either `ZOZOTOWN` or `Deezer` ')
        return

    fig, ax = plt.subplots(figsize=(10,6))
    for policy, rewards in results_dict.get('policy_feedback').items():
        if dataset_key == 'zozo':
            # one reward per round
            cumulative_rewards = rewards.get('reward').cumsum()
        else:
            # total reward per batch, accumulated over batches; selecting the
            # column first avoids summing unrelated (possibly non-numeric) columns
            cumulative_rewards = (
                pd.DataFrame(rewards).groupby("batches")['reward'].sum().cumsum()
            )

        ax.plot(cumulative_rewards, alpha = 0.8, label=policy)

    # zip() stops at the shorter sequence, so having fewer plotted policies than
    # palette entries (or the reverse) no longer raises IndexError.
    for line, color in zip(ax.get_lines(), colors):
        line.set_color(color)

    ax.legend()
    ax.set_xlabel(x_label)
    ax.set_ylabel("Cumulative Rewards")
    ax.set_title(dataset.upper() + ' Cumulative Rewards for Bandit Policies')

    # Path(...) lets callers pass either a Path or a plain string directory.
    out_path = Path(fig_dir) / (dataset.upper() + fig_name)
    print(str(out_path))
    fig.savefig(str(out_path), dpi = 1500)

# zozo cumulative reward plot
# NOTE: pickle can execute arbitrary code while loading; only load result files
# produced by this project. The `with` block makes sure the handle is closed.
with open('../data/results/zozo_ipw.pickle', 'rb') as results_file:
    zozo_results = pickle.load(results_file)

visualize_cumulative_rewards(zozo_results,
                             fig_dir = fig_dir,
                             fig_name = '_cumulative_rewards.png',
                             dataset = 'zozo')

# deezer cumulative reward plot
# NOTE: pickle can execute arbitrary code while loading; only load result files
# produced by this project. The `with` block makes sure the handle is closed.
with open('../data/results/deezer_results.pickle', 'rb') as results_file:
    deezer_results = pickle.load(results_file)

visualize_cumulative_rewards(deezer_results,
                             fig_dir = fig_dir,
                             fig_name = '_cumulative_rewards.png',
                             dataset = 'deezer')


../figures/ZOZO_cumulative_rewards.png


In [None]:
# plotting ZOZOTown Rolling Average
fig, ax = plt.subplots(figsize=(10,6))

# set window for rolling mean (counted in rounds: one reward per round)
window = 300000
for policy, rewards in zozo_results.get('policy_feedback').items():
    print(policy)

    # use the loop value directly instead of looking the policy up again
    zozo_rewards = pd.DataFrame(rewards.get('reward'))
    rolling_rewards = zozo_rewards.rolling(window).mean()
    ax.plot(rolling_rewards, label = policy)

# setting colors
# zip() pairs lines with palette entries and stops at the shorter of the two,
# so a results file with fewer policies than colours does not raise IndexError.
colors = ['#333333', '#db5f57', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF']
for line, color in zip(ax.get_lines(), colors):
    line.set_color(color)

# labeling
ax.legend()
# the x axis is the round index here (rewards are not grouped into batches)
ax.set_xlabel("Number of Rounds, Window: " + str(window))
ax.set_ylabel("Rolling Average Rewards")
ax.set_title('ZOZOTOWN Rolling Average for Bandit Policies, window: ' + str(window))
fig.savefig(str(fig_dir / 'ZOZO_Rolling_Average_Reward.png'), dpi = 1500)



# plotting Deezer Rolling Average
fig, ax = plt.subplots(figsize=(10,6))

# set window for rolling mean over batches
window = 10
for policy, rewards in deezer_results.get('policy_feedback').items():
    print(policy)
    # use the loop value directly instead of looking the policy up again
    batched_df = pd.DataFrame(rewards)
    # total reward per batch, then smoothed over `window` consecutive batches
    rolling_rewards = batched_df.groupby('batches')['reward'].sum().rolling(window).mean()
    ax.plot(rolling_rewards, alpha = 0.8, label = policy)

# setting colors
# zip() pairs lines with palette entries and stops at the shorter of the two,
# so a results file with fewer policies than colours does not raise IndexError.
colors = ['#db5f57', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF']
for line, color in zip(ax.get_lines(), colors):
    line.set_color(color)

#labeling
ax.legend()
ax.set_xlabel("Number of Batches, Window: " + str(window))
ax.set_ylabel("Rolling Average Rewards")
ax.set_title('DEEZER Rolling Average for Bandit Policies')
fig.savefig(str(fig_dir / 'DEEZER_Rolling_Average_Reward.png'), dpi = 1500)

In [None]:
## ZOZOTOWN Bar Graph of Relative Policy

means = []
cis = []
policies = []

# collect the mean reward and the 95% CI bounds reported for every policy
for policy, reward in zozo_results.get('reward_summary').items():
    mean = reward.get('mean')
    ci = (reward.get('95.0% CI (lower)'), reward.get('95.0% CI (upper)'))

    policies.append(policy)
    cis.append(ci)
    means.append(mean)

# Making Relative to Random, creating CIs
baseline = zozo_results.get('reward_summary').get('rand').get('mean')
cis = np.array(cis) / baseline          # shape (n_policies, 2): [lower, upper]
means = [m / baseline for m in means]

# Matplotlib expects non-negative error-bar *lengths*; a (2, N) array gives the
# distance below and above each bar. The previous `mean - upper` was negative
# whenever the upper bound exceeded the mean and ignored the lower bound.
y_r = np.array([
    [means[i] - cis[i][0] for i in range(len(cis))],   # mean - lower
    [cis[i][1] - means[i] for i in range(len(cis))],   # upper - mean
])
# guard against a summary whose mean falls slightly outside its own interval
y_r = np.clip(y_r, 0, None)

# labeling
fig, ax = plt.subplots(figsize=(10,6))
ax.bar(range(len(means)), means, yerr=y_r, alpha=0.8, align='center', color=['#333333', '#db5f57', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF'])
ax.set_xticks(range(len(means)))
ax.set_xticklabels(policies)
# `np.int` was removed in NumPy 1.24; the builtin `int` is the drop-in replacement
ax.set_ylabel(f"Relative Policy Value (± {int(100*(1 - 0.05))}% CI)")
ax.set_title('ZOZOTOWN Relative Policy Values')
fig.savefig(str(fig_dir / 'ZOZO_Relative_Policy_Value.png'), dpi = 1500)


## DEEZER Bar Graph of Relative Policy
means = []
cis = []
policies = []

# collect the mean reward and the 95% CI bounds reported for every policy
for policy, reward in deezer_results.get('reward_summary').items():
    mean = reward.get('mean')
    ci = (reward.get('95.0% CI (lower)'), reward.get('95.0% CI (upper)'))

    policies.append(policy)
    cis.append(ci)
    means.append(mean)

# Making Relative to Random, creating CIs
baseline = deezer_results.get('reward_summary').get('rand').get('mean')
cis = np.array(cis) / baseline          # shape (n_policies, 2): [lower, upper]
means = [m / baseline for m in means]

# Matplotlib expects non-negative error-bar *lengths*; a (2, N) array gives the
# distance below and above each bar. The previous `mean - upper` was negative
# whenever the upper bound exceeded the mean and ignored the lower bound.
y_r = np.array([
    [means[i] - cis[i][0] for i in range(len(cis))],   # mean - lower
    [cis[i][1] - means[i] for i in range(len(cis))],   # upper - mean
])
# guard against a summary whose mean falls slightly outside its own interval
y_r = np.clip(y_r, 0, None)

# labeling
fig, ax = plt.subplots(figsize=(10,6))
ax.bar(range(len(means)), means, yerr=y_r, alpha = 0.8, align='center', color=['#db5f57', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF'])
ax.set_xticks(range(len(means)))
ax.set_xticklabels(policies)
# `np.int` was removed in NumPy 1.24; the builtin `int` is the drop-in replacement
ax.set_ylabel(f"Relative Policy Value (± {int(100*(1 - 0.05))}% CI)")
ax.set_title('DEEZER Relative Policy Value')
fig.savefig(str(fig_dir / 'DEEZER_Relative_Policy_Value.png'), dpi = 1500)

In [None]:
# window = 1000
# n_rounds = np.linspace(0, len(zozo_results.get('policy_feedback').get('logged').get('reward')), num=window, dtype = 'int')
# # plotting ZOZOTown Rolling Average
# fig, ax = plt.subplots(figsize=(10,6))

# for policy, rewards in zozo_results.get('policy_feedback').items(): 
#     print(policy)
#     rewards = []
#     ind = 1
#     for n_round in n_rounds[:-1]:
#         rewards.append(zozo_results.get('policy_feedback').get(policy).get('reward')[:n_rounds[ind]].mean())
#         ind += 1

#     plt.plot(rewards, label = policy)

# # setting colors
# colors=['#333333', '#57db5f', '#57d3db', '#5f57db', '#FF6FFF']
# for i in range(len(colors)):
#     plt.gca().get_lines()[i].set_color(colors[i])
    
# ax.legend(loc='upper right')
# ax.set_xlabel("Window Number")
# ax.set_ylabel("Rolling Average Rewards")
# ax.set_title('ZOZOTOWN Rolling Average for Bandit Policies')
# plt.xticks()