In [9]:
from stable_baselines3 import PPO
import numpy as np
import sys
sys.path.append("..")
from simulation import simulate_policy
from evaluation.evaluator import Evaluator
from tqdm import trange

In [10]:
# Load the PPO model saved under the 02_0-10_duopoly results directory.
model_path = "../../src/results/02_0-10_duopoly/model.zip"
model = PPO.load(model_path)

In [11]:
import config

# Override the shared configuration for this evaluation run:
# a competitor is present in the market ...
config.competitor = True
# ... and 'recurring' is the only customer type.
# NOTE(review): customer_mix is presumably the share of each customer type,
# in the same order as customers_types -- confirm against the config module.
config.customers_types = ['recurring']
config.customer_mix = [1]


In [12]:
def _per_customer_type():
    """Return a new dict mapping each configured customer type to its own empty list."""
    return {name: [] for name in config.customers_types}


# --- Agent ---
# Keyed by customer type; the evaluation loop appends one value per episode.
n_buys = _per_customer_type()
reward = _per_customer_type()
sales_price = _per_customer_type()
# Aggregated over all customer types; one value per episode.
offer_price, total_sales_price, total_reward, total_buys = [], [], [], []

# --- Competitor ---
# Same layout as the agent's containers above.
comp_n_buys = _per_customer_type()
comp_reward = _per_customer_type()
comp_sales_price = _per_customer_type()
comp_offer_price, comp_total_sales_price, comp_total_reward, comp_total_buys = [], [], [], []

In [13]:
# Run config.n_eval_episodes simulated episodes and record, per episode, the
# purchases, reward and average sales price of the agent (and of the
# competitor when config.competitor is set).
# NOTE(review): the divisions below have no guard for an episode in which a
# seller makes zero sales.
for _ in trange(config.n_eval_episodes):
    infos = simulate_policy(model)
    # Only the second half of every recorded series is evaluated.
    half = config.episode_length // 2
    infos = {name: series[half:] for name, series in infos.items()}
    infos = Evaluator().add_concatenated_infos(infos)

    # Running sums over all customer types for this episode.
    i_reward, i_n_buys = 0, 0
    comp_i_reward, comp_i_n_buys = 0, 0

    for customer_type in config.customers_types:
        type_buys = np.sum(infos[f'n_{customer_type}_buy'])
        n_buys[customer_type].append(type_buys)
        type_reward = np.sum(infos[f'{customer_type}_reward'])
        reward[customer_type].append(type_reward)
        # Average sales price = reward earned per purchase.
        sales_price[customer_type].append(type_reward / type_buys)

        i_reward += type_reward
        i_n_buys += type_buys

        if not config.competitor:
            continue

        comp_type_buys = np.sum(infos[f'n_{customer_type}_competitor_buy'])
        comp_n_buys[customer_type].append(comp_type_buys)
        comp_type_reward = np.sum(infos[f'{customer_type}_competitor_reward'])
        comp_reward[customer_type].append(comp_type_reward)
        comp_sales_price[customer_type].append(comp_type_reward / comp_type_buys)

        comp_i_reward += comp_type_reward
        comp_i_n_buys += comp_type_buys

    # Episode-level figures for the agent.
    offer_price.append(np.mean(infos['agent_offer_price']))
    total_reward.append(i_reward)
    total_buys.append(i_n_buys)
    total_sales_price.append(i_reward / i_n_buys)

    if not config.competitor:
        continue

    # Episode-level figures for the competitor.
    comp_offer_price.append(np.mean(infos['competitor_offer_price']))
    comp_total_reward.append(comp_i_reward)
    comp_total_buys.append(comp_i_n_buys)
    comp_total_sales_price.append(comp_i_reward / comp_i_n_buys)


100%|██████████| 1000/1000 [00:20<00:00, 49.78it/s]


In [14]:
# Report the agent's per-customer-type results, averaged over all episodes.
print(' --- Agent ---')
for customer_type in config.customers_types:
    mean_sales_price = np.mean(sales_price[customer_type])
    mean_n_buys = np.mean(n_buys[customer_type])
    mean_reward = np.mean(reward[customer_type])

    print(f'{customer_type}:')
    print(f'\tAverage Sales Price: {mean_sales_price}')
    print(f'\tn Buys: {mean_n_buys}')
    print(f'\tReward: {mean_reward}')
    print()

mean_offer_price = np.mean(offer_price)
print(f'Average Offer Price: {mean_offer_price}')
# Disabled: aggregate figures across all customer types.
#print(f'Average Sales Price: {np.mean(total_sales_price)}')
#print(f'Average Total Buys: {np.mean(total_buys)}')
#print(f'Average Total Reward: {np.mean(total_reward)}')

 --- Agent ---
recurring:
	Average Sales Price: 5.982901996501499
	n Buys: 883.359
	Reward: 5285.055039756775

Average Offer Price: 7.009186186109271


In [15]:
# Report the competitor's per-customer-type results, averaged over all
# episodes. Nothing is printed when no competitor is configured.
if config.competitor:
    print(' --- Competitor ---')
    for customer_type in config.customers_types:
        comp_mean_sales_price = np.mean(comp_sales_price[customer_type])
        comp_mean_n_buys = np.mean(comp_n_buys[customer_type])
        comp_mean_reward = np.mean(comp_reward[customer_type])

        print(f'{customer_type}:')
        print(f'\tAverage Sales Price: {comp_mean_sales_price}')
        print(f'\tn Buys: {comp_mean_n_buys}')
        print(f'\tReward: {comp_mean_reward}')
        print()

    comp_mean_offer_price = np.mean(comp_offer_price)
    print(f'Average Offer Price: {comp_mean_offer_price}')
# Disabled: aggregate figures across all customer types.
#    print(f'Average Sales Price: {np.mean(comp_total_sales_price)}')
#    print(f'Average Total Buys: {np.mean(comp_total_buys)}')
#    print(f'Average Total Reward: {np.mean(comp_total_reward)}')

 --- Competitor ---
recurring:
	Average Sales Price: 4.880912594693501
	n Buys: 791.646
	Reward: 3863.966268081665

Average Offer Price: 6.005005025863649


In [16]:
# Report the combined market (agent + competitor) per customer type.
# For every episode the two sellers' figures are added element-wise, then the
# sums are averaged over all episodes.
if config.competitor:
    print(' --- Total ---')
    for customer_type in config.customers_types:
        print(f'{customer_type}:')
        total_n_buys = np.mean(np.add(n_buys[customer_type], comp_n_buys[customer_type]))
        # Deliberately not called `total_reward`: that name is the per-episode
        # list the evaluation loop appends to. Rebinding it to a scalar here
        # broke re-running that loop and corrupted the list's contents.
        mean_total_reward = np.mean(np.add(reward[customer_type], comp_reward[customer_type]))
        # Ratio of the two episode means (not the mean of per-episode ratios
        # that the per-seller reports use).
        print(f'\tAverage Sales Price: {mean_total_reward / total_n_buys}')
        print(f'\tn Buys: {total_n_buys}')
        print(f'\tReward: {mean_total_reward}')
        print()
# Disabled: aggregate figures across all customer types. Uses dedicated names
# so that enabling it does not overwrite the `total_buys` / `total_reward` lists.
#    print(f'Average Offer Price: {np.mean(offer_price + comp_offer_price)}')
#    mean_all_buys = np.mean(np.add(total_buys, comp_total_buys))
#    mean_all_reward = np.mean(np.add(total_reward, comp_total_reward))
#    print(f'Average Sales Price: {mean_all_reward / mean_all_buys}')
#    print(f'Average Total Buys: {mean_all_buys}')
#    print(f'Average Total Reward: {mean_all_reward}')

 --- Total ---
recurring:
	Average Sales Price: 5.462085968602147
	n Buys: 1675.005
	Reward: 9149.02130783844

