In [1]:
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings("ignore")

import sys, os
sys.path.append('/home/jovyan/work/')

from biddingModule.agents import UniformRandomAgent, GymRLAgent
from biddingModule.info_settings import OfferInformationSetting
from biddingModule.engine import MarketEngine
from biddingModule.modeDTO import Mode,Strategy

from tqdm.notebook import tqdm

from stable_baselines import A2C, DQN, PPO2
from stable_baselines.common.policies import *


In [2]:
def get_reward(agent, deals, trade_quantity):
    """Compute one agent's payoff for a market step.

    Parameters
    ----------
    agent : object exposing ``name``, ``role`` and ``reservation_price``.
    deals : mapping of agent name -> deal price for agents that traded.
    trade_quantity : mapping of agent name -> quantity actually traded.

    Returns
    -------
    list
        ``[reward, deal_price, reservation_price, quantity_got]``.
        ``[0, 0, 0, 0]`` when the agent has no entry in ``deals``.
        The reward is 0 when the deal price is 0; otherwise it is the
        per-unit surplus (reservation minus price for a buyer, price minus
        reservation for any other role) multiplied by the traded quantity.

    Raises
    ------
    KeyError
        If the agent appears in ``deals`` but not in ``trade_quantity``.
    """
    key = agent.name
    if key not in deals:
        return [0, 0, 0, 0]

    price = deals[key]
    amount = trade_quantity[key]
    # Buyers gain when paying below their reservation price, so the
    # difference is negated for them.
    direction = 1 if agent.role != 'buyer' else -1

    if price == 0:
        payoff = 0
    else:
        payoff = (direction * (price - agent.reservation_price)) * amount

    return [payoff, price, agent.reservation_price, amount]

In [3]:
def play_games(agents, setting, n_games=100, max_steps=30):
    """Play ``n_games`` market rounds and record per-agent rewards and wins.

    For every game the market is stepped until ``market.done`` equals the set
    of all buyer/seller names. At each step every still-unmatched agent
    submits an offer (price from ``agent.get_offer``, quantity from
    ``setting.getAgentQuantity``), and its reward/win cells for that game are
    overwritten, so the value from the agent's last active step is the one
    that remains.

    An agent is marked as winning a step when the traded quantity equals the
    quantity it offered, or when its offered quantity equals the total offered
    by all unmatched agents on its own side (buyers or sellers).

    Parameters
    ----------
    agents : list of agent objects exposing ``name``, ``role`` and
        ``get_offer(observation)``.
    setting : information setting exposing ``strategy``,
        ``get_states(ids, market)`` and ``getAgentQuantity(game_idx, name)``.
    n_games : int, number of games (rows of the result frames).
    max_steps : int, forwarded to ``MarketEngine``.

    Returns
    -------
    (rewards, wins) : tuple of ``pandas.DataFrame``
        Both are float frames indexed by game number with one column per
        agent name, columns sorted alphabetically.
    """
    buyer_ids = [agent.name for agent in agents if agent.role == 'buyer']
    seller_ids = [agent.name for agent in agents if agent.role == 'seller']
    # Kept as a set because it is compared against ``market.done`` below.
    ids = set(buyer_ids + seller_ids)
    # A sorted list gives a deterministic column order (a set does not).
    columns = sorted(ids)

    market = MarketEngine(buyer_ids, seller_ids, strategy=setting.strategy, max_steps=max_steps)

    rewards = pd.DataFrame(0, index=np.arange(n_games), columns=columns, dtype=float)
    wins = pd.DataFrame(0, index=np.arange(n_games), columns=columns, dtype=float)

    for game_idx in tqdm(range(n_games)):
        while market.done != ids:
            observations = setting.get_states(ids, market)
            unmatched_agents = [
                agent for agent in agents
                if agent.name not in market.done
            ]
            offers = {
                agent.name: {
                    'price': agent.get_offer(observations[agent.name]),
                    'quantity': setting.getAgentQuantity(game_idx, agent.name),
                }
                for agent in unmatched_agents
            }
            # Total quantity requested / available among unmatched agents,
            # taken from the offers just built instead of re-querying setting.
            all_q_want = sum(
                offers[agent.name]['quantity']
                for agent in unmatched_agents if agent.role == 'buyer'
            )
            all_q_available = sum(
                offers[agent.name]['quantity']
                for agent in unmatched_agents if agent.role == 'seller'
            )

            deals, trade_quantity = market.step(offers)

            for agent in unmatched_agents:
                reward, _, _, quantity_got = get_reward(agent, deals, trade_quantity)
                offered = offers[agent.name]['quantity']
                won = (
                    offered == quantity_got
                    or (agent.role == 'buyer' and offered == all_q_want)
                    or (agent.role == 'seller' and offered == all_q_available)
                )
                # ``.at`` writes into the frame itself; chained indexing
                # (``df[col][row] = v``) may write to a temporary copy.
                rewards.at[game_idx, agent.name] = reward
                wins.at[game_idx, agent.name] = 1 if won else 0
        market.reset()

    return rewards, wins

## Fixed Agent

In [286]:
# Baseline population of random bidders: five sellers CHAM1-PV .. CHAM5-PV
# and buyers CHAM2, CHAM3 and CHAM5. Buyers CHAM1 and CHAM4 are deliberately
# left out of this list.
# The second positional argument (1.68 / 5) is presumably the agent's
# reservation price -- TODO confirm against UniformRandomAgent.
fixed_agents = [
    UniformRandomAgent('seller', 1.68, name=f'CHAM{idx}-PV')
    for idx in range(1, 6)
] + [
    UniformRandomAgent('buyer', 5, name=f'CHAM{idx}')
    for idx in (2, 3, 5)
]

## RL Agent

In [287]:
# Learning agents under evaluation: buyers CHAM1 and CHAM4, each created with
# the value 5 as second argument and a 20-level discretization.
# To evaluate other participants, change the names below (sellers use the
# 'seller' role, value 1.68 and the '-PV' name suffix).
rl_agents = [
    GymRLAgent('buyer', 5, discretization=20, name=buyer_name)
    for buyer_name in ('CHAM1', 'CHAM4')
]

In [304]:
# Information setting shared by all agents for this run.
# `mode` selects which data split is used (train / test / all).
# The meaning of the leading 5 is not visible here -- TODO confirm against
# OfferInformationSetting.
# Alternative strategy noted by the author: WEIGHT_AVG.
setting = OfferInformationSetting(
    5,
    mode=Mode.TEST,
    strategy=Strategy.UNIKDA,
)

1752
929


## Model Selection

In [308]:
# Pretrained policies, attached to `rl_agents` BY POSITION: models[i] goes to
# rl_agents[i]. Uncomment exactly one entry per RL agent, in the same order
# as the agents are listed in `rl_agents`.
#
# Other checkpoints used in earlier experiments:
# model = PPO2.load("../model/PPO2/buyer3_MlpLstm_disKDA")
# model = PPO2.load("../model/PPO2/buyer3_MlpLnLstm_disKDA")
# model = DQN.load("../model/DQN/buyer3_lnMlp_disKDA")
models = [
#     DQN.load("../model/DQN/seller1_Mlp_weightedAVG"),
#     DQN.load("../model/DQN/seller2_LnMlp_weightedAVG"),
#     DQN.load("../model/DQN/seller3_LnMlp_weightedAVG"),
#     DQN.load("../model/DQN/seller4_Mlp_disKDA"),
#     DQN.load("../model/DQN/seller5_Mlp_disKDA"),
#     DQN.load("../model/DQN/buyer1_LnMlp_uniKDA"),
#     DQN.load("../model/DQN/buyer2_LnMlp_disKDA"),
#     DQN.load("../model/DQN/buyer3_LnMlp_weightedAVG"),
    DQN.load("../model/DQN/buyer4_Mlp_uniKDA"),
#     DQN.load("../model/DQN/buyer5_LnMlp_weightedAVG"),
]
# model = DQN.load("../model/DQN/buyer_LnMlp_disKDA")
# buyer5_Mlp_weightedAVG

# zip() stops at the shorter sequence without any error, so a length mismatch
# leaves trailing RL agents without a model and can pair a model with the
# wrong agent. print() is used because warnings are globally ignored in this
# notebook.
if len(rl_agents) != len(models):
    print(
        f"WARNING: {len(rl_agents)} RL agent(s) but {len(models)} model(s); "
        "models are assigned by position and the unmatched entries are skipped."
    )

for rl_agent, model in zip(rl_agents, models):
    rl_agent.model = model

Loading a model without an environment, this model cannot be trained until it has a valid environment.








In [309]:
# Play one game per round available in `setting` with the combined
# population, and report the wall-clock duration in minutes.
start = time.time()
all_agents = fixed_agents + rl_agents
df_rewards, df_wins = play_games(all_agents, setting, n_games=setting.num_round)
elapsed_minutes = (time.time() - start) / 60
print(elapsed_minutes, " mins")

HBox(children=(FloatProgress(value=0.0, max=929.0), HTML(value='')))

2.123331880569458  mins


In [310]:
# Persist both result tables. The file names encode the evaluated agent and
# model variant (the author's note lists "weightedAVG" as another variant).
for result_frame, csv_path in (
    (df_rewards, "./results/cham4_rewards_Mlp_uniKDA.csv"),
    (df_wins, "./results/cham4_wins_Mlp_uniKDA.csv"),
):
    result_frame.to_csv(csv_path)

In [296]:
# Summary statistics of the recorded reward, one column per agent and one
# observation per game.
df_rewards.describe()

Unnamed: 0,CHAM1,CHAM1-PV,CHAM2,CHAM2-PV,CHAM3,CHAM3-PV,CHAM4,CHAM4-PV,CHAM5,CHAM5-PV
count,929.0,929.0,929.0,929.0,929.0,929.0,929.0,929.0,929.0,929.0
mean,3.633885,47.775061,25.868368,41.961777,15.291582,38.911474,46.764915,15.528061,20.455074,29.733947
std,4.247429,51.664888,44.168603,45.597756,32.476812,41.64511,56.58946,16.50009,38.803225,31.989292
min,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.0,0.0,-0.0,0.0,-0.0,0.0,2.84,0.0,-0.0,0.0
50%,2.31,28.99,7.21,24.48,-0.0,24.6675,16.2,9.6,2.7,18.76
75%,6.35,84.24,23.29,73.32,12.16,66.97,83.033333,27.94,20.32,52.5
max,21.28,220.41,266.0,192.51,240.62,178.56,226.98,66.96,254.61,133.92


In [251]:
# df_rewards.plot()

Percentage of rounds won by each agent (out of 929 rounds)

In [12]:
# Share of games each agent won, formatted as a percentage string
# rounded to two decimals.
wins_per_agent = df_wins.sum(axis=0)
wins_per_agent.apply(lambda won: f'{round(100*won/len(df_wins),2)} %')

CHAM1       54.79 %
CHAM1-PV    71.69 %
CHAM2       47.36 %
CHAM2-PV    73.63 %
CHAM3       55.33 %
CHAM3-PV    74.17 %
CHAM4       52.42 %
CHAM4-PV    78.04 %
CHAM5       71.15 %
CHAM5-PV     73.3 %
dtype: object