In [1]:
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings("ignore")

import sys, os
sys.path.append('/home/jovyan/work/')

from biddingModule.agents import UniformRandomAgent, GymRLAgent
from biddingModule.info_settings import OfferInformationSetting
from biddingModule.engine import MarketEngine
from biddingModule.modeDTO import Mode,Strategy

from tqdm.notebook import tqdm

from stable_baselines import A2C, DQN, PPO2
from stable_baselines.common.policies import *


In [2]:
def get_reward(agent, deals, trade_quantity):
    """Compute an agent's payoff for one market step.

    Parameters
    ----------
    agent : object exposing ``name``, ``role`` and ``reservation_price``.
    deals : mapping of agent name -> deal price for agents that got a deal.
    trade_quantity : mapping of agent name -> quantity traded.

    Returns
    -------
    list
        ``[reward, deal_price, reservation_price, quantity_got]``.
        ``[0, 0, 0, 0]`` when the agent has no entry in ``deals``.
        The reward is 0 for a deal price of 0; otherwise it is the margin
        between deal price and reservation price (sign flipped for buyers,
        who profit from paying less) multiplied by the traded quantity.
    """
    name = agent.name
    if name not in deals:
        return [0, 0, 0, 0]

    deal_price = deals[name]
    quantity_got = trade_quantity[name]

    # Buyers gain when the price is below their reservation price,
    # every other role gains when it is above.
    direction = 1 if agent.role != 'buyer' else -1

    if deal_price == 0:
        reward = 0
    else:
        margin = direction * (deal_price - agent.reservation_price)
        reward = margin * quantity_got

    return [reward, deal_price, agent.reservation_price, quantity_got]

In [3]:
def play_games(agents, setting, n_games=100, max_steps=30):
    """Play ``n_games`` market games and record per-agent rewards and wins.

    For every game, the market is stepped until ``market.done`` equals the set
    of all buyer/seller names. At each step every agent not yet in
    ``market.done`` submits an offer (price from ``agent.get_offer``, quantity
    from ``setting.getAgentQuantity``), and the outcome of the step overwrites
    that agent's reward and win flag for the current game.

    An agent is flagged as a winner for a step when either

    * the quantity it traded equals the quantity it offered, or
    * the quantity it offered equals the total quantity offered on its own
      side (all unmatched buyers, or all unmatched sellers) in that step.

    Parameters
    ----------
    agents : list
        Agents exposing ``name``, ``role`` (``'buyer'``/``'seller'``) and
        ``get_offer(observation)``.
    setting : object
        Provides ``strategy``, ``get_states(ids, market)`` and
        ``getAgentQuantity(game_idx, agent_name)``.
        NOTE(review): the quantity is looked up once per agent per step and
        reused for the side totals; this assumes ``getAgentQuantity`` is a
        pure lookup - confirm.
    n_games : int
        Number of games to play (rows of the returned frames).
    max_steps : int
        Forwarded to ``MarketEngine``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``rewards`` and ``wins``: one row per game, one column per agent name,
        columns sorted alphabetically.
    """
    buyer_ids = [agent.name for agent in agents if agent.role == 'buyer']
    seller_ids = [agent.name for agent in agents if agent.role == 'seller']
    ids = set(buyer_ids + seller_ids)

    # Use a sorted list for the columns: a set has no defined order and is
    # rejected as ``columns`` by recent pandas releases.
    columns = sorted(ids)

    market = MarketEngine(buyer_ids, seller_ids, strategy=setting.strategy, max_steps=max_steps)

    rewards = pd.DataFrame(0, index=np.arange(n_games), columns=columns, dtype=float)
    wins = pd.DataFrame(0, index=np.arange(n_games), columns=columns, dtype=float)

    for game_idx in tqdm(range(n_games)):
        while market.done != ids:
            observations = setting.get_states(ids, market)
            unmatched_agents = [
                agent for agent in agents
                if agent.name not in market.done
            ]
            offers = {
                agent.name: {
                    'price': agent.get_offer(observations[agent.name]),
                    'quantity': setting.getAgentQuantity(game_idx, agent.name),
                }
                for agent in unmatched_agents
            }
            # Total quantity requested / available among the agents that are
            # still unmatched, taken from the offers built just above.
            total_wanted = sum(
                offers[agent.name]['quantity']
                for agent in unmatched_agents if agent.role == 'buyer'
            )
            total_available = sum(
                offers[agent.name]['quantity']
                for agent in unmatched_agents if agent.role == 'seller'
            )

            deals, trade_quantity = market.step(offers)

            for agent in unmatched_agents:
                reward, _deal_price, _reserve_price, quantity_got = get_reward(
                    agent, deals, trade_quantity
                )
                offered_quantity = offers[agent.name]['quantity']
                won = (
                    offered_quantity == quantity_got
                    or (agent.role == 'buyer' and offered_quantity == total_wanted)
                    or (agent.role == 'seller' and offered_quantity == total_available)
                )
                # ``.at`` writes straight into the frame; chained indexing
                # (``frame[col][row] = x``) may write to a temporary copy.
                rewards.at[game_idx, agent.name] = reward
                wins.at[game_idx, agent.name] = 1 if won else 0
        market.reset()

    return rewards, wins

## Fixed Agent

In [4]:
# Baseline population: five PV sellers (reservation price 1.68) followed by
# five buyers (reservation price 5), all bidding with UniformRandomAgent.
fixed_agents = [
    *(
        UniformRandomAgent('seller', 1.68, name=f'CHAM{house}-PV')
        for house in range(1, 6)
    ),
    *(
        UniformRandomAgent('buyer', 5, name=f'CHAM{house}')
        for house in range(1, 6)
    ),
]

## RL Agent

In [5]:
# RL-driven agents that take part in the games alongside `fixed_agents`.
# The list is currently empty; uncomment entries below to enable them.
# NOTE(review): the template names are identical to the names used in
# `fixed_agents`, so presumably an enabled RL agent is meant to replace the
# fixed agent of the same name rather than run next to it - confirm.
rl_agents=[
#     GymRLAgent('seller', 1.68, discretization=20,name='CHAM1-PV'),
#     GymRLAgent('seller', 1.68, discretization=20,name='CHAM2-PV'),
#     GymRLAgent('seller', 1.68, discretization=20,name='CHAM3-PV'),
#     GymRLAgent('seller', 1.68, discretization=20,name='CHAM4-PV'),
#     GymRLAgent('seller', 1.68, discretization=20,name='CHAM5-PV'),
#     GymRLAgent('buyer', 5, discretization=20,name='CHAM1'),
#     GymRLAgent('buyer', 5, discretization=20,name='CHAM2'),
#     GymRLAgent('buyer', 5, discretization=20,name='CHAM3'),
#     GymRLAgent('buyer', 5, discretization=20,name='CHAM4'),
#     GymRLAgent('buyer', 5, discretization=20,name='CHAM5'),
]

In [21]:
# Information setting used for the evaluation run.
# `mode` chooses which data is used: train / test / all.
setting = OfferInformationSetting(
    5,
    mode=Mode.TEST,
    strategy=Strategy.UNIKDA,
)

1752
929


## Model Selection

In [22]:
# Alternative single-model checkpoints kept for reference:
# model = PPO2.load("../model/PPO2/buyer3_MlpLstm_disKDA")
# model = PPO2.load("../model/PPO2/buyer3_MlpLnLstm_disKDA")
# model = DQN.load("../model/DQN/buyer3_lnMlp_disKDA")

# One trained policy per entry of `rl_agents`, in the same order.
models = [
#     DQN.load("../model/DQN/seller1_Mlp_disKDA"),
#     DQN.load("../model/DQN/seller2_Mlp_disKDA"),
#     DQN.load("../model/DQN/seller3_Mlp_disKDA"),
#     DQN.load("../model/DQN/seller4_Mlp_disKDA"),
#     DQN.load("../model/DQN/seller5_Mlp_disKDA"),
#     DQN.load("../model/DQN/buyer1_Mlp_disKDA"),
#     DQN.load("../model/DQN/buyer2_Mlp_disKDA"),
#     DQN.load("../model/DQN/buyer3_Mlp_disKDA"),
#     DQN.load("../model/DQN/buyer4_Mlp_disKDA"),
#     DQN.load("../model/DQN/buyer5_Mlp_weightedAVG"),
]
# model = DQN.load("../model/DQN/buyer_LnMlp_disKDA")

# `zip` stops at the shorter sequence, which would silently leave some agents
# without a model (or some models unused); fail loudly on a mismatch instead.
if len(rl_agents) != len(models):
    raise ValueError(
        f"Expected one model per RL agent, got {len(rl_agents)} agent(s) "
        f"and {len(models)} model(s)"
    )
for rl_agent, model in zip(rl_agents, models):
    rl_agent.model = model

In [29]:
# Play one game per round of the setting with all agents and report the
# wall-clock duration in minutes.
start = time.time()
df_rewards, df_wins = play_games(
    fixed_agents + rl_agents,
    setting,
    n_games=setting.num_round,
)
elapsed_minutes = (time.time() - start) / 60
print(elapsed_minutes, " mins")

HBox(children=(FloatProgress(value=0.0, max=929.0), HTML(value='')))




JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [27]:
# Summary statistics of the recorded rewards, one column per agent.
df_rewards.describe()

NameError: name 'df_rewards' is not defined

Percentage of rounds won by each agent (out of 929 rounds)

In [28]:
# Wins per agent as a share of all recorded rounds, rendered as "<pct> %".
(
    df_wins
    .sum(axis=0)
    .apply(lambda val: f'{round(100*val/len(df_wins),2)} %')
)

NameError: name 'df_wins' is not defined