In [1]:
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings("ignore")

import sys, os
sys.path.append('/home/jovyan/work/')

from biddingModule.agents import UniformRandomAgent, GymRLAgent
from biddingModule.info_settings import OfferInformationSetting
from biddingModule.engine import MarketEngine
from biddingModule.modeDTO import Mode,Strategy

from tqdm.notebook import tqdm

from stable_baselines import A2C, DQN, PPO2
from stable_baselines.common.policies import *


In [2]:
def get_reward(agent, deals, trade_quantity):
    """Compute an agent's payoff for one market step.

    Parameters
    ----------
    agent : object exposing ``name``, ``role`` and ``reservation_price``.
    deals : mapping from agent name to the price the agent traded at.
    trade_quantity : mapping from agent name to the quantity the agent traded.

    Returns
    -------
    list
        ``[reward, deal_price, reservation_price, quantity]``. If the agent has
        no entry in ``deals`` the result is ``[0, 0, 0, 0]``.
    """
    # Guard clause: an agent absent from `deals` gets an all-zero record.
    if agent.name not in deals:
        return [0, 0, 0, 0]

    price = deals[agent.name]
    amount = trade_quantity[agent.name]
    # A buyer gains when the price is below its reservation price, a seller
    # when it is above, hence the sign flip for buyers.
    direction = 1 if agent.role != 'buyer' else -1

    if price == 0:
        # A deal price of 0 is treated as a zero-reward outcome.
        payoff = 0
    else:
        payoff = (direction * (price - agent.reservation_price)) * amount

    return [payoff, price, agent.reservation_price, amount]

In [3]:
def play_games(agents, setting, n_games=100, max_steps=30):
    """Play ``n_games`` market games and record per-game rewards and win flags.

    Parameters
    ----------
    agents : iterable of agent objects exposing ``name``, ``role``
        (``'buyer'`` or ``'seller'``) and ``get_offer(observation)``.
    setting : information setting exposing ``strategy``,
        ``get_states(ids, market)`` and ``getAgentQuantity(game_idx, name)``.
    n_games : int
        Number of games to play; also the number of rows of each result frame.
    max_steps : int
        Forwarded to ``MarketEngine`` as ``max_steps``.

    Returns
    -------
    (rewards, wins) : tuple of pandas.DataFrame
        Both frames are indexed by game number with one column per agent name,
        columns sorted alphabetically. For each game the stored value comes
        from the last step in which the agent was still unmatched.
    """
    buyer_ids = [agent.name for agent in agents if agent.role == 'buyer']
    seller_ids = [agent.name for agent in agents if agent.role == 'seller']
    ids = set(buyer_ids + seller_ids)
    # Use a sorted list for the frame columns: a set has no defined order and
    # recent pandas versions reject it as `columns`. The result was returned
    # with sorted columns before, so callers see the same layout.
    columns = sorted(ids)
    market = MarketEngine(buyer_ids, seller_ids, strategy=setting.strategy, max_steps=max_steps)

    rewards = pd.DataFrame(0, index=np.arange(n_games), columns=columns, dtype=float)
    wins = pd.DataFrame(0, index=np.arange(n_games), columns=columns, dtype=float)

    # Iterate tqdm directly so the progress bar is exhausted and closed.
    for game_idx in tqdm(range(n_games)):
        # A game runs until the engine reports every participant as done.
        while market.done != ids:
            observations = setting.get_states(ids, market)
            unmatched_agents = [
                agent for agent in agents
                if agent.name not in market.done
            ]
            offers = {
                agent.name: {
                    'price': agent.get_offer(observations[agent.name]),
                    'quantity': setting.getAgentQuantity(game_idx, agent.name),
                }
                for agent in unmatched_agents
            }
            # Total quantity still requested by unmatched buyers / still
            # offered by unmatched sellers in this step.
            allQWant = sum(
                setting.getAgentQuantity(game_idx, agent.name)
                for agent in unmatched_agents if agent.role == 'buyer'
            )
            allQAvailable = sum(
                setting.getAgentQuantity(game_idx, agent.name)
                for agent in unmatched_agents if agent.role == 'seller'
            )
            deals, trade_quantity = market.step(offers)
            for agent in unmatched_agents:
                reward, _deal_price, _reserve_price, quantity_got = get_reward(agent, deals, trade_quantity)
                # `.at` writes straight into the frame; the former chained
                # indexing (`rewards[col][row] = ...`) is not guaranteed to
                # update the original frame.
                rewards.at[game_idx, agent.name] = reward

                offered_quantity = offers[agent.name]['quantity']
                # A win is trading exactly the offered quantity ...
                won = offered_quantity == quantity_got
                if not won:
                    # ... or being the only contributor to its side's total
                    # (own quantity equals the whole unmatched buy/sell total).
                    if agent.role == 'buyer' and offered_quantity == allQWant:
                        won = True
                    if agent.role == 'seller' and offered_quantity == allQAvailable:
                        won = True
                wins.at[game_idx, agent.name] = 1 if won else 0
        market.reset()
    return rewards, wins

## Fixed Agent

In [20]:
# Baseline opponents, all UniformRandomAgent instances: sellers CHAM1-PV..CHAM5-PV
# and buyers CHAM2..CHAM5. Buyer CHAM1 is deliberately left out because it is
# supplied by the rl_agents list instead.
# NOTE(review): the second positional argument (1.68 / 5) is presumably the
# reservation price used by get_reward — confirm against UniformRandomAgent.
fixed_agents = [
    *(UniformRandomAgent('seller', 1.68, name=f'CHAM{idx}-PV') for idx in range(1, 6)),
    *(UniformRandomAgent('buyer', 5, name=f'CHAM{idx}') for idx in range(2, 6)),
]

## RL Agent

In [21]:
# The learning participant(s) under evaluation; currently only buyer CHAM1.
# To evaluate a different participant, swap the entry for one of
#     GymRLAgent('seller', 1.68, discretization=20, name='CHAM<k>-PV')   # k = 1..5
#     GymRLAgent('buyer', 5, discretization=20, name='CHAM<k>')          # k = 1..5
# and make sure the same name is not also present in fixed_agents.
rl_agents = [GymRLAgent('buyer', 5, discretization=20, name='CHAM1')]

In [22]:
# Information setting shared by every game of the single-agent run.
# Other strategies used in this notebook: Strategy.WEIGHT_AVG, Strategy.DISKDA.
setting = OfferInformationSetting(
    5,  # NOTE(review): meaning of this positional argument is not visible here — confirm
    mode=Mode.TEST,  # set data train/test/all
    strategy=Strategy.UNIKDA,
)

1752
1752


## Model Selection

In [30]:
# Pre-trained DQN checkpoints, one per entry of rl_agents and in the same order.
# The commented-out lines record the checkpoint used for each other participant;
# uncomment the one matching the agent enabled in rl_agents.
models = [
#     DQN.load("../model/DQN/seller1_Mlp_weightedAVG"),
#     DQN.load("../model/DQN/seller2_LnMlp_weightedAVG"),
#     DQN.load("../model/DQN/seller3_LnMlp_weightedAVG"),
#     DQN.load("../model/DQN/seller4_Mlp_disKDA"),
#     DQN.load("../model/DQN/seller5_Mlp_disKDA"),
    DQN.load("../model/DQN/buyer1_Mlp_uniKDA"),
#     DQN.load("../model/DQN/buyer2_LnMlp_disKDA"),
#     DQN.load("../model/DQN/buyer3_LnMlp_weightedAVG"),
#     DQN.load("../model/DQN/buyer4_Mlp_uniKDA"),
#     DQN.load("../model/DQN/buyer5_LnMlp_weightedAVG"),
]
# model = DQN.load("../model/DQN/buyer_LnMlp_disKDA")
# buyer5_Mlp_weightedAVG

# zip() stops at the shorter sequence, which would silently leave some RL
# agents without a model (or ignore some checkpoints); fail loudly instead.
if len(models) != len(rl_agents):
    raise ValueError(
        f"expected one model per RL agent, got {len(models)} models "
        f"for {len(rl_agents)} agents"
    )
for rl_agent, model in zip(rl_agents, models):
    rl_agent.model = model

Loading a model without an environment, this model cannot be trained until it has a valid environment.








In [31]:
# Play one game per round of the setting (setting.num_round games) with the
# baseline agents plus the RL agent(s), and report the wall-clock time in minutes.
start = time.time()
df_rewards, df_wins = play_games(
    fixed_agents + rl_agents,
    setting,
    setting.num_round,
)
elapsed_minutes = (time.time() - start) / 60
print(elapsed_minutes, " mins")

HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.627977975209554  mins


In [32]:
# Persist the per-round rewards and win flags of the run above.
# File naming: <participant>_<rewards|wins>_<policy>_<strategy>.csv, with
# strategy one of weightedAVG / uniKDA. The all-random baseline was saved as
#   ./nonDropResults/random_rewards_weightedAVG.csv
#   ./nonDropResults/random_wins_weightedAVG.csv
# Create the output directory first so a missing folder cannot discard a
# finished run when to_csv is called.
os.makedirs("./nonDropResults", exist_ok=True)
df_rewards.to_csv("./nonDropResults/cham1_rewards_Mlp_uniKDA.csv")
df_wins.to_csv("./nonDropResults/cham1_wins_Mlp_uniKDA.csv")

## Full sweep: every participant × strategy × policy

In [72]:
# Full evaluation sweep: each participant in turn is played by a DQN-driven
# GymRLAgent against UniformRandomAgent opponents, once per
# (strategy, policy) checkpoint. Results are written to ./nonDropResults.
strategies = [Strategy.UNIKDA, Strategy.DISKDA, Strategy.WEIGHT_AVG]
strategy_names = ['uniKDA', 'disKDA', 'weightedAVG']  # file-name tags, parallel to `strategies`
model_names = ['Mlp', 'LnMlp']
players = ['CHAM1-PV', 'CHAM2-PV', 'CHAM3-PV', 'CHAM4-PV', 'CHAM5-PV', 'CHAM1', 'CHAM2', 'CHAM3', 'CHAM4', 'CHAM5']

# Make sure the output directory exists before starting the long-running
# sweep, so the first to_csv call cannot fail on a missing folder.
os.makedirs('./nonDropResults', exist_ok=True)

for agent in players:
    # Names containing 'PV' are sellers; all others are buyers.
    role, reservation_price = ('seller', 1.68) if 'PV' in agent else ('buyer', 5)

    # Every participant except the evaluated one bids with a UniformRandomAgent.
    fixed_agents = [
        UniformRandomAgent('seller', 1.68, name=player_name) if 'PV' in player_name
        else UniformRandomAgent('buyer', 5, name=player_name)
        for player_name in players
        if player_name != agent
    ]

    # Checkpoints are named "<role><participant number>_<policy>_<strategy>".
    # Take the number from the digits of the name instead of a fixed index.
    participant_number = ''.join(ch for ch in agent if ch.isdigit())
    agent_type_name = role + participant_number
    rl_agents = [GymRLAgent(role, reservation_price, discretization=20, name=agent)]
    agent_name = agent.lower().replace('-', '')

    for strat, strat_name in zip(strategies, strategy_names):
        setting = OfferInformationSetting(5, mode=Mode.TEST, strategy=strat)
        for model_name in model_names:
            modelDir = f'../model/DQN/{agent_type_name}_{model_name}_{strat_name}'
            models = [DQN.load(modelDir)]
            for rl_agent, model in zip(rl_agents, models):
                rl_agent.model = model
            start = time.time()
            df_rewards, df_wins = play_games(fixed_agents + rl_agents, setting, setting.num_round)
            print((time.time()-start)/60, " mins")
            df_rewards.to_csv(f'./nonDropResults/{agent_name}_rewards_{model_name}_{strat_name}.csv')
            df_wins.to_csv(f'./nonDropResults/{agent_name}_wins_{model_name}_{strat_name}.csv')

1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.014400283495585  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.6799484213193259  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7006513079007466  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.9455047806104024  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.0089826941490174  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.9299710988998413  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.8030086000760397  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.4875346024831138  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.8568119327227275  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.327967592080434  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.8925067464510599  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7168229818344116  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.8623993237813314  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.081853703657786  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.0537018616994223  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.9988842725753784  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.0786526958147684  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7095828930536905  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.5202630281448364  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.5588409860928854  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.3523747126261394  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.3497364521026611  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.4030261953671774  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.4067360122998556  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.5845348238945007  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.6116936564445496  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.022176976998647  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.8676884730656942  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.111778140068054  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


2.7729430238405866  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


5.031181267897288  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


3.626310336589813  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7106245279312133  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.8165690819422404  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.4416388750076294  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.358291514714559  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.606143828233083  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.5660301605860392  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.2990876674652099  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.3224697788556417  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.2328731258710226  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.3483767787615457  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.4046824375788372  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.701935609181722  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.2473087469736734  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.3173202196757  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.2816653490066527  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.2982049942016602  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.8619852582613627  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.6993513067563375  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.4355140248934428  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7712528864542643  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.507627781232198  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.372874395052592  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.856693967183431  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7566919604937234  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.3416179895401001  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.4232701500256857  mins
1752
1752
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


1.7698957165082296  mins
Loading a model without an environment, this model cannot be trained until it has a valid environment.








HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))


4.177862326304118  mins


In [26]:
# Per-agent summary statistics of the per-round rewards held in df_rewards.
# NOTE(review): cell execution counts are out of order, so confirm which run
# produced the df_rewards summarised here.
df_rewards.describe()

Unnamed: 0,CHAM1,CHAM1-PV,CHAM2,CHAM2-PV,CHAM3,CHAM3-PV,CHAM4,CHAM4-PV,CHAM5,CHAM5-PV
count,1752.0,1752.0,1752.0,1752.0,1752.0,1752.0,1752.0,1752.0,1752.0,1752.0
mean,2.913799,21.655402,19.995028,19.109598,15.724042,17.952407,14.95944,6.850195,18.092246,13.558397
std,4.140188,38.580845,47.126917,33.527729,42.145652,31.287874,40.105599,12.078969,47.210617,23.797111
min,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0
25%,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0
50%,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0
75%,5.08875,28.125,10.0125,26.1465,2.46,25.4375,5.9325,9.37,8.23,18.6525
max,20.97,228.78,287.0,200.88,280.72,186.93,282.24,69.75,308.2,139.5


In [27]:
# df_rewards.plot()

Percentage of rounds each agent won (out of 929 rounds in the run that produced the output below)

In [12]:
# Share of rounds each agent won, formatted as a percentage string.
total_wins = df_wins.sum(axis=0)
n_rounds = len(df_wins)
total_wins.apply(lambda won: f'{round(100 * won / n_rounds, 2)} %')

CHAM1       54.79 %
CHAM1-PV    71.69 %
CHAM2       47.36 %
CHAM2-PV    73.63 %
CHAM3       55.33 %
CHAM3-PV    74.17 %
CHAM4       52.42 %
CHAM4-PV    78.04 %
CHAM5       71.15 %
CHAM5-PV     73.3 %
dtype: object