In [None]:
import json
import numpy as np
import pandas as pd

In [None]:
class Seller:
    """Seller whose two asks are scaled copies of a randomly drawn private value."""

    def __init__(self, min_value, max_value, alpha_s1, alpha_s2):
        """Store the private-value bounds and the two ask scaling factors.

        Args:
            min_value: Lower bound used when drawing the private value.
            max_value: Upper bound used when drawing the private value.
            alpha_s1: Multiplier applied to the private value for one ask.
            alpha_s2: Multiplier applied to the private value for the other ask.
        """
        self.min_value, self.max_value = min_value, max_value
        self.alpha_s1, self.alpha_s2 = alpha_s1, alpha_s2

    def seller_strategy(self):
        """Draw a private value and derive the two asks from it.

        Returns:
            A ``(theta_s, asks)`` pair: the private value drawn uniformly
            between ``min_value`` and ``max_value``, and a two-element list of
            asks in ascending order. Each ask is the private value times one
            scaling factor, limited to at most 1.0.
        """
        theta_s = np.random.uniform(self.min_value, self.max_value)

        # Scale the private value by each factor; no ask may exceed 1.0.
        capped_asks = [
            min(alpha * theta_s, 1.0)
            for alpha in (self.alpha_s1, self.alpha_s2)
        ]

        # Lowest ask first, regardless of which factor produced it.
        return theta_s, sorted(capped_asks)

In [None]:
class Buyer:
    """Buyer that bids random fractions of its private value and logs experiences.

    Each episode is recorded in ``replay_buffer`` under an integer index that
    starts at 1. An entry is first stored as a dict by ``buyer_strategy`` and
    then replaced by a flat list once ``update_reward_and_next_state`` is
    called for the same episode.
    """

    def __init__(self, min_value, max_value, initial_req_quantity=2):
        """Store the private-value bounds and the required quantity.

        Args:
            min_value: Lower bound used when drawing the private value.
            max_value: Upper bound used when drawing the private value.
            initial_req_quantity: Quantity the buyer needs at the start of
                every episode.
        """
        self.min_value = min_value
        self.max_value = max_value
        # episode index -> pending dict or finalised flat experience list
        self.replay_buffer = dict()
        self.replay_buffer_index = 1
        self.initial_req_quantity = initial_req_quantity

    def buyer_strategy(self):
        """Draw a private value, pick two random actions and form the bids.

        The two actions are independent uniform draws from [0, 1), sorted in
        descending order. The pending experience (state and action, with
        reward and next state still ``None``) is stored under the current
        replay-buffer index.

        Returns:
            A ``(theta_b, b)`` pair: the private value, and a NumPy array with
            the two bids (private value times each action), highest first.
        """
        theta_b = np.random.uniform(self.min_value, self.max_value)
        current_state = [self.initial_req_quantity, theta_b]

        actions = np.random.uniform(0, 1, 2).tolist()
        actions.sort(reverse=True)

        self.replay_buffer[self.replay_buffer_index] = {
            "state": current_state,
            "action": actions,
            "reward": None,
            "next_state": None,
        }

        b = theta_b * np.array(actions)
        return theta_b, b

    def update_reward_and_next_state(self, clearing_price, clearing_quantity, theta_b):
        """Complete the pending experience with the auction outcome.

        The reward is the negated total payment
        (``-clearing_price * clearing_quantity``), or 0.0 when nothing
        cleared. The finalised entry is the flat list
        ``state + action + [reward] + next_state`` and the buffer index is
        advanced afterwards.

        Args:
            clearing_price: Price at which the auction cleared.
            clearing_quantity: Number of units cleared (0 means no trade).
            theta_b: The buyer's private value for this episode.

        Raises:
            RuntimeError: If there is no pending experience for the current
                index, i.e. ``buyer_strategy`` was not called first.
        """
        pending = self.replay_buffer.get(self.replay_buffer_index)
        if not isinstance(pending, dict):
            raise RuntimeError(
                "buyer_strategy() must be called before "
                "update_reward_and_next_state()"
            )

        reward = 0.0
        next_state = self.initial_req_quantity

        if clearing_quantity != 0:
            reward = -(clearing_price * clearing_quantity)
            next_state = self.initial_req_quantity - clearing_quantity

        pending["reward"] = reward
        pending["next_state"] = [next_state, theta_b]

        exp = list()
        exp.extend(pending["state"])
        exp.extend(pending["action"])
        exp.append(pending["reward"])
        exp.extend(pending["next_state"])

        self.replay_buffer[self.replay_buffer_index] = exp
        self.replay_buffer_index += 1

    def print_replay_buffer(self):
        """Print the whole replay buffer dict."""
        print(self.replay_buffer)

    def save_replay_buffer(self, path="replay_buffer.csv"):
        """Write all finalised experiences to a header-less, index-less CSV.

        Args:
            path: Destination file; defaults to ``"replay_buffer.csv"``.

        A still-pending entry (strategy chosen, outcome not yet recorded) is
        skipped because it has no reward or next state to write.
        """
        rows = [exp for exp in self.replay_buffer.values() if isinstance(exp, list)]
        data = pd.DataFrame(rows)
        data.to_csv(path, header=False, index=False)

In [None]:
def auction_clearing(bids_list, asks_list, verbose=False):
    """Clear a uniform-price double auction between sorted bids and asks.

    Bids are ranked from highest to lowest and asks from lowest to highest.
    The i-th bid is matched with the i-th ask; units clear for as long as the
    bid is at least the ask. The clearing price is the midpoint of the last
    (marginal) matched pair. For two bids and two asks this gives quantity 2
    when the lower bid covers the higher ask, quantity 1 when only the best
    bid covers the best ask, and no trade otherwise.

    Args:
        bids_list: Iterable of bid prices (any order); not modified.
        asks_list: Iterable of ask prices (any order); not modified.
        verbose: When True, print the sorted asks/bids and the outcome.
            Off by default so long simulations do not flood the output.

    Returns:
        ``(clearing_price, clearing_quantity)``. When nothing clears
        (including empty input) this is ``(-1.0, 0)``.
    """
    bids = sorted(bids_list, reverse=True)
    asks = sorted(asks_list)

    clearing_price = -1.0
    clearing_quantity = 0

    if verbose:
        print("asks", asks)
        print("bids", bids)

    # Bids fall and asks rise along the ranking, so once a pair fails to
    # match no later pair can match either.
    for bid, ask in zip(bids, asks):
        if not (bid >= ask):
            break
        clearing_price = (bid + ask) / 2
        clearing_quantity += 1

    if verbose:
        print("cp", clearing_price)
        print("cq", clearing_quantity)

    return clearing_price, clearing_quantity

In [None]:
# Configurable parameters for the simulation.

# Bounds passed to Seller; its private value is drawn uniformly between them.
seller_min_value = 0
seller_max_value = 1

# Bounds passed to Buyer; its private value is drawn uniformly between them.
buyer_min_value = 0
buyer_max_value = 1

# The seller's scaling factors for its bidding strategy are kept fixed
# (the equilibrium strategy for the seller). Each factor multiplies the
# seller's private value to produce one ask.
alpha_s2 = 1.2207
alpha_s1 = 1.10806

# Below are the buyer's scaling factors when the seller follows the
# equilibrium strategy above; our wholesale strategy should learn these numbers.
# alpha_b1 = 0.66667
# alpha_b2 = 0.66667

In [None]:
def single_shot_auction_simulator(number_of_episodes, seed=None):
    """Simulate independent single-shot auctions and save the buyer's experiences.

    One seller and one buyer are created from the module-level configuration.
    In every episode both draw a fresh strategy, the auction is cleared, and
    the buyer records the outcome. After the last episode the buyer's replay
    buffer is written out via ``Buyer.save_replay_buffer``.

    Args:
        number_of_episodes: Integer number of auctions to simulate; zero or a
            negative value runs no episodes.
        seed: Optional seed for NumPy's global random state so a run can be
            reproduced. ``None`` (default) leaves the random state untouched.
    """
    if seed is not None:
        # Seller and Buyer draw from NumPy's global RNG, so seed that one.
        np.random.seed(seed)

    seller = Seller(seller_min_value, seller_max_value, alpha_s1, alpha_s2)
    buyer = Buyer(buyer_min_value, buyer_max_value)

    for _ in range(number_of_episodes):
        _theta_s, asks = seller.seller_strategy()
        theta_b, bids = buyer.buyer_strategy()

        clearing_price, clearing_quantity = auction_clearing(bids, asks)

        buyer.update_reward_and_next_state(clearing_price, clearing_quantity, theta_b)

    buyer.save_replay_buffer()

In [None]:
# Run 100,000 auction episodes; the simulator finishes by saving the buyer's
# replay buffer to "replay_buffer.csv" in the current working directory.
single_shot_auction_simulator(100000)