In [110]:
import math
import pandas as pd
import random as rd

from tqdm.notebook import tqdm

In [2]:
class Agent:
    def __init__(self, id, role: str, price: int):
        self.id = id
        self.role: str = role
        self.price: int = price
        self.utility: int = 0 # price if role == 'buyer' else -price
            
    def sell(self, price: int):
        assert self.role == 'seller', f"Agent {self.id} is a buyer but was trying to sell!"
        self.utility += price - self.price
        self.role = 'buyer'
        self.price = price
            
    def buy(self, price: int):
        assert self.role == 'buyer', f"Agent {self.id} is a seller but was trying to buy!"
        self.utility += self.price - price
        self.role = 'seller'
        self.price = price

In [3]:
NB_AGENTS = 100

NB_SELLERS = math.floor(rd.random() * NB_AGENTS)
NB_SELLERS = min(NB_AGENTS - 1, max(1, NB_SELLERS))   # To ensure there are at least one seller/buyer
NB_BUYERS = NB_AGENTS - NB_SELLERS

NB_SELLERS, NB_BUYERS

(6, 94)

In [9]:
sellers, buyers = None, None
utility_hist, price_hist = None, None

In [14]:
def reset_agents():
    global sellers, buyers
    
    sellers = [ 
        Agent(
            id=i,
            role='seller', 
            price=rd.randint(1, 100)
        )
        for i in range(NB_SELLERS) 
    ]
    buyers = [ 
        Agent(
            id=i + NB_SELLERS,
            role='buyer', 
            price=rd.randint(1, 100)
        )
        for i in range(NB_BUYERS) 
    ]

## Simple maximization

In [5]:
def write_history():
    global utility_hist, price_hist
    utility, price = {}, {}
    
    for agent in sellers + buyers:
        utility[agent.id] = agent.utility
        price[agent.id] = agent.price
        
    utility_hist = utility_hist.append(utility, ignore_index=True)
    price_hist = price_hist.append(price, ignore_index=True)
    

def reset_history():
    global utility_hist, price_hist
    utility_hist = pd.DataFrame(columns=range(NB_AGENTS))
    price_hist = pd.DataFrame(columns=range(NB_AGENTS))
    write_history()
    
    
def display_history():
    global utility_hist, price_hist
    utility_hist[~(utility_hist == utility_hist.iloc[0]).all()].plot()
    price_hist[~(price_hist == price_hist.iloc[0]).all()].plot()

In [6]:
def play_round(sellers: list, buyers: list, Verbose: bool = True):
    
    # Buyers go to cheapest sellers first
    sellers = sorted(sellers, key=lambda seller: seller.price)
    buyers = rd.sample(buyers, k=len(buyers))
    sellers_new, buyers_new = [], []

    for i, buyer in enumerate(buyers):   # Try other ways to prioritize   
        if Verbose:
            print(f"Agent {buyer.id} buying at price {buyer.price}")
            
        seller = sellers[0]
        if seller.price > buyer.price:
            if Verbose:
                print("No deal")
            continue
            
        price = rd.randint(seller.price, buyer.price)
        if Verbose:
            print(f"Agent {seller.id} selling at price {seller.price} accepted to sell at price {price}")
        seller.sell(price), buyer.buy(price)
        
        sellers_new.append(i)
        buyers_new.append(sellers.pop(0))
        
    sellers += [ buyers.pop(i) for i in reversed(sellers_new) ]
    buyers += buyers_new
    return sellers, buyers

In [11]:
reset_agents()
reset_history()

for i in range(10):
    sellers, buyers = play_round(sellers, buyers, Verbose=False)
    write_history()
    
display_history()

IndexError: list index out of range

## Reinforcement Learning

In [119]:
sellers, buyers = None, None

In [120]:
NB_SELLERS, NB_BUYERS = 5, 5

ALPHA_SELLER, ALPHA_BUYER = 0.5, 0.5
GAMMA_SELLER, GAMMA_BUYER = 0.2, 0.2

In [141]:
class Seller:
    def __init__(self, id, 
                 price_min: int, 
                 alpha: float = ALPHA_SELLER, 
                 gamma: float = GAMMA_SELLER, 
                 stochastic: bool = False):
        self.id = id
        self.alpha, self.gamma = alpha, gamma
        
        self.price_min = price_min
        if stochastic:
            price_min = rd.gauss(price_min, (PRICE_MAX - PRICE_MIN) / 100)
            self.price_min = max(PRICE_MIN, floor(price_min))
        self.price = rd.randint(self.price_min, PRICE_MAX)
        
        self.sold: bool = False
        self.profit = 0
        
        self.q_table: pd.Series = pd.Series(
            index=range(PRICE_MIN, PRICE_MAX+1), 
            dtype=int
        ).fillna(0)
        self.history: list = []
            
            
    def get_price(self):
        return self.price
            
    def get_history(self):
        return self.history
        
    
    
    def sell(self):
        self.sold = True
            
    def step(self):
        self.q_table[self.price] *= 1 - self.alpha
        reward = - self.price_min 
        if self.sold:
            reward += self.price
            self.history.append(self.price)
        else:
            self.history.append(None)
        self.q_table[self.price] += self.alpha * reward   # + self.alpha * self.gamma * self.q_table.max()
        
        self.profit += reward
        self.price = rd.choices(list(self.q_table.index), weights=self.q_table.values)
        self.sold = False

In [142]:
class Buyer:
    def __init__(self, id, 
                 budget: int, 
                 alpha: float = ALPHA_BUYER, 
                 gamma: float = GAMMA_BUYER, 
                 stochastic: bool = False):
        self.id = id
        self.alpha, self.gamma = alpha, gamma
        
        self.budget = budget
        if stochastic:
            budget = rd.gauss(budget, (PRICE_MAX - PRICE_MIN) / 100)
            self.budget = min(PRICE_MAX, floor(budget))
        
        self.buy_price: int = -1
        self.surplus = 0
        
        self.q_table: pd.DataFrame = pd.DataFrame(
            index=range(PRICE_MIN, PRICE_MAX+1), 
            columns=['take', 'leave'], 
            dtype=int
        ).fillna(0)
        
    
    def buy(self, price: int) -> bool:
        if self.q_table.loc[price, 'take'] >= self.q_table.loc[price, 'leave']:
            self.buy_price = price
            return True
        return False
            
    def step(self):
        if self.buy_price:
            self.q_table.loc[self.buy_price, 'take'] *= 1 - ALPHA
            reward = self.budget - self.buy_price 
            self.q_table.loc[self.buy_price, 'take'] += self.alpha * reward   # + self.alpha * self.gamma * self.q_table.max()
            self.surplus += reward
        self.bought = -1

In [143]:
def reset_agents():
    global sellers, buyers
    
    sellers = [ 
        Seller(
            id=i,
            price_min=30
        )
        for i in range(NB_SELLERS) 
    ]
    buyers = [ 
        Buyer(
            id=i,
            budget=70
        )
        for i in range(NB_BUYERS) 
    ]

In [144]:
reset_agents()

In [145]:
def play_round(sellers: list, buyers: list, Verbose: bool = True):
    
    # Buyers go to cheapest sellers first
    sellers = rd.sample(sellers, k=len(sellers))
    buyers = rd.sample(buyers, k=len(buyers))

    for seller, buyer in zip(sellers, buyers): 
        seller = sellers[0]            
        price = seller.get_price()
        if Verbose:
            print(f"Seller {seller.id} selling at price {price} to Buyer {buyer.id}")
            
        bought = buyer.buy(price)
        if bought:
            seller.sell()
            if Verbose:
                print(f"Buyer {buyer.id} accepted to buy!")
            
        seller.step(), buyer.step()
        
    return sellers, buyers

In [146]:
def display_history(sellers: list):
    history = pd.DataFrame({
        seller.id: seller.get_history()
        for seller in sellers
    })
    history.plot()

In [147]:
for _ in tqdm(range(10000)):
    sellers, buyers = play_round(sellers, buyers, Verbose=False)
    
display_history(sellers)

  0%|          | 0/10000 [00:00<?, ?it/s]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [130]:
sellers[2].history

[47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,
 47,


In [None]:
display_history(sellers)