In [24]:
import numpy as np
from datetime import datetime, timedelta

# Generating an array of 2000 random integers between 50 and 500 to represent the price range of each product.
price_range = np.random.randint(50, 500, size=2000)

# Creating a list of 10 customer groups and generating an array of 10 random integers between 50 and 500 
#to represent the minimum price each group is willing to pay for a product.(copcar)
customer_groups = ['Group 1', 'Group 2', 'Group 3', 'Group 4', 'Group 5', 'Group 6', 'Group 7', 'Group 8', 'Group 9', 'Group 10']
min_prices = np.random.randint(50, 500, size=10)

# Creating a state space, which is a list of tuples representing all possible states of the system. 
#Each tuple contains a customer group and a product index. 
#The action space is a list of integers from 0 to 1999, representing the index of each product.
state_space = [(customer, product) for customer in customer_groups for product in range(2000)]
action_space = range(2000)

# initialize the Q-values for all state-action pairs to zero
q_values = np.zeros((len(state_space), len(action_space)))

# Defining the epsilon-greedy policy for selecting an action in a given state. 
#The policy chooses a random action with probability epsilon, or chooses the action with the highest Q-value for the given state with probability 1-epsilon.
def epsilon_greedy(state, q_values, epsilon=0.1):
    if np.random.uniform(0, 1) < epsilon:
        action = np.random.choice(action_space)
    else:
        state_idx = state_space.index(state)
        action = np.argmax(q_values[state_idx])
    return action

# Defining a function to simulate a customer's purchase behavior for a given product and minimum price. 
#The function returns the reward (i.e. the price of the product if purchased) 
#and a boolean indicating whether the customer bought the product.
def simulate_customer(customer, product, min_price):
    # simulate if the customer buys or not
    buy_prob = max(0, (min_price - price_range[product]) / min_price)
    if np.random.uniform(0, 1) < buy_prob:
        reward = price_range[product]
        buy = True
    else:
        reward = 0
        buy = False
    return reward, buy

# Defining a function to obtain input from a human to determine the minimum price that a customer is willing to pay for a product. 
#The function displays the current price of the product and the customer's minimum price for products (i.e. copcar) in the console and waits for the human's input.
#If the human enters '
def get_human_input(state, min_price):
    while True:
        human_response = input(f"Is ksh.{price_range[state[1]]} an acceptable price for {state[0]} with copcar {min_prices[int(state[0][-2:])]} (y/n)? ")
        if human_response == 'y':
            return price_range[state[1]], True
        elif human_response == 'n':
            return min_price + 10, False
        else:
            print("Invalid input, please enter 'y' or 'n'.")

# run the simulation with human input
# get the start time of the simulation
start_time = datetime.now()

# set a time limit for the simulation
time_limit = timedelta(minutes=30)

# loop through each customer and product
for i in range(10):
    # get the current customer group and their minimum price
    customer = customer_groups[i]
    min_price = min_prices[i]
    
    for j in range(2000):
        # get the current product
        product = j
        
        # check if time limit has been reached
        if datetime.now() - start_time > time_limit:
            # if time limit has been reached, stop the simulation for the current customer and move on to the next
            print("Time limit reached, moving to next customer.")
            break
        
        # simulate the customer and get the reward
        # determine whether the customer buys the product, and the reward associated with that
        reward, buy = simulate_customer(customer, product, min_price)

        # update the state and Q-value for the chosen action
        # determine the current state, and choose an action based on the epsilon-greedy policy
        state = (customer, product)
        action = epsilon_greedy(state, q_values)
        q_values[state_space.index(state), action] += reward
        
        # get human input if the customer does not buy and update the Q-value
        # if the customer does not buy the product, prompt the human for input on whether the product is priced too high
        if not buy:
            price, accepted = get_human_input(state, min_price)
            if accepted:
                # if the human accepts the price, add it to the Q-value for the chosen action
                q_values[state_space.index(state), action] += price
            else:
                # if the human does not accept the price, subtract 10 from the Q-value for the chosen action
                q_values[state_space.index(state), action] -= 10
        
        # print the product price, reward, and whether the customer buys
        # output the product price, reward and whether the customer buys or not
        print(f"Product price: ${price_range[product]}, Reward: ${reward}, Buy: {buy}")



Product price: $104, Reward: $104, Buy: True


Is ksh.244 an acceptable price for Group 1 with copcar 335 (y/n)?  n


Product price: $244, Reward: $0, Buy: False


Is ksh.110 an acceptable price for Group 1 with copcar 335 (y/n)?  n


Product price: $110, Reward: $0, Buy: False


Is ksh.425 an acceptable price for Group 1 with copcar 335 (y/n)?  y


Product price: $425, Reward: $0, Buy: False


KeyboardInterrupt: Interrupted by user

In [14]:
min_prices

array([ 66, 210, 223,  64, 206,  85, 210, 108, 268, 288])

In [19]:
min_prices[int(state[0][-2:])]

210

In [22]:
product

2

In [23]:
price_range

array([120, 496, 180, ...,  82, 255, 306])