In [1]:
# -*- coding: utf-8 -*-
"""
@Created at 2022/7/3 13:07
@Author: Kurt
@file:dual.py
@Desc:
"""
from collections import Counter
import numpy as np
import logging
import ray
import matplotlib.pyplot as plt

logging.basicConfig()
logger = logging.getLogger("uniform")
logger.setLevel(logging.DEBUG)

EPSILON = 0.000001

In [14]:
def myround(num):
    num = num if abs(num) > EPSILON else 0
    return num


def get_return_probability(m,pon):
    return m*pon
    

def utility_tie_online(loc, c, con, m, pon, poff):
    """
    In this function, the tie is broken by assuming consumers buy online directly
    :param loc: value of theta
    """
    u_o = 1 / 2 * (loc - pon) - 1 / 2 *  (1-get_return_probability(m=m, pon=pon)) * pon - con
    u_s = 1 / 2 * (loc - poff) - c
    if myround(u_o - u_s) >= 0:
        if myround(u_o) >= 0:
            return "o"
        else:
            return "l"
    else:
        if myround(u_s) >= 0:
            return "s"
        else:
            return "l"


def utility_tie_offline(loc, c, con, m, pon, poff):
    """
    In this function, the tie is broken by assuming consumers visit the store
    :param loc: value of theta
    """
    u_o = 1 / 2 * (loc - pon) - 1 / 2 *  (1-get_return_probability(m=m, pon=pon)) * pon - con
    u_s = 1 / 2 * (loc - poff) - c
    if myround(u_o - u_s) > 0:
        if myround(u_o) >= 0:
            return "o"
        else:
            return "l"
    else:
        if myround(u_s) >= 0:
            return "s"
        else:
            return "l"


def get_demand(behaviors):
    total = len(behaviors)
    count = Counter(behaviors)
    alpha_o = count['o'] / total
    alpha_s = count['s'] / total
    alpha_l = count['l'] / total
    assert myround(alpha_o + alpha_s + alpha_l - 1) == 0

    return alpha_o, alpha_s


def simulate_behavior(consumers, c, con, m, pon, poff):
    # if consumers are indifferent between buying online directly and visiting the store,
    # we break the tie by maximizing the retailer's profit
    if myround(c - 1/2*con - 1/2*(1-get_return_probability(m=m, pon=pon))*pon) == 0:
        behaviors_tie_online = [utility_tie_online(loc=consumer, c=c, con=con,
                                                   m=m, pon=pon, poff=poff) for consumer in consumers]
        behaviors_tie_offline = [utility_tie_offline(loc=consumer, c=c, con=con,
                                                     m=m, pon=pon, poff=poff) for consumer in consumers]

        return behaviors_tie_online, behaviors_tie_offline
    else:
        # if there is no tie, utility_tie_online and utility_tie_offline are equivalent.
        behaviors = [utility_tie_online(loc=consumer, c=c, con=con, m=m, pon=pon, poff=poff) for consumer in consumers]

        return behaviors


def cal_profit(m, pon, poff, cr, behaviors):
    alpha_o, alpha_s = get_demand(behaviors)
    logger.debug("current demand: alpha_o {:.3f}, alpha_s {:.3f}, return probability:{:.3f}".format(
        alpha_o, alpha_s, m * pon))
    online_profit = alpha_o * (1 / 2 * pon + 1 / 2 * (
        (1 - get_return_probability(m=m, pon=pon)) * pon - get_return_probability(m=m, pon=pon) * cr))  # w.p. 1/2, b=b_H.
    store_profit = alpha_s * 1 / 2 * poff  # w.p. 1/2, b=b_H
    logger.debug("current demand: online_profit {:.5f}, store_profit {:.5f}".format(online_profit, store_profit))
    profit = 1 / 2 * store_profit + 1 / 2 * online_profit  # w.p. 1/2, a=a_H
    return profit

In [15]:

class dual:
    def __init__(self, c, con, cr, return_prop, step=0.001, density=0.001):
        self.pon = 0
        self.poff = 0
        self.profit = 0
        self.solve(c=c, con=con, cr=cr, return_prop=return_prop, step=step, density=density)

    def solve(self, c, con, cr, return_prop, step, density):
        consumers = np.arange(0, 1, density)
        optimal_profit = 0
        optimal_pon = 0
        for pon in np.arange(0.0001, 1, step):
            poff = pon + con
            if isinstance(return_prop, str):
                m = 1 / (2 * pon)
            else:
                m = return_prop
#             logger.debug("current m: {:.3f}, pon: {:.3f}".format(m, pon))
            if myround(c - 1/2*con - 1/2*(1-get_return_probability(m=m, pon=pon))*pon) == 0:
                behaviors_tie_online, behaviors_tie_offline = simulate_behavior(consumers=consumers,
                                                                                c=c, con=con, m=m, pon=pon, poff=poff)
                profit_tie_online = cal_profit(m=m, pon=pon, poff=poff, cr=cr, behaviors=behaviors_tie_online)
                profit_tie_offline = cal_profit(m=m, pon=pon, poff=poff, cr=cr, behaviors=behaviors_tie_offline)
                profit = max(profit_tie_online, profit_tie_offline)
            else:
                behaviors = simulate_behavior(consumers=consumers, c=c, con=con, m=m, pon=pon, poff=poff)
                profit = cal_profit(m=m, pon=pon, poff=poff, cr=cr, behaviors=behaviors)

            logger.debug("current loop: pon={:.3f}, poff={:.3f}, profit={:.5f}".format(pon, poff, profit))
            logger.debug("-------"*10)
            if profit - optimal_profit > 0:
                optimal_profit = profit
                optimal_pon = pon
        self.pon = optimal_pon
        self.poff = optimal_pon + con
        self.profit = optimal_profit


## Main test

In [16]:
c=0.145
cr = 0.5
con = 0.1
step = 0.001
density = 0.005
return_prop = 0.131 # if this is a string, it means that we set m=1/(2*pon), which degrades to the baseline model.
dual_ins = dual(c=c, con=con, return_prop=return_prop, cr=cr, step=0.005, density=0.0001)

DEBUG:uniform:current demand: alpha_o 0.800, alpha_s 0.000, return probability:0.000
DEBUG:uniform:current demand: online_profit 0.00008, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.000, poff=0.100, profit=0.00004
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.790, alpha_s 0.000, return probability:0.001
DEBUG:uniform:current demand: online_profit 0.00389, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.005, poff=0.105, profit=0.00195
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.780, alpha_s 0.000, return probability:0.001
DEBUG:uniform:current demand: online_profit 0.00761, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.010, poff=0.110, profit=0.00381
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.770, alpha_s 0.000, return pro

DEBUG:uniform:current loop: pon=0.130, poff=0.230, profit=0.03380
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.532, alpha_s 0.000, return probability:0.018
DEBUG:uniform:current demand: online_profit 0.06890, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.135, poff=0.235, profit=0.03445
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.522, alpha_s 0.000, return probability:0.018
DEBUG:uniform:current demand: online_profit 0.07011, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.140, poff=0.240, profit=0.03505
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.512, alpha_s 0.000, return probability:0.019
DEBUG:uniform:current demand: online_profit 0.07122, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.145, poff=0.245, profit=0.03561
DEBU

DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.345, return probability:0.035
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.06296
DEBUG:uniform:current loop: pon=0.265, poff=0.365, profit=0.03148
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.340, return probability:0.035
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.06290
DEBUG:uniform:current loop: pon=0.270, poff=0.370, profit=0.03145
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.335, return probability:0.036
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.06281
DEBUG:uniform:current loop: pon=0.275, poff=0.375, profit=0.03141
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.330, return pro

DEBUG:uniform:current loop: pon=0.395, poff=0.495, profit=0.02660
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.210, return probability:0.052
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.05249
DEBUG:uniform:current loop: pon=0.400, poff=0.500, profit=0.02624
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.205, return probability:0.053
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.05175
DEBUG:uniform:current loop: pon=0.405, poff=0.505, profit=0.02587
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.200, return probability:0.054
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.05098
DEBUG:uniform:current loop: pon=0.410, poff=0.510, profit=0.02549
DEBU

DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.080, return probability:0.069
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.02517
DEBUG:uniform:current loop: pon=0.530, poff=0.630, profit=0.01259
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.075, return probability:0.070
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.02378
DEBUG:uniform:current loop: pon=0.535, poff=0.635, profit=0.01189
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.070, return probability:0.071
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.02237
DEBUG:uniform:current loop: pon=0.540, poff=0.640, profit=0.01119
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.065, return pro

DEBUG:uniform:current loop: pon=0.660, poff=0.760, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.087
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.665, poff=0.765, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.088
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.670, poff=0.770, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.088
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.675, poff=0.775, profit=0.00000
DEBU

DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.104
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.795, poff=0.895, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.105
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.800, poff=0.900, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.105
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.805, poff=0.905, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return pro

DEBUG:uniform:current loop: pon=0.925, poff=1.025, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.122
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.930, poff=1.030, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.122
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.935, poff=1.035, profit=0.00000
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.000, return probability:0.123
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.940, poff=1.040, profit=0.00000
DEBU

In [17]:
print("pon={:.3f}, poff={:.3f}, profit={:.3f}".format(dual_ins.pon, dual_ins.poff, dual_ins.profit))
u_o = 1 / 2 * 1 + 1 / 2 * return_prop * dual_ins.pon * dual_ins.pon - dual_ins.pon - con
u_s = 1 / 2 * (1 - dual_ins.poff) - c
print(u_o, u_s)

pon=0.190, poff=0.290, profit=0.039
0.21226703965499996 0.20995


In [18]:
consumers = np.arange(0, 1, 0.0001)
m =return_prop
pon = 0.35
poff=0.40


if myround(1 / 2 * m * pon * pon - pon + 1 / 2 * poff + c - con) == 0:
    behaviors_tie_online, behaviors_tie_offline = simulate_behavior(consumers=consumers,
                                                                    c=c, con=con, m=m, pon=pon, poff=poff)
    profit_tie_online = cal_profit(m=m, pon=pon, poff=poff, cr=cr, behaviors=behaviors_tie_online)
    profit_tie_offline = cal_profit(m=m, pon=pon, poff=poff, cr=cr, behaviors=behaviors_tie_offline)
    profit = max(profit_tie_online, profit_tie_offline)
else:
    behaviors = simulate_behavior(consumers=consumers, c=c, con=con, m=m, pon=pon, poff=poff)
    profit = cal_profit(m=m, pon=pon, poff=poff, cr=cr, behaviors=behaviors)

DEBUG:uniform:current demand: alpha_o 0.000, alpha_s 0.310, return probability:0.046
DEBUG:uniform:current demand: online_profit 0.00000, store_profit 0.06200


In [19]:
Counter([utility_tie_online(loc=x, c=c, con=con, m=return_prop, pon=pon, poff=poff) for x in consumers])

Counter({'l': 6900, 's': 3100})

## check

In [20]:
c=0.11
cr = 0.4
con = 0.02
step = 0.001
density = 0.005
return_prop = 1.8 # if this is a string, it means that we set m=1/(2*pon), which degrades to the baseline model.
dual_ins = dual(c=c, con=con, return_prop=return_prop, cr=cr, step=0.005, density=0.0001)

DEBUG:uniform:current demand: alpha_o 0.960, alpha_s 0.000, return probability:0.000
DEBUG:uniform:current demand: online_profit 0.00006, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.000, poff=0.020, profit=0.00003
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.950, alpha_s 0.000, return probability:0.009
DEBUG:uniform:current demand: online_profit 0.00308, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.005, poff=0.025, profit=0.00154
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.940, alpha_s 0.000, return probability:0.018
DEBUG:uniform:current demand: online_profit 0.00599, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.010, poff=0.030, profit=0.00299
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.930, alpha_s 0.000, return pro

DEBUG:uniform:current loop: pon=0.130, poff=0.150, profit=0.02484
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.723, alpha_s 0.000, return probability:0.243
DEBUG:uniform:current demand: online_profit 0.05061, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.135, poff=0.155, profit=0.02530
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.715, alpha_s 0.000, return probability:0.252
DEBUG:uniform:current demand: online_profit 0.05149, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.140, poff=0.160, profit=0.02574
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.708, alpha_s 0.000, return probability:0.261
DEBUG:uniform:current demand: online_profit 0.05230, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.145, poff=0.165, profit=0.02615
DEBU

DEBUG:uniform:current demand: alpha_o 0.556, alpha_s 0.000, return probability:0.477
DEBUG:uniform:current demand: online_profit 0.05920, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.265, poff=0.285, profit=0.02960
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.551, alpha_s 0.000, return probability:0.486
DEBUG:uniform:current demand: online_profit 0.05908, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.270, poff=0.290, profit=0.02954
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.546, alpha_s 0.000, return probability:0.495
DEBUG:uniform:current demand: online_profit 0.05894, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.275, poff=0.295, profit=0.02947
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.541, alpha_s 0.000, return pro

DEBUG:uniform:current loop: pon=0.395, poff=0.415, profit=0.02532
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.448, alpha_s 0.000, return probability:0.720
DEBUG:uniform:current demand: online_profit 0.05016, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.400, poff=0.420, profit=0.02508
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.445, alpha_s 0.000, return probability:0.729
DEBUG:uniform:current demand: online_profit 0.04966, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.405, poff=0.425, profit=0.02483
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.443, alpha_s 0.000, return probability:0.738
DEBUG:uniform:current demand: online_profit 0.04916, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.410, poff=0.430, profit=0.02458
DEBU

DEBUG:uniform:current demand: alpha_o 0.406, alpha_s 0.000, return probability:0.954
DEBUG:uniform:current demand: online_profit 0.03503, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.530, poff=0.550, profit=0.01751
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.405, alpha_s 0.000, return probability:0.963
DEBUG:uniform:current demand: online_profit 0.03434, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.535, poff=0.555, profit=0.01717
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.405, alpha_s 0.000, return probability:0.972
DEBUG:uniform:current demand: online_profit 0.03365, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.540, poff=0.560, profit=0.01682
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.405, alpha_s 0.000, return pro

DEBUG:uniform:current loop: pon=0.660, poff=0.680, profit=0.00643
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.426, alpha_s 0.000, return probability:1.197
DEBUG:uniform:current demand: online_profit 0.01173, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.665, poff=0.685, profit=0.00587
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.428, alpha_s 0.000, return probability:1.206
DEBUG:uniform:current demand: online_profit 0.01059, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.670, poff=0.690, profit=0.00529
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.430, alpha_s 0.000, return probability:1.215
DEBUG:uniform:current demand: online_profit 0.00941, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.675, poff=0.695, profit=0.00471
DEBU

DEBUG:uniform:current demand: alpha_o 0.508, alpha_s 0.000, return probability:1.431
DEBUG:uniform:current demand: online_profit -0.03051, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.795, poff=0.815, profit=-0.01526
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.512, alpha_s 0.000, return probability:1.440
DEBUG:uniform:current demand: online_profit -0.03281, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.800, poff=0.820, profit=-0.01640
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.516, alpha_s 0.000, return probability:1.449
DEBUG:uniform:current demand: online_profit -0.03518, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.805, poff=0.825, profit=-0.01759
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.521, alpha_s 0.000, retu

DEBUG:uniform:current demand: online_profit -0.11584, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.925, poff=0.945, profit=-0.05792
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.657, alpha_s 0.000, return probability:1.674
DEBUG:uniform:current demand: online_profit -0.12042, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.930, poff=0.950, profit=-0.06021
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.664, alpha_s 0.000, return probability:1.683
DEBUG:uniform:current demand: online_profit -0.12511, store_profit 0.00000
DEBUG:uniform:current loop: pon=0.935, poff=0.955, profit=-0.06256
DEBUG:uniform:----------------------------------------------------------------------
DEBUG:uniform:current demand: alpha_o 0.671, alpha_s 0.000, return probability:1.692
DEBUG:uniform:current demand: online_profit -0.12993, store_prof

In [22]:
print("pon={:.3f}, poff={:.3f}, profit={:.3f}, return probability :{}".format(dual_ins.pon, dual_ins.poff, dual_ins.profit, return_prop * dual_ins.pon))
u_o = 1 / 2 * 1 + 1 / 2 * return_prop * dual_ins.pon * dual_ins.pon - dual_ins.pon - con
u_s = 1 / 2 * (1 - dual_ins.poff) - c
print(u_o-u_s, np.isclose(u_o, u_s))

pon=0.245, poff=0.265, profit=0.030, return probability :0.44117999999999996
0.03151660900000003 False
