In [1]:
import numpy as np
from scipy.stats import norm


In [2]:
# Problem dimensions: `k` context features per sample, of which only the
# first `l` are averaged into z_bar by a later cell; `number` is the
# sample count.
k = 10
l = 3
number = 1000

# Seed NumPy's legacy global generator before the first draw so that
# Restart & Run All reproduces the same contexts and the same subsequent
# np.random draws made through that global generator.
SEED = 0
np.random.seed(SEED)

# Contexts: a (number, k) matrix of i.i.d. draws from Uniform[1, 2).
z = np.random.uniform(low=1, high=2, size=(number, k))

In [3]:
# Show the full context matrix next to its first `l` columns.
z, z[:,:l]

(array([[1.49160322, 1.3289432 , 1.67947046, ..., 1.09775986, 1.40802285,
         1.5856357 ],
        [1.68148325, 1.17905955, 1.62775707, ..., 1.45146215, 1.35222255,
         1.30389615],
        [1.54748067, 1.97568162, 1.45611175, ..., 1.18777316, 1.61029834,
         1.55408657],
        ...,
        [1.5242942 , 1.67414079, 1.3533845 , ..., 1.79637187, 1.23695457,
         1.95800373],
        [1.9556612 , 1.54098561, 1.35142941, ..., 1.02340529, 1.88099219,
         1.37753931],
        [1.37014425, 1.18164806, 1.39043702, ..., 1.13495291, 1.23361261,
         1.96897449]]),
 array([[1.49160322, 1.3289432 , 1.67947046],
        [1.68148325, 1.17905955, 1.62775707],
        [1.54748067, 1.97568162, 1.45611175],
        ...,
        [1.5242942 , 1.67414079, 1.3533845 ],
        [1.9556612 , 1.54098561, 1.35142941],
        [1.37014425, 1.18164806, 1.39043702]]))

In [4]:
def get_functions(mode):
    """Return the pair of coefficient functions ``(a, b)`` for ``mode``.

    Both callables take a single argument ``z`` and are written with
    NumPy-compatible arithmetic, so they apply elementwise to arrays.

    Parameters
    ----------
    mode : str
        ``'quadratic'``, ``'step'`` or ``'sigmoid'``. Any other value
        silently selects the linear pair ``a(z) = 6*z``, ``b(z) = z``.

    Returns
    -------
    tuple
        ``(a, b)``, two one-argument callables.
    """
    if mode == 'quadratic':
        def a(z):
            return 2 * z**2

        def b(z):
            return 0.6 * z

        return a, b

    if mode == 'step':
        # Piecewise-constant pair with a single jump at z = 1.5.
        def a(z):
            return np.where(z < 1.5, 5, 6)

        def b(z):
            return np.where(z < 1.5, 0.7, 1.2)

        return a, b

    if mode == 'sigmoid':
        # Note: 1 / (1 + e^z) is decreasing in z.
        def a(z):
            return 1 / (1 + np.exp(z))

        def b(z):
            return 2 / (1 + np.exp(z)) + 0.1

        return a, b

    # Fallback for every other mode: linear pair.
    def a(z):
        return 6 * z

    def b(z):
        return z

    return a, b
    

In [5]:
# Per-sample mean of the first `l` context features; the remaining columns
# of z do not enter z_bar.
z_bar = z[:, :l].mean(axis=1)

In [6]:
# Sampled prices: one Gaussian draw per sample, centred on z_bar with unit
# standard deviation.
p = np.random.normal(z_bar, 1)

In [7]:
# Select the quadratic pair: a(z) = 2*z**2, b(z) = 0.6*z.
a, b = get_functions('quadratic')

In [8]:
# Additive reward noise: one zero-mean, unit-variance Gaussian draw per sample.
epsilon_noise = np.random.normal(np.zeros_like(z_bar), 1)

In [9]:
# Preview of the noisy reward p * (a(z_bar) - b(z_bar) * p) + noise at the
# sampled prices; the same expression is stored as `rewards` in a later cell.
p*(a(z_bar) - b(z_bar) * p) + epsilon_noise

array([ 3.62017777,  3.40946863,  4.55516641,  7.71221345,  4.17819018,
        3.9933977 ,  1.88842672,  0.50525455,  3.56826845,  5.01180121,
        1.57883655,  8.17927622,  5.23879004,  6.97792035,  1.88797962,
        3.71967093, 10.77666657,  4.2548405 , -0.73100152,  0.88369325,
       11.55614283,  2.53838871, -2.66701379,  2.36619145,  4.9903087 ,
        5.53551008,  1.32661495,  5.79065176,  0.80027308,  4.90524819,
        2.64547914,  3.87282907,  6.2246207 ,  3.87849323,  3.14471892,
        5.70974964,  8.53199448,  8.66793377,  5.15344902,  0.82882762,
        7.81962694,  0.28002442,  5.9972818 ,  4.7009588 ,  2.6573824 ,
        2.68488317,  6.12447631,  6.20228797,  5.11838663, -0.32085857,
        3.12621997,  2.51939969,  7.85343655,  6.03472803,  2.90977579,
       -5.26814086, -1.14245158,  7.91154062,  5.00142513,  2.38890554,
        3.73848472,  5.42578498,  5.77264947,  1.85954762,  4.53843897,
        3.50693283,  5.53786244,  4.77183108,  1.90560387,  5.81

In [10]:
# Assemble the logged dataset: contexts, the prices that were sampled, the
# density of each sampled price, and the resulting noisy reward.
contexts, actions = z, p
# Density of the sampled price under N(z_bar, 1), the distribution it was
# drawn from.
propensities = norm.pdf(actions, loc=z_bar, scale=1)
rewards = actions * (a(z_bar) - b(z_bar) * actions) + epsilon_noise

# Maximiser of the noiseless reward p * (a - b * p) when b > 0: p* = a / (2 * b).
optimal_prices = a(z_bar) / (2 * b(z_bar))

In [12]:
# Noisy reward obtained by charging optimal_prices, using the same noise
# draw as `rewards`.
optimal_rewards = (a(z_bar) - b(z_bar) * optimal_prices) * optimal_prices + epsilon_noise

In [13]:
# Average noisy reward when every sample is priced at optimal_prices.
np.mean(optimal_rewards)

5.811205541832971

In [35]:
# Sanity check: one propensity per sample.
propensities.shape

(1000,)

In [36]:
# Sanity check: one reward per sample.
rewards.shape

(1000,)

In [37]:
# Average noisy reward at the sampled prices.
# NOTE(review): in the saved outputs this mean (8.79) exceeds the mean
# optimal reward shown earlier (5.81), which should not happen when both use
# the same a, b and noise. Together with the jump in execution counts this
# suggests the outputs come from different kernel states. Re-run the
# notebook top to bottom to confirm.
rewards.mean()

8.789360067324203

In [64]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top. LogisticRegression is not used in the visible cells.
from sklearn.linear_model import LogisticRegression, RidgeCV


# Ridge regressor that selects its regularisation strength by built-in
# cross-validation over the listed alphas.
pistar_determinist = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1])

In [71]:
# Fit the ridge policy to map contexts to optimal_prices, then display its
# R^2 score on the same (training) data; fit() returns the estimator itself.
pistar_determinist.fit(contexts, optimal_prices).score(contexts, optimal_prices)

0.9999999998546452

In [70]:
# Fitted weights, one per context feature. With the quadratic pair,
# optimal_prices = 2*z_bar**2 / (2 * 0.6*z_bar) = z_bar / 0.6, and z_bar
# averages the first l = 3 features, so the expected weights are
# 1 / (0.6 * 3) ~= 0.5556 on those features and 0 on the rest.
pistar_determinist.coef_

array([ 5.55548350e-01,  5.55549302e-01,  5.55548835e-01, -3.13649923e-07,
       -1.50389039e-08, -2.67148553e-07,  4.95896122e-07,  2.03645658e-07,
       -4.40740343e-07, -2.50083058e-07])

In [75]:
# Fitted intercept wrapped in a length-1 array, the form that is
# concatenated with coef_ in the next cell.
np.array([pistar_determinist.intercept_])

array([3.12107725e-05])

In [76]:
# Full parameter vector of the fitted policy: [intercept, w_1, ..., w_k].
# NOTE(review): "paramter" is a typo for "parameter"; the name is kept
# because another cell reads it.
optimal_paramter = np.concatenate(
    (np.array([pistar_determinist.intercept_]), pistar_determinist.coef_)
)

In [77]:
# Display the stacked [intercept, weights] vector.
optimal_paramter

array([ 3.12107725e-05,  5.55548350e-01,  5.55549302e-01,  5.55548835e-01,
       -3.13649923e-07, -1.50389039e-08, -2.67148553e-07,  4.95896122e-07,
        2.03645658e-07, -4.40740343e-07, -2.50083058e-07])

In [15]:
# Baseline policy: charge the same constant price of 7 for every sample.
p_matthieu = np.ones_like(optimal_prices) * 7
# Loss is the negated noisy reward at that price, with the same reward
# expression and noise draw as `rewards`.
loss_matthieu = -((a(z_bar) - b(z_bar) * p_matthieu) * p_matthieu + epsilon_noise)
loss_matthieu.mean()

12.232710586565965