In [216]:
from scipy.optimize import linprog
from math import sqrt
from collections import defaultdict
import numpy as np
import random
import copy
import math
import matplotlib.pyplot as plt 
import timeit
import pandas as pd
from scipy import stats
import time

from cvxopt import matrix, solvers
from cvxopt.modeling import variable
from cvxopt.modeling import op, dot

# Seed Python's `random` module (used by init_alpha, init_beta and get_alpha_new)
# so that a fresh run draws the same sequence of random alpha/beta values.
random.seed(42)

In [217]:
# notes

# TODO: get_demand_matrix hard-codes two LP variables (p_i0, p_i1) and must be modified if num_treatments != 2
# -- create one variable per treatment and build the objective/constraints programmatically

# changed from np.array() to lists up to clearing error

In [242]:
# Hard-coded experiment constants shared by every cell below.

num_subjects = 1540 # i
num_treatments = 2 # t
# Number of slots available in each treatment arm (one entry per treatment).
capacity_matrix = [663, 877]
budget = 100
epsilon = 0.1 # has to be less than 0.5

# Fail early if the constants above are edited inconsistently.
assert len(capacity_matrix) == num_treatments, "capacity_matrix needs one entry per treatment"
assert 0 <= epsilon < 0.5, "epsilon has to be less than 0.5"

# Assignment probability of each arm when slots are handed out uniformly at random:
# capacity(t) / num_subjects. Derived from capacity_matrix so it follows num_treatments
# instead of hard-coding indices 0 and 1.
rct_treatment_probabilities = [float(capacity_t) / num_subjects for capacity_t in capacity_matrix]
# Every subject is given the same budget.
budget_matrix = [budget] * num_subjects

# Scaling factor for alpha, beta to set new prices
alpha_scaling_factor = 0.75
# Float division: under Python 2, budget/50 would truncate for budgets that are
# not a multiple of 50.
beta_scaling_factor = budget / 50.0

In [243]:
# Random starting points for the price parameters alpha and beta.
def init_alpha():
    """Return a fresh alpha vector with one entry per treatment.

    Each entry is an integer drawn uniformly from [-budget, 0], so every
    initial alpha is non-positive.
    """
    alpha = []
    for _ in range(num_treatments):
        alpha.append(random.randint(-budget, 0))
    return alpha
def init_beta():
    """Return a fresh beta vector with one entry per treatment.

    Each entry is an integer drawn uniformly from [-budget, budget], so an
    initial beta may be negative, zero or positive.
    """
    beta = []
    for _ in range(num_treatments):
        beta.append(random.randint(-budget, budget))
    return beta

In [244]:
# Price of treatment t for subject i: pi(i,t) = alpha(t) * pte(i,t) + beta(t).
def get_price_matrix(alpha, beta):
    """Build the price matrix from the current alpha and beta vectors.

    Reads the module-level pte_matrix and returns a list of lists with the
    same layout: one row per row of pte_matrix, one price per entry of that row.
    alpha and beta are indexed by the column (treatment) position.
    """
    price_matrix = []
    for pte_row in pte_matrix:
        subject_prices = []
        for t, pte_it in enumerate(pte_row):
            subject_prices.append(alpha[t] * pte_it + beta[t])
        price_matrix.append(subject_prices)
    return price_matrix

In [245]:
def get_demand_matrix(price_matrix):
    """Solve one small LP per subject and return the chosen assignment probabilities.

    For each subject i the decision variables are p_i0 and p_i1, with
      objective:  wtp_matrix[i][0]*p_i0 + wtp_matrix[i][1]*p_i1
      budget:     price_matrix[i][0]*p_i0 + price_matrix[i][1]*p_i1 <= budget_matrix[i]
      simplex:    p_i0 + p_i1 == 1
      bounds:     epsilon <= p_i0, p_i1 <= 1 - epsilon

    The formulation is hard-coded for exactly two treatments (columns 0 and 1).

    NOTE(review): cvxopt.modeling.op() treats its first argument as the objective
    to *minimize* -- confirm that the sign convention of wtp_matrix matches that.

    Args:
        price_matrix: per-subject prices, indexed as price_matrix[i][t].

    Returns:
        A list of num_subjects rows, each with num_treatments entries; columns 0
        and 1 hold the solved p_i0 and p_i1.

    Raises:
        RuntimeError: if the solver does not report an optimal solution for a
            subject (e.g. the budget constraint makes the LP infeasible).
    """
    # The solver options are global state, so silence the progress log once
    # rather than on every iteration.
    solvers.options['show_progress'] = False

    demand_matrix = [[0.0] * num_treatments for _ in range(num_subjects)]
    # solve LP problem subject by subject
    for i in range(num_subjects):
        p_i0 = variable()
        p_i1 = variable()

        objective = wtp_matrix[i][0]*p_i0 + wtp_matrix[i][1]*p_i1
        budget_constraint = (price_matrix[i][0]*p_i0 + price_matrix[i][1]*p_i1 <= budget_matrix[i])
        probability_constraint = (p_i0 + p_i1 == 1)
        b1 = (p_i0 >= epsilon)
        b2 = (p_i0 <= 1-epsilon)
        b3 = (p_i1 >= epsilon)
        b4 = (p_i1 <= 1-epsilon)

        lp_subject = op(objective, [budget_constraint, probability_constraint, b1, b2, b3, b4])
        lp_subject.solve()

        # Without an optimal solution the variables carry no usable value; fail
        # with the subject and status instead of an opaque error on `.value[0]`.
        if lp_subject.status != 'optimal':
            raise RuntimeError(
                "get_demand_matrix: LP for subject %d not solved (status: %s, prices: %s)"
                % (i, lp_subject.status, price_matrix[i]))

        demand_matrix[i][0] = p_i0.value[0]
        demand_matrix[i][1] = p_i1.value[0]

    return demand_matrix

In [257]:
# Treatment_demand(t) = sum over all subjects i of demand(i, t).
# Excess_demand(t)    = treatment_demand(t) - capacity(t).
def get_excess_demand_matrix(demand_matrix):
    """Return the excess demand of each treatment as a plain list.

    Sums the first num_subjects rows of demand_matrix column by column and
    subtracts capacity_matrix; the result has num_treatments entries.
    """
    totals = []
    for treatment in range(num_treatments):
        # accumulate in subject order so the floating-point sum is reproducible
        running = 0.0
        for subject in range(num_subjects):
            running += demand_matrix[subject][treatment]
        totals.append(running)
    excess = np.array(totals) - capacity_matrix
    return excess.tolist()

In [258]:
# Clearing error = sqrt(sum over t of excess_demand(t)^2) / total capacity.
def get_clearing_error(excess_demand_matrix):
    """Return the market's clearing error for the given excess demands.

    When no treatment is over-subscribed (every excess <= 0) the market is
    treated as cleared and 0 is returned. Otherwise the Euclidean norm of the
    excess-demand vector is divided by sum(capacity_matrix). Either way the
    outcome is printed.
    """
    nothing_oversubscribed = all(excess <= 0 for excess in excess_demand_matrix)
    if nothing_oversubscribed:
        print "get_clearing_error: Market clear, no clearing error!"
        return 0

    squared_excess = [excess**2 for excess in excess_demand_matrix]
    total_capacity = sum(capacity_matrix)
    clearing_error = sqrt(sum(squared_excess)) / total_capacity
    print "get_clearing_error: Clearing error:", clearing_error
    return clearing_error

In [259]:
# Price-parameter updates: move alpha and beta in the direction of excess demand.
def get_alpha_new(alpha, excess_demand_matrix):
    """Return the next alpha vector.

    Each alpha(t) is shifted by excess_demand(t) * alpha_scaling_factor. Any
    entry that ends up strictly positive is replaced by a fresh random integer
    from [-budget, 0].
    """
    stepped = []
    for t in range(num_treatments):
        stepped.append(alpha[t] + excess_demand_matrix[t]*alpha_scaling_factor)
    # a positive alpha is not allowed: fall back to a random re-initialisation
    alpha_new = [random.randint(-budget, 0) if a > 0 else a for a in stepped]
    return alpha_new

def get_beta_new(beta, excess_demand_matrix):
    """Return the next beta vector.

    Each beta(t) is shifted by excess_demand(t) * beta_scaling_factor; unlike
    alpha, no sign restriction is applied.
    """
    beta_new = []
    for t in range(num_treatments):
        step = excess_demand_matrix[t]*beta_scaling_factor
        beta_new.append(beta[t] + step)
    return beta_new

In [260]:
# Find market clearing price vector. The objective is to change alpha and beta values so that we reduce clearing error
def clear_market():
    # Initialize market prices and demand
    alpha = init_alpha()
    beta = init_beta()    
    price_matrix = get_price_matrix(alpha, beta)
    demand_matrix = get_demand_matrix(price_matrix)  
    excess_demand_matrix = get_excess_demand_matrix(demand_matrix)
    clearing_error = get_clearing_error(excess_demand_matrix)
        
    # clearing error is percentage of total capacity so we want the market to clear at 1%
    minimum_clearing_error = clearing_error
    clearing_error_threshold = 0.01
    threshold_iterations = 10
    iterations = 0
    alpha_star = 0
    beta_star = 0
    
    # Set new prices to clear market
    while True:
        if iterations > threshold_iterations:
            # new search start
            alpha = init_alpha()
            beta = init_beta()
            iterations = 0
            print "new search start"
        else:
            # continue down current search
            alpha = get_alpha_new(alpha, excess_demand_matrix)
            beta = get_beta_new(beta, excess_demand_matrix)
        
        price_matrix = get_price_matrix(alpha, beta)
        demand_matrix = get_demand_matrix(price_matrix)
        excess_demand_matrix = get_excess_demand_matrix(demand_matrix)
        clearing_error = get_clearing_error(excess_demand_matrix)
        
        # Store parameter values for minimum clearing error
        if clearing_error < minimum_clearing_error:
            minimum_clearing_error = clearing_error
            alpha_star = copy.copy(alpha)
            beta_star = copy.copy(beta)
        # cleared the market! 
        if minimum_clearing_error < clearing_error_threshold:
            break
        iterations += 1
    
    print "Minimum clearing error:", minimum_clearing_error
    print "Alpha_star:", alpha_star
    print "Beta star:", beta_star
    return (minimum_clearing_error, alpha_star, beta_star)

In [261]:
def simulate():
    while True: 
        min_error, alpha_star, beta_star = clear_market()
        price_star = get_price_matrix(alpha_star, beta_star)
        demand_star = get_demand_matrix(price_star)
        
        control_probs = [demand_star_i[0] for demand_star_i in demand_star]
        treatment_probs = [demand_star_i[1] for demand_star_i in demand_star]
        
        if (min_error < 0.01):
            print "cleared market!"
            break
    return demand_star

In [262]:
# dict of form {dataset : demand_star} 
# demand_star is a list of [control_demand, treatment_demand]
# every dataset is mapped to the market clearing probability distribution
demand_dict = {}
# dict of form {dataset : # unique groups} 
num_input_groups_dict = {}
num_output_groups_dict = {}
problem_datasets = []

start_dataset, end_dataset = 1, 5
for d in range(start_dataset, end_dataset):
    # load data
    pte_df = pd.read_csv("data/PTE_"+str(d)+".csv")
    wtp_df = pd.read_csv("data/WTP_"+str(d)+".csv")
    pte_matrix = [[0, i] for i in pte_df['PTE'].values.tolist()]
    wtp_matrix = [[0, i] for i in wtp_df['WTP'].values.tolist()]
    
    # solve market, add to dict
    demand_star = simulate()
    demand_dict[d] = demand_star
    print demand_star
    control_probs = [demand_star_i[0] for demand_star_i in demand_star]
    treatment_probs = [demand_star_i[1] for demand_star_i in demand_star]

    # sanity check 
    # make dictionary to idenitfy subjects with same pte, wtp 
    # {(pte, wtp) : [subject numbers]} -- get groups
    # now make sure that in each group, everyone has the same treatment and control assignment probability
    sanity_dict = defaultdict(list)
    for subject_num in range(len(wtp_matrix)):
        sanity_dict[(wtp_matrix[subject_num][1], pte_matrix[subject_num][1])].append(subject_num)
    num_input_groups_dict[d] = len(sanity_dict)
    
    for group in sanity_dict.values():
        if not all([treatment_probs[group[0]] == treatment_probs[subject_num] for subject_num in group]):
            print "problem"
        if not all([control_probs[group[0]] == control_probs[subject_num] for subject_num in group]):
            print "problem"
    
    # bounds sanity check -- some datasets appear to be problematic
    if (min(control_probs)<epsilon) or (max(control_probs)>1-epsilon) \
        or (min(treatment_probs)<epsilon) or (max(treatment_probs)>1-epsilon):
            problem_datasets.append(d)
            print "bounds are not correct for", d
    
    # count the number of unique values of p_it -- groups of subjects with same demand
    output_groups_dict = defaultdict(list)
    for i, demand_i in enumerate(demand_star):
        output_groups_dict[(demand_i[0], demand_i[1])].append(i)
    num_output_groups_dict[d] = len(output_groups_dict)
        
    print "finished dataset", d
    
df_results = pd.DataFrame.from_dict(demand_dict)

get_clearing_error: Clearing error: 0.383306969376
get_clearing_error: Clearing error: 0.0505004532045
get_clearing_error: Clearing error: 0.0388388230747
get_clearing_error: Clearing error: 0.0316223696305
get_clearing_error: Clearing error: 0.026593582844
get_clearing_error: Clearing error: 0.022842110556
get_clearing_error: Clearing error: 0.0199157521489
get_clearing_error: Clearing error: 0.017559720952
get_clearing_error: Clearing error: 0.0156176068926
get_clearing_error: Clearing error: 0.0139871896286
get_clearing_error: Clearing error: 0.0125983862948
get_clearing_error: Clearing error: 0.0114012904571
new search start
get_clearing_error: Clearing error: 0.383306975706
get_clearing_error: Clearing error: 0.004162919901
Minimum clearing error: 0.004162919901
Alpha_star: [-36, -321.0500008764737]
Beta star: [908.8000023372911, -875.8000023372632]
cleared market!
[[0.10000000114498578, 0.8999999988550145], [0.5400229094631742, 0.45997709053682606], [0.10000000115359853, 0.899999