# Reproducing Calvano et al. (2020)
## Baseline - Stat des - Convergence
### Author: Andréa Epivent

In [2]:
# Set working directory
path = "/Users/admin/Desktop/PhD/RL_AlgorithmicCollusion"

# Import packages and custom functions
exec(open(path+"/Functions/import.py").read())

# Import parameters
exec(open(path+"/Functions/parameters.py").read())

In [3]:
# Import data from training
q_table_1 = np.load(path+'/Output/Baseline/q_table_a1.npy')
q_table_2 = np.load(path+'/Output/Baseline/q_table_a2.npy')
q_info = np.load(path+'/Output/Baseline/q_info.npy')
A = np.load(path+'/Output/Baseline/actions.npy')
S = np.load(path+'/Output/Baseline/states.npy')

### Retrieve number of iterations for each episode

In [4]:
n_iterations = np.zeros((n_episodes,))
for j in range(n_episodes):
    if q_info[criterion_final-1,j*4] != 0:
        n_iterations[j] = criterion_final
    else:
        n_iterations[j] = np.where(q_info[:,j*4] == 0)[0][0]

In [28]:
# Save for later use
np.save(path+'/Output/Baseline/n_iterations', n_iterations)

### Descriptive statistics

* Average number of iterations per episode

In [29]:
n_iterations.mean()

1141930.59

* Number of iterations that did not converge

In [30]:
np.sum(n_iterations == criterion_final)

4

### Robustness

<b>Idea</b>: check that optimal actions don't change when we turn exploration off.

In [24]:
# Stock last price for both agents for each episode
#f_price1, f_price2 = get_forward_price(30,q_table_1,q_table_2,q_info,n_iterations,S,A)
#price1 = f_price1[:,f_price1.shape[1]-1:f_price1.shape[1]]
#price2 = f_price2[:,f_price2.shape[1]-1:f_price2.shape[1]]
price1, price2 = get_last_price(1,q_info,n_iterations)
threshold = 1e4
conv_episodes = []

for j in range(n_episodes): 
    
    t = 0
    
    # Import q-matrix of both agents
    q1 = q_table_1[(j+1)*225:(j+1)*225+225,:]
    q2 = q_table_2[(j+1)*225:(j+1)*225+225,:]
        
    # Find last state and optimal action response according to limit strategy
    state = find_rowindex(S,price1[j][0],price2[j][0])
        
    # Initialize convergence criteria
    convergence = False
    
    # Initialize matrix for keeping track of argmax_p q
    stab1 = np.full([state_space],-1)
    stab2 = np.full([state_space],-1)

    while convergence == False:

        # Find optimal actions and keep track
        action_a1 = np.argmax(q1[state])
        action_a2 = np.argmax(q2[state])
        
        # Retrieve prices and next state
        p1,p2 = A[action_a1], A[action_a2]
        next_state = find_rowindex(S,p1,p2) # We find the row index associated with these two new prices

        # Rewards
        reward_a1 = profit_compute(p1,p2,ci,ai,mu,a0)
        reward_a2 = profit_compute(p2,p1,ci,ai,mu,a0)

        # Updating Q-table - for agent 1 
        old_value_a1 = q1[state, action_a1]
        next_max_a1 = np.max(q1[next_state])

        new_value_a1 = (1 - alpha) * old_value_a1 + alpha * (reward_a1 + delta * next_max_a1)
        q1[state, action_a1] = new_value_a1
        
        # Updating Q-table - for agent 2
        old_value_a2 = q2[state, action_a2]
        next_max_a2 = np.max(q2[next_state])

        new_value_a2 = (1 - alpha) * old_value_a2 + alpha * (reward_a2 + delta * next_max_a2)
        q2[state, action_a2] = new_value_a2

        # We always stick to the same state
        state = next_state

        if (action_a1 != stab1[state]) & (t > 2):
            convergence = True
            print(f"Agent 1 changed optimal action at stage {state} and period {t}")
            conv_episodes.append(False)
        
        if (action_a2 != stab2[state]) & (t > 2):
            convergence = True
            print(f"Agent 2 changed optimal action at stage {state} and period {t}")
        
        stab1[state] = action_a1
        stab2[state] = action_a2
        
        t += 1

        # Stop if optimal action changes
        
        # Stop in any case
        if t == threshold:
            convergence = True
            conv_episodes.append(True)
    
    

Agent 1 changed optimal action at stage 223 and period 3
Agent 2 changed optimal action at stage 223 and period 3
Agent 1 changed optimal action at stage 58 and period 3
Agent 2 changed optimal action at stage 58 and period 3
Agent 1 changed optimal action at stage 111 and period 3
Agent 2 changed optimal action at stage 111 and period 3
Agent 1 changed optimal action at stage 41 and period 3
Agent 2 changed optimal action at stage 41 and period 3
Agent 1 changed optimal action at stage 146 and period 3
Agent 2 changed optimal action at stage 146 and period 3
Agent 1 changed optimal action at stage 207 and period 3
Agent 2 changed optimal action at stage 207 and period 3
Agent 1 changed optimal action at stage 221 and period 3
Agent 2 changed optimal action at stage 221 and period 3
Agent 1 changed optimal action at stage 174 and period 3
Agent 2 changed optimal action at stage 174 and period 3
Agent 1 changed optimal action at stage 218 and period 3
Agent 2 changed optimal action at s

In [25]:
conv_episodes.count(True)

91