# Replicating Calvano et al. (2020)
## Baseline
### Author: Andréa Epivent

In [1]:
# Set working directory
import os
path = os.getcwd()

# Import packages
# packages.py is executed directly in the notebook namespace, so every name it
# binds becomes a notebook global (presumably including `np`, which later cells
# use without importing it themselves - confirm in packages.py).
# The file is opened in a `with` block so the handle is closed once it is read.
with open(path+"/packages.py") as script_file:
    exec(script_file.read())

# Import custom functions
# NOTE(review): the star import hides where names come from; later cells rely on
# p_N, p_M, init_Q and q_learning_2agents, presumably defined in functions.py.
from functions import *

# Import parameters
# Executed the same way as packages.py; presumably defines ksi, m and
# state_space, which the initialization cells read - confirm in parameters.py.
with open(path+"/parameters.py") as script_file:
    exec(script_file.read())

In [2]:
# Create output folders
# os.makedirs builds "Output" and "Output/Baseline" in a single call, so an
# already existing "Output" folder no longer prevents "Baseline" from being
# created. Genuine failures (e.g. missing write permission) now raise instead
# of being reported as "Directories already exist".
baseline_dir = os.path.join("Output", "Baseline")
if os.path.isdir(baseline_dir):
    print("Directories already exist")
else:
    # exist_ok=True covers the case where the folder appears between the check
    # above and this call.
    os.makedirs(baseline_dir, exist_ok=True)
    print("Directories are created")

Directories already exist


### Initialization

* Action space

In [2]:
# Price grid: m evenly spaced points covering the Nash-to-monopoly price range,
# extended on each side by the fraction ksi of that range
price_gap = p_M() - p_N()
A = np.linspace(p_N() - ksi*price_gap, p_M() + ksi*price_gap, num=m)
print(A.shape)

(15,)


* State space

In [3]:
# Each state is a pair of last-period prices: (price of agent 1, price of agent 2)
S = np.zeros([state_space, 2])
# Agent 1's price is the outer index, so agent 2's price cycles fastest
price_pairs = ((price_1, price_2) for price_1 in A for price_2 in A)
for row, pair in enumerate(price_pairs):
    S[row] = pair
print(S.shape)

(225, 2)


In [5]:
# Write the action grid and the state grid to the baseline output folder
# (np.save appends the .npy extension to each file name)
for target, grid in (('/Output/Baseline/actions', A), ('/Output/Baseline/states', S)):
    np.save(path+target, grid)

* Initialize Q-matrix

In [4]:
# Build the initial Q-matrix from the action grid; init_Q is defined outside this notebook.
# NOTE(review): this same q_table object is passed to every training call below -
# confirm that q_learning_2agents works on a copy rather than updating it in place,
# otherwise the later runs do not start from these initial values.
q_table = init_Q(A)
print(q_table)

[[5.75190335 5.97100207 6.12637963 ... 4.74337049 4.43287756 4.1174899 ]
 [5.75190335 5.97100207 6.12637963 ... 4.74337049 4.43287756 4.1174899 ]
 [5.75190335 5.97100207 6.12637963 ... 4.74337049 4.43287756 4.1174899 ]
 ...
 [5.75190335 5.97100207 6.12637963 ... 4.74337049 4.43287756 4.1174899 ]
 [5.75190335 5.97100207 6.12637963 ... 4.74337049 4.43287756 4.1174899 ]
 [5.75190335 5.97100207 6.12637963 ... 4.74337049 4.43287756 4.1174899 ]]


### Training

* Launch training

In [6]:
# Test - leaving all print and saving q_info at each iteration
# Runs 10 episodes starting from q_table. Without an explicit save_info argument
# the call returns three values: q_info plus one result per agent.
q_info, q_tables1, q_tables2 = q_learning_2agents(S, A, q_table, n_episodes=10)

Iteration: 1258200
Episode: 9
Process has converged
Training finished, episode: 9
--- 25.898724432786306 minutes ---


In [5]:
# Test - Not saving q_info
# With save_info=False only the two per-agent results are returned, so q_info
# keeps whatever value an earlier cell assigned to it (if any).
# NOTE(review): this overwrites q_tables1 / q_tables2 from the previous run.
q_tables1, q_tables2 = q_learning_2agents(S, A, q_table, n_episodes=10, save_info=False)

--- 21.975573404630026 minutes ---


In [6]:
# Test - With Calvano's convergence criterion
# Save_info must be set to False, otherwise a memory error occurs
# 1e5 and 1e9 are float literals, hence the int() conversions.
# NOTE(review): presumably `criterion` is the convergence threshold and
# `criterion_final` an upper bound on iterations - confirm in functions.py.
# This call overwrites q_tables1 / q_tables2 again; in a top-to-bottom run these
# are the values written to disk by the "Store results" cell.
q_tables1, q_tables2 = q_learning_2agents(S, A, q_table, n_episodes=10, criterion=int(1e5), criterion_final=int(1e9), save_info=False)

Iteration: 2696000
Episode: 9
Process has converged
Training finished, episode: 9
--- 43.87923192183177 minutes ---


* Store results

In [None]:
# Information on prices and profits throughout simulation for both agents
# q_info is only bound when a training call returned it (i.e. save_info was not
# disabled), so an undefined name is the one failure tolerated here. Any other
# error (bad path, full disk, ...) is allowed to surface instead of being hidden.
try:
    np.save(path+'/Output/Baseline/q_info', q_info)
except NameError:
    print("q_info is not defined (no training run saved it); skipping q_info export")

# Last Q matrix of both agents
# NOTE(review): q_tables1 / q_tables2 hold the results of the most recent training
# call, which may not be the same run that produced q_info - confirm before
# analysing the saved files together.
np.save(path+'/Output/Baseline/q_table_a1', q_tables1)
np.save(path+'/Output/Baseline/q_table_a2', q_tables2)