In [1]:
using DataFrames
using NamedArrays

In [2]:
table = readtable("data/DistMat.csv")
cities = Array(table[:,1])
delete!(table, 1)
distMat = NamedArray(Array(table), (cities, cities))

24×24 Named Array{Float64,2}
           A ╲ B │        Barcelona  …            Warsaw
─────────────────┼──────────────────────────────────────
Barcelona        │              0.0  …           1862.33
Belgrade         │          1528.13               826.66
Berlin           │          1497.61               516.06
Brussels         │          1062.89              1159.85
Bucharest        │          1968.42               946.12
Budapest         │          1498.79               545.29
Copenhagen       │          1757.54                667.8
Dublin           │          1469.29              1823.72
Hamburg          │          1471.78               750.49
Istanbul         │          2230.42              1386.08
Kiev             │          2391.06               690.12
London           │          1137.67              1445.85
Madrid           │           504.64              2288.42
Milan            │           725.12              1143.01
Moscow           │          3006.93              1149.41
Mu

In [3]:
type RLTask
    stateSpace::Array{Any, 1} 
    actionSpace::Array{Any, 1}
    currentState # current state, an element of stateSpace
    stepFunction::Function # (currentState, a) -> (newState, reward)
end
function step(r::RLTask, action)
    newState, reward = stepFunction(r.currentState, action)
    println("Moving to $newState obtaining reward $reward")
    return newState, reward
end

step (generic function with 1 method)

In [4]:
function stepFunction(s, a)
    newState = a
    reward = -distMat[s, a]
    return newState, reward 
end
stepFunction("London", "Barcelona")

("Barcelona",-1137.67)

In [5]:
currentState = "London"
graphWalk = RLTask(cities, cities, currentState, stepFunction)

RLTask(Any["Barcelona","Belgrade","Berlin","Brussels","Bucharest","Budapest","Copenhagen","Dublin","Hamburg","Istanbul"  …  "Moscow","Munich","Paris","Prague","Rome","Saint Petersburg","Sofia","Stockholm","Vienna","Warsaw"],Any["Barcelona","Belgrade","Berlin","Brussels","Bucharest","Budapest","Copenhagen","Dublin","Hamburg","Istanbul"  …  "Moscow","Munich","Paris","Prague","Rome","Saint Petersburg","Sofia","Stockholm","Vienna","Warsaw"],"London",stepFunction)

In [6]:
step!(graphWalk, "Barcelona")

LoadError: UndefVarError: step! not defined

In [7]:
# Lets create an agent
type Agent
    policy::Function # (state) - > (action)
    totalReward::Real
end
function choose(agent::Agent, rltask::RLTask)
    action = agent.policy(rltask)
    println("The agent chooses $action from state $(rltask.currentState)")
    return(action)
end

choose (generic function with 1 method)

In [8]:
function randomPolicy(rltask::RLTask)
    return rand(rltask.actionSpace)
end
dumbAgent = Agent(randomPolicy, 0)

Agent(randomPolicy,0)

In [9]:
for i in 1:10 choose(dumbAgent, graphWalk) end

The agent chooses Copenhagen from state London
The agent chooses Moscow from state London
The agent chooses Budapest from state London
The agent chooses Prague from state London
The agent chooses Vienna from state London
The agent chooses Berlin from state London
The agent chooses Stockholm from state London
The agent chooses Dublin from state London
The agent chooses Copenhagen from state London
The agent chooses Brussels from state London


In [10]:
function interact!(agent::Agent, rltask::RLTask)
    action = choose(agent, rltask)
    newState, reward = step(rltask, action)
    agent.totalReward += reward
    rltask.currentState = newState
end

interact! (generic function with 1 method)

In [18]:
println("Current state: $(graphWalk.currentState)")
println("Current total reward: $(dumbAgent.totalReward)")

Current state: Barcelona
Current total reward: -1137.67


In [20]:
interact!(dumbAgent, graphWalk)
println("Current state: $(graphWalk.currentState)")
println("Current total reward: $(dumbAgent.totalReward)")

The agent chooses Milan from state Barcelona
Moving to Milan obtaining reward -725.12
Current state: Milan
Current total reward: -1862.79


In [21]:
interact!(dumbAgent, graphWalk)
println("Current state: $(graphWalk.currentState)")
println("Current total reward: $(dumbAgent.totalReward)")

The agent chooses Istanbul from state Milan
Moving to Istanbul obtaining reward -1669.43
Current state: Istanbul
Current total reward: -3532.2200000000003
