-
Notifications
You must be signed in to change notification settings - Fork 6
/
gridql.py
96 lines (83 loc) · 3.09 KB
/
gridql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Learns a retail price for each time slot with Q-learning, using the model
parameterized by the time slot t (module modelt).
author: Hakan Hekimgil, Jafar Chaab
"""
import numpy as np
import modelt
# Customer passed to the modelt demand/consumption functions when plotting
# and shown in the plot title.
customer = 1
# model setup
# Number of time slots, taken from the model module.
ntimeslots = modelt.ntimeslots
# Candidate prices (the action set): 2.4 upwards in steps of 0.1, rounded to
# one decimal.
# NOTE(review): np.arange with a non-integer step may or may not include the
# value next to the stop (8.3) depending on floating-point rounding -- confirm
# the intended highest price.
actions = np.round(np.arange(2.4, 8.3, 0.1), 1)
nactions = len(actions)
# Disabled: per-time-slot restriction of the actions to the band
# [k1, k2] x wholesale price.
#minprice = modelt.k1 * min(modelt.wholepricedata)
#possibleactions = {t:[a for a in range(nactions) if nactions[a] >= modelt.k1 * modelt.wholeprice(t) and nactions[a] <= modelt.k2 * modelt.wholeprice(t)] for t in range(1,ntimeslots+1)}
# Probability of picking a random action in futurerewards().
epsilon = 0.5
# Weight applied to the value of the following time slot.
discount = 0.9
# Learning rate: share of the new estimate blended into the stored Q value.
alpha = 0.1
def reward(t, n, price):
    """Return the model objective for time slot ``t`` at the given price.

    Thin wrapper: the three arguments are forwarded unchanged to
    ``modelt.obj``.  ``n`` is presumably the customer index (the learning
    loop passes the customer number here) -- confirm against ``modelt``.
    """
    objective = modelt.obj(t, n, price)
    return objective
def futurerewards(t, n, greedy=True):
    """Return the discounted reward of one rollout starting at time slot ``t``.

    For every slot from ``t`` onward an action is chosen, its reward is
    evaluated with ``reward`` and added to ``discount`` times the return of
    the remaining slots.

    Parameters
    ----------
    t : int
        Time slot to start from (1-based; row ``t - 1`` of ``qmatrix``).
    n
        Forwarded to ``reward`` (presumably the customer index).
    greedy : bool, optional
        Despite its name, ``True`` enables exploration: with probability
        ``epsilon`` a uniformly random action replaces the best action
        stored in ``qmatrix``.  ``False`` always takes the best action.
        The flag is forwarded to every later slot of the rollout, so the
        same selection rule is used for the whole rollout.

    Returns
    -------
    float
        The accumulated discounted reward, ``0.0`` once ``t >= ntimeslots``.
    """
    # NOTE(review): the cut-off also stops at t == ntimeslots, so the last
    # time slot never contributes a reward -- confirm this is intended.
    if t >= ntimeslots:
        return 0.0
    if greedy and np.random.random() <= epsilon:
        chosen = np.random.randint(nactions)
    else:
        chosen = np.argmax(qmatrix[t - 1, :])
    return reward(t, n, actions[chosen]) + discount * futurerewards(t + 1, n, greedy)
# initialization
timeslot = 1  # not used by the code below; kept so the module namespace is unchanged
nepisodes = 100  # number of full sweeps over all time slots
# One row per time slot plus one extra all-zero row, so that the look-ahead
# qmatrix[t, :] also exists for the last time slot.
# (Alternative initialisation: np.full([ntimeslots+1, nactions], -np.inf).)
qmatrix = np.zeros([ntimeslots + 1, nactions])

# Q-Learning loop
for episode in range(nepisodes):
    for t in range(1, ntimeslots + 1):
        # IMPORTANT REMINDER:
        # The functions use t as it is so reward(t,n,p) refers to time t
        # Arrays use t with 0-index addressing so qmatrix[t-1,:] refers to time t
        #
        # Row t is not written while row t-1 is being updated, so the
        # look-ahead term is the same for every action and is computed once;
        # for the same reason no snapshot copy of the table is needed.
        nextvalue = discount * np.max(qmatrix[t, :])
        for action in range(nactions):
            # Use the configured customer rather than a hard-coded 1 so the
            # learned policy matches what plotresults() displays.
            qmatrix[t - 1, action] = (
                (1 - alpha) * qmatrix[t - 1, action]
                + alpha * (reward(t, customer, actions[action]) + nextvalue))

# For every time slot pick the price with the highest Q value (the extra
# last row is excluded).
bestpolicy = [actions[x] for x in np.argmax(qmatrix[:-1, :], axis=1)]
print(bestpolicy)
# VISUALISE INPUT AND OUTPUT DATA
# Combined plot
def plotresults():
    """Show demand, consumption and prices per time slot in a single figure.

    The left axis carries two bar series for ``customer``: the values of
    ``modelt.edemandcurt`` ("Energy demand") and of ``modelt.econscurt``
    evaluated at the learned price ("Energy consumption").  A twin right
    axis carries ``modelt.wholepricedata`` ("Wholesale price") and
    ``bestpolicy`` ("Retail price") as lines.

    The time axis spans ``1 .. ntimeslots`` so it always matches the length
    of ``bestpolicy``.  The figure is displayed with ``plt.show()``; nothing
    is returned.
    """
    import matplotlib.pyplot as plt

    trange = list(range(1, ntimeslots + 1))
    barw = 0.25
    # Demand bars are shifted left by one bar width so they sit beside the
    # consumption bars instead of on top of them.
    shifted = [t - barw for t in trange]

    fig, ax1 = plt.subplots()
    p1 = ax1.bar(shifted, [modelt.edemandcurt(t, customer) for t in trange],
                 width=barw, color="blue")
    p2 = ax1.bar(trange, [modelt.econscurt(t, customer, bestpolicy[t - 1]) for t in trange],
                 width=barw, color="red")
    ax1.set_title("Customer {:}".format(customer))
    ax1.set_xlabel("Time slot")
    ax1.set_ylabel("Electricity (kWh)")
    ax1.set_yticks(list(range(0, 14, 2)))
    ax1.set_xlim(0.3, ntimeslots + 0.7)
    ax1.set_xticks(trange)

    # Second y axis sharing the same x axis, used for the price curves.
    ax2 = ax1.twinx()
    p3 = ax2.plot(trange, modelt.wholepricedata, "o-g")
    p4 = ax2.plot(trange, bestpolicy, "o-r")
    ax2.set_ylabel("Price (ȼ/kWh)")
    ax2.set_yticks(list(range(0, 9)))
    ax2.legend((p1[0], p2[0], p3[0], p4[0]),
               ("Energy demand", "Energy consumption", "Wholesale price", "Retail price"),
               loc=2)
    fig.tight_layout()
    plt.show()