In [1]:
import numpy as np
from fitted_qiteration import FittedQIteration

import julia
# Start the embedded Julia runtime through PyJulia.
# NOTE(review): compiled_modules=False is the PyJulia switch for running without
# Julia's precompiled-module cache (see the PyJulia docs) — confirm it is still
# required in this environment, since it slows start-up.
j = julia.Julia(compiled_modules=False)
# Evaluate power_model.jl and keep whatever include() returns. BatchModel.step and
# BatchModel.step_2 call this object as julia_model(bid_1, bid_2, timestep), so the
# script is presumably written to end with a function definition — TODO confirm.
julia_model = j.include("power_model.jl")

In [5]:
class BatchModel():
    """Batch environment around the Julia power model, used to collect training data.

    A state is the 5-element list
    ``[bids[t-48], bids[t-24], bids[t-1], bids[t-2], t]`` where ``bids`` is the
    per-timestep history of chosen action indices and ``t`` is the current
    timestep. An action is an integer index into ``action_mapping``, which maps
    it to a pair of bid levels ``[b1, b2]``.

    Typical use: ``BatchModel()`` -> ``model_init()`` -> ``populateExperience()``.
    ``reset()`` must be called before ``step``/``step_2`` because it creates
    ``self.bids``.
    """

    def __init__(self, **kwargs):
        """Set the default sizes.

        ``kwargs`` is accepted for interface compatibility but is not read.
        """
        # Ring-buffer capacity per action for the stored experiences.
        self.max_experiences = 700
        # Timestep at which an episode is reported as finished.
        self.episode_len = 720
        # action index -> [b1, b2]; filled by populate_actions().
        self.action_mapping = {}

    def model_init(self):
        """Build the action table and allocate empty experience storage."""
        self.stateSpace = 5
        self.numActions = 0
        self.populate_actions()

        # Storage for training data, indexed [slot, action(, state component)].
        self.experiences = np.zeros((self.max_experiences, self.numActions, self.stateSpace))
        self.transitions = np.zeros((self.max_experiences, self.numActions, self.stateSpace))
        self.rewards = np.zeros((self.max_experiences, self.numActions))
        # Total number of experiences ever written per action (not capped), so
        # ``exp_index[a] % max_experiences`` is the next slot to overwrite.
        self.exp_index = np.zeros(self.numActions)

    def populate_actions(self):
        """Enumerate every (b1, b2) pair on the grid 1.0, 1.5, ..., 5.0.

        Actions are numbered with ``b1`` as the slow index and ``b2`` as the
        fast index, giving 9 * 9 = 81 actions.
        """
        # 0.5 steps are exact in binary floating point, so this matches the
        # values produced by repeatedly adding 0.5 to 1.0.
        levels = [1.0 + 0.5 * k for k in range(9)]
        self.numActions = 0
        for b1 in levels:
            for b2 in levels:
                self.action_mapping[self.numActions] = [b1, b2]
                self.numActions += 1

    def getStateSpace(self):
        """Return ``(state_ranges, numActions)``.

        ``state_ranges`` is a (5, 2) float array of ``[low, high]`` per state
        component: the four bid-history components span ``[0, numActions]`` and
        the timestep spans ``[1, episode_len]``.
        """
        bid_range = [0.0, self.numActions]
        state_ranges = np.array(
            [bid_range, bid_range, bid_range, bid_range, [1.0, float(self.episode_len)]]
        )
        return state_ranges, self.numActions

    def sampleStateActions(self, num_requested):
        """Draw up to ``num_requested`` stored experiences for every action.

        Returns:
            sample: list (one entry per action) of arrays of sampled states.
            pred: list (one entry per action) of
                ``[next_states, rewards, mask]`` where ``mask`` is a list of
                ``1.0`` with one element per sampled row.
        """
        sample = []
        pred = []
        capacity = self.experiences.shape[0]
        for a in range(self.numActions):
            # Only slots that have actually been written are eligible.
            num_stored = int(min(self.exp_index[a], capacity))
            rnd = list(range(num_stored))
            np.random.shuffle(rnd)
            # Cap by what is stored, not by the lifetime counter: once the ring
            # buffer has wrapped, exp_index[a] exceeds the number of rows and the
            # mask would otherwise be longer than the sampled arrays.
            num_available = max(0, min(num_stored, int(num_requested)))
            picked = rnd[:num_available]

            sample += [self.experiences[picked, a]]
            pState = self.transitions[picked, a]
            pRewards = self.rewards[picked, a]
            pred += [[pState, pRewards, [1.0] * num_available]]
        return sample, pred

    def _advance(self, state, action):
        """Shared transition used by ``step`` and ``step_2``.

        Calls the Julia model, standardises its reward vector, records the
        action in the bid history and builds the successor state.

        Returns ``(newState, raw_rewards, normalised_rewards, done)``.
        """
        bid_1, bid_2 = self.action_mapping[action]
        rewards = julia_model(bid_1, bid_2, state[4])

        # Standardise to zero mean / unit variance. A constant reward vector has
        # zero spread; skip the division there instead of producing NaNs.
        rewards_n = rewards - np.average(rewards)
        spread = np.std(rewards_n)
        if spread > 0:
            rewards_n = rewards_n / spread

        # int() lets states that arrive as floats still index the bid history.
        cur_ts = int(state[4]) + 1
        self.bids[cur_ts - 1] = action
        # For small timesteps the look-back indices are negative and wrap to the
        # tail of ``bids``, which reset() filled with random actions.
        newState = [
            self.bids[cur_ts - 48],
            self.bids[cur_ts - 24],
            self.bids[cur_ts - 1],
            self.bids[cur_ts - 2],
            cur_ts,
        ]
        # ">=" rather than "==": reset() may start an episode at the final
        # timestep, in which case the successor is already past episode_len.
        done = cur_ts >= self.episode_len
        return newState, rewards, rewards_n, done

    def step(self, state, action, print_it=False):
        """Apply ``action`` in ``state``.

        Returns ``(newState, reward, done)`` where ``reward`` is the first
        element of the standardised reward vector. With ``print_it`` the
        transition is printed.
        """
        newState, _, rewards_n, done = self._advance(state, action)
        reward = rewards_n[0]
        if print_it:
            print('==== Step ====', state, action, rewards_n, newState)
        return newState, reward, done

    def step_2(self, state, action, print_it=False):
        """Like ``step`` but returns the raw, un-normalised reward vector.

        ``print_it`` is accepted for signature parity with ``step`` and is not
        used.
        """
        newState, rewards, _, done = self._advance(state, action)
        return newState, rewards, done

    def sample_action(self):
        """Return a uniformly random action index in ``[0, numActions)``."""
        return np.random.randint(self.numActions)

    def reset(self):
        """Start a new episode and return its initial state.

        Fills the bid history with random actions and returns four random
        action indices followed by a random start timestep in
        ``[1, episode_len]``.
        """
        self.bids = np.random.randint(self.numActions, size=self.episode_len + 1)
        history = [self.sample_action() for _ in range(4)]
        return history + [np.random.randint(self.episode_len) + 1]

    def updateExperience(self, lastState, action):
        """Take one step and store the transition in the per-action ring buffer.

        Returns ``(newState, done)``.
        """
        newState, reward, done = self.step(lastState, action)
        # Overwrite the oldest slot once max_experiences have been stored.
        index = int(self.exp_index[action] % self.max_experiences)
        self.experiences[index, action, :] = lastState
        self.rewards[index, action] = reward
        self.transitions[index, action, :] = newState
        self.exp_index[action] += 1
        return newState, done

    def populateExperience(self, num_steps=7000):
        """Collect ``num_steps`` transitions using uniformly random actions.

        Whenever an episode finishes the environment is reset and the current
        step counter is printed.
        """
        state = self.reset()
        for num_exp in range(num_steps):
            state, done = self.updateExperience(state, self.sample_action())
            if done:
                state = self.reset()
                print('Added episode', num_exp)

In [11]:
# Seed NumPy's global RNG. BatchModel draws from it (np.random.randint in
# reset/sample_action, np.random.shuffle in sampleStateActions), so this must run
# before the model cells below for the collected experience to be repeatable.
np.random.seed(44)

In [3]:
# Build the environment, allocate its action table and experience buffers, then
# fill the buffers with 7000 transitions generated from uniformly random actions.
# populateExperience() prints the running step counter each time an episode ends.
model = BatchModel()
model.model_init()
model.populateExperience()

Added episode 402
Added episode 555
Added episode 1249
Added episode 1911
Added episode 2302
Added episode 2971
Added episode 3468
Added episode 4001
Added episode 4134
Added episode 4256
Added episode 4414
Added episode 4853
Added episode 4893
Added episode 5400


In [12]:
# Run fitted Q-iteration on the experience gathered in `model`.
# FittedQIteration comes from fitted_qiteration.py and is not visible here, so the
# per-call notes below are inferred from the method names only — TODO confirm.
fitted_q = FittedQIteration(model)
#fitted_q.randomize_parameters()
# Presumably sets up the planner's internal state before any fitting happens.
fitted_q.planner_init()
# Presumably the fitting step; the saved output of this cell ends in
# KeyboardInterrupt, so one of these calls was still running when it was stopped.
fitted_q.updatePlan()
# Presumably evaluates the fitted policy against the model.
fitted_q.test()

KeyboardInterrupt: 