# Semi-gradient TD(0) Prediction

In [1]:
import numpy as np
import jdc

![SGTD](./src/SGTDPrediction.png)

In [2]:
class GradientTD():
    """Linear value-function approximator trained with semi-gradient TD(0).

    The state value is approximated as ``np.dot(w, feature_vector(state))``.
    Further methods are attached to this class from later notebook cells.
    """

    def __init__(self, env, n_features, feature_vector, gamma=1, alpha=1e-4):
        """Store the problem definition and start from zero weights.

        Args:
            env: Environment object; kept as ``self.env`` for the methods
                that interact with it.
            n_features: Length of the weight vector (and of each feature
                vector).
            feature_vector: Callable mapping a state to its feature vector.
            gamma: Discount factor.
            alpha: Step size.
        """
        # Remember the size so reset() can rebuild the weights without
        # relying on a global that happens to be called ``n_features``.
        self.n_features = n_features
        self.w = np.zeros(n_features)
        self.env = env
        self.feature_vector = feature_vector
        self.gamma = gamma
        self.alpha = alpha

    def reset(self):
        """Set the weight vector back to all zeros."""
        self.w = np.zeros(self.n_features)

    def error(self):
        """Return the RMS error between the approximation and ``real_values``.

        The approximation is evaluated on states 1..1000 and compared with
        the module-level ``real_values`` (defined elsewhere in the notebook).
        """
        # Build an ndarray so the subtraction works whether ``real_values``
        # is a list or an array.
        approx = np.array(
            [np.dot(self.w, self.feature_vector(i + 1)) for i in range(0, 1000)]
        )
        return np.sqrt(((approx - np.asarray(real_values)) ** 2).mean())


In [3]:
%%add_to GradientTD
def estimate(self, n_episodes):
    """Run semi-gradient TD(0) for ``n_episodes`` episodes, updating ``self.w``.

    Each step applies
    ``w += alpha * (reward + gamma * v(s') - v(s)) * x(s)``
    where ``x`` is ``self.feature_vector`` and ``v(s) = w . x(s)``.

    Args:
        n_episodes: Number of episodes to run.

    Returns:
        Array of length ``n_episodes`` holding ``self.error()`` measured
        after each episode.
    """
    err = np.zeros(n_episodes)
    for e in range(n_episodes):
        state = self.env.reset()
        while True:
            # The environment picks the transition itself; step() takes no action.
            s_p, reward, done = self.env.step()
            x = self.feature_vector(state)  # computed once, reused for v and the gradient
            v = np.dot(self.w, x)
            # The value of a terminal state is 0 by definition, so do not
            # bootstrap from the approximator once the episode has ended.
            v_p = 0.0 if done else np.dot(self.w, self.feature_vector(s_p))
            self.w += self.alpha * (reward + self.gamma * v_p - v) * x
            if done:
                break
            state = s_p
        err[e] = self.error()
    return err

In [4]:
%%add_to GradientTD
def iterate(self, n_runs, n_episodes):
    """Average the per-episode error curve over ``n_runs`` independent runs.

    Every run starts from freshly reset weights and calls
    ``self.estimate(n_episodes)``. ``tqdm`` must already be in scope; it is
    used only to show progress over the runs.

    Args:
        n_runs: Number of independent runs to average over.
        n_episodes: Number of episodes in each run.

    Returns:
        Array of length ``n_episodes`` with the mean error per episode.
    """
    err = np.zeros(n_episodes)
    for _ in tqdm(range(n_runs)):
        self.reset()
        err += self.estimate(n_episodes)
    # The sum has one term per run, so the mean divides by the run count
    # (not the episode count). max() keeps the zero-run case at zeros.
    return err / max(n_runs, 1)