In [1]:
import gym
import numpy as np
import time


In [2]:


def init_q(s, a, type="ones"):
    """
    Build the initial Q-table as a (s, a) numpy array.

    @param s the number of states (rows of the table)
    @param a the number of actions (columns of the table)
    @param type "random", "ones" or "zeros" for the initialization
    @return numpy array of shape (s, a) filled according to type
    @raise ValueError if type is not one of the supported names
    """
    # NOTE: the parameter name shadows the builtin `type`; it is kept
    # because callers pass it by keyword.
    if type == "ones":
        return np.ones((s, a))
    if type == "random":
        return np.random.random((s, a))
    if type == "zeros":
        return np.zeros((s, a))
    # Previously an unknown name fell through and returned None, which only
    # failed later when the table was indexed.
    raise ValueError(
        f"Unknown initialization type {type!r}; expected 'ones', 'random' or 'zeros'"
    )


def epsilon_greedy(Q, epsilon, n_actions, s, train=False):
    """
    Pick an action for state s from the Q-table.

    NOTE: in this implementation epsilon is the probability of taking the
    greedy action (a uniform draw below epsilon selects argmax); the random
    action is taken otherwise.

    @param Q Q values state x action -> value
    @param epsilon threshold compared against a uniform draw in [0, 1)
    @param n_actions number of actions; random actions are drawn from [0, n_actions)
    @param s index of the current state (row of Q)
    @param train if true the greedy action is always returned and no random draw is made
    @return the index of the chosen action
    """
    # `or` short-circuits, so no random number is consumed when train is set.
    act_greedily = train or np.random.rand() < epsilon
    if not act_greedily:
        return np.random.randint(0, n_actions)
    return np.argmax(Q[s, :])

def TD(alpha, gamma, epsilon, episodes, max_steps, n_tests, render = False, test=False):
    """
    Train a tabular SARSA agent on the Taxi-v3 environment.

    The Q-table is initialised to ones and updated after every step with the
    action that will actually be taken next (on-policy TD target).

    @param alpha learning rate
    @param gamma decay factor
    @param epsilon threshold passed to epsilon_greedy for action selection
    @param episodes number of training episodes
    @param max_steps for max step in each episode
    @param n_tests number of test episodes
    @param render if true render the environment each step and print extra info
    @param test if true run test_agent on the learned Q-table after training
    @return list with the total reward of every training episode, in order
    """
    env = gym.make('Taxi-v3')
    try:
        n_states, n_actions = env.observation_space.n, env.action_space.n
        Q = init_q(n_states, n_actions, type="ones")
        timestep_reward = []
        for episode in range(episodes):
            print(f"Episode: {episode}")
            total_reward = 0
            # Relies on the classic gym API: reset() returns the observation
            # and step() returns a 4-tuple.
            s = env.reset()
            a = epsilon_greedy(Q, epsilon, n_actions, s)
            for t in range(1, max_steps + 1):
                if render:
                    env.render()
                s_, reward, done, info = env.step(a)
                total_reward += reward
                a_ = epsilon_greedy(Q, epsilon, n_actions, s_)
                # A terminal transition has no successor value to bootstrap from.
                target = reward if done else reward + (gamma * Q[s_, a_])
                Q[s, a] += alpha * (target - Q[s, a])
                s, a = s_, a_
                if done:
                    if render:
                        print(f"This episode took {t} timesteps and reward {total_reward}")
                    break
            # Record every episode, including those cut off by max_steps, so
            # the returned list always has one entry per episode.
            timestep_reward.append(total_reward)
        if render:
            print(f"Here are the Q values:\n{Q}\nTesting now:")
        if test:
            test_agent(Q, env, n_tests, n_actions)
        return timestep_reward
    finally:
        # Release the environment's resources even if training raised.
        env.close()

def test_agent(Q, env, n_tests, n_actions, delay=0.1):
    """
    Run the greedy policy from Q for a number of rendered episodes.

    @param Q Q values state x action -> value
    @param env environment exposing reset(), render() and step(action)
    @param n_tests number of episodes to play
    @param n_actions number of actions, forwarded to epsilon_greedy
    @param delay seconds to sleep before each rendered step
    """
    # Exploration threshold is irrelevant here: train=True forces argmax.
    greedy_epsilon = 0
    for run in range(n_tests):
        print(f"Test #{run}")
        state = env.reset()
        episode_return = 0
        finished = False
        while not finished:
            time.sleep(delay)
            env.render()
            action = epsilon_greedy(Q, greedy_epsilon, n_actions, state, train=True)
            print(f"Chose action {action} for state {state}")
            state, reward, finished, info = env.step(action)
            episode_return += reward
        print(f"Episode reward: {episode_return}")
        # Pause so the final frame stays visible before the next episode.
        time.sleep(1)


if __name__ == "__main__":
    # Learning hyperparameters.
    alpha, gamma, epsilon = 0.4, 0.999, 0.9
    # Run length: training episodes, step cap per episode, test episodes.
    episodes, max_steps, n_tests = 3000, 2500, 20
    # Train (no rendering, no test run) and dump the per-episode rewards.
    timestep_reward = TD(alpha, gamma, epsilon, episodes, max_steps, n_tests)
    print(timestep_reward)

Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
Episode: 8
Episode: 9
Episode: 10
Episode: 11
Episode: 12
Episode: 13
Episode: 14
Episode: 15
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
Episode: 47
Episode: 48
Episode: 49
Episode: 50
Episode: 51
Episode: 52
Episode: 53
Episode: 54
Episode: 55
Episode: 56
Episode: 57
Episode: 58
Episode: 59
Episode: 60
Episode: 61
Episode: 62
Episode: 63
Episode: 64
Episode: 65
Episode: 66
Episode: 67
Episode: 68
Episode: 69
Episode: 70
Episode: 71
Episode: 72
Episode: 73
Episode: 74
Episode: 75
Episode: 76
Episode: 77
Episode: 78
Episode: 79
Episode: 80
Episode: 81
Episode: 82
Episode: 83
Ep

Episode: 962
Episode: 963
Episode: 964
Episode: 965
Episode: 966
Episode: 967
Episode: 968
Episode: 969
Episode: 970
Episode: 971
Episode: 972
Episode: 973
Episode: 974
Episode: 975
Episode: 976
Episode: 977
Episode: 978
Episode: 979
Episode: 980
Episode: 981
Episode: 982
Episode: 983
Episode: 984
Episode: 985
Episode: 986
Episode: 987
Episode: 988
Episode: 989
Episode: 990
Episode: 991
Episode: 992
Episode: 993
Episode: 994
Episode: 995
Episode: 996
Episode: 997
Episode: 998
Episode: 999
Episode: 1000
Episode: 1001
Episode: 1002
Episode: 1003
Episode: 1004
Episode: 1005
Episode: 1006
Episode: 1007
Episode: 1008
Episode: 1009
Episode: 1010
Episode: 1011
Episode: 1012
Episode: 1013
Episode: 1014
Episode: 1015
Episode: 1016
Episode: 1017
Episode: 1018
Episode: 1019
Episode: 1020
Episode: 1021
Episode: 1022
Episode: 1023
Episode: 1024
Episode: 1025
Episode: 1026
Episode: 1027
Episode: 1028
Episode: 1029
Episode: 1030
Episode: 1031
Episode: 1032
Episode: 1033
Episode: 1034
Episode: 1035
Ep

Episode: 1880
Episode: 1881
Episode: 1882
Episode: 1883
Episode: 1884
Episode: 1885
Episode: 1886
Episode: 1887
Episode: 1888
Episode: 1889
Episode: 1890
Episode: 1891
Episode: 1892
Episode: 1893
Episode: 1894
Episode: 1895
Episode: 1896
Episode: 1897
Episode: 1898
Episode: 1899
Episode: 1900
Episode: 1901
Episode: 1902
Episode: 1903
Episode: 1904
Episode: 1905
Episode: 1906
Episode: 1907
Episode: 1908
Episode: 1909
Episode: 1910
Episode: 1911
Episode: 1912
Episode: 1913
Episode: 1914
Episode: 1915
Episode: 1916
Episode: 1917
Episode: 1918
Episode: 1919
Episode: 1920
Episode: 1921
Episode: 1922
Episode: 1923
Episode: 1924
Episode: 1925
Episode: 1926
Episode: 1927
Episode: 1928
Episode: 1929
Episode: 1930
Episode: 1931
Episode: 1932
Episode: 1933
Episode: 1934
Episode: 1935
Episode: 1936
Episode: 1937
Episode: 1938
Episode: 1939
Episode: 1940
Episode: 1941
Episode: 1942
Episode: 1943
Episode: 1944
Episode: 1945
Episode: 1946
Episode: 1947
Episode: 1948
Episode: 1949
Episode: 1950
Episod

Episode: 2750
Episode: 2751
Episode: 2752
Episode: 2753
Episode: 2754
Episode: 2755
Episode: 2756
Episode: 2757
Episode: 2758
Episode: 2759
Episode: 2760
Episode: 2761
Episode: 2762
Episode: 2763
Episode: 2764
Episode: 2765
Episode: 2766
Episode: 2767
Episode: 2768
Episode: 2769
Episode: 2770
Episode: 2771
Episode: 2772
Episode: 2773
Episode: 2774
Episode: 2775
Episode: 2776
Episode: 2777
Episode: 2778
Episode: 2779
Episode: 2780
Episode: 2781
Episode: 2782
Episode: 2783
Episode: 2784
Episode: 2785
Episode: 2786
Episode: 2787
Episode: 2788
Episode: 2789
Episode: 2790
Episode: 2791
Episode: 2792
Episode: 2793
Episode: 2794
Episode: 2795
Episode: 2796
Episode: 2797
Episode: 2798
Episode: 2799
Episode: 2800
Episode: 2801
Episode: 2802
Episode: 2803
Episode: 2804
Episode: 2805
Episode: 2806
Episode: 2807
Episode: 2808
Episode: 2809
Episode: 2810
Episode: 2811
Episode: 2812
Episode: 2813
Episode: 2814
Episode: 2815
Episode: 2816
Episode: 2817
Episode: 2818
Episode: 2819
Episode: 2820
Episod