# imports

In [1]:
!pip install gymnasium
!pip install gymnasium[classic-control]

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [19]:
import numpy as np
import random
import gymnasium as gym
from collections import deque
from matplotlib import pyplot as plt

# classes for nn

## actiovations and derivatives

In [20]:
def Sigmoid(x):
    return 1 / (1 + np.exp(-x))
    
def ReLU(x):
    return max(0,x.all())

def d_Sigmoid(x):
    return x * (1 - x)
    
def d_ReLU(x):
    if (x) < 0:
        return 0
    else:
        return 1

activations_dict = {
'Sigmoid': [Sigmoid, d_Sigmoid],
'ReLU': [ReLU, d_ReLU],  
}

## layer

In [53]:
class layer:
    def __init__(s, lr = 0.1, prev_size = 2, my_size=2, activation_type = "Sigmoid"):
        s.lr = lr
        s.size = my_size
        s.prev_size = prev_size
        s.weights = np.random.random((prev_size, s.size))
        s.funcs = activations_dict.get(activation_type)
        s.activation_f = s.funcs[0]
        s.d_activation_f = s.funcs[1]


    def activate(s, x):
        return s.activation_f(x)
        
    def d_activate(s, x):
        return s.d_activation_f(x)  

    def forward(s, inputs):
        s.input = np.asarray(inputs)
        s.neurons = np.dot(s.input, s.weights)
        s.neurons_activated = s.activate(s.neurons)
        
        return s.neurons_activated

    def backprop(s, layer_error): # for output here layer_error = target_val - s.forward
        s.delta = layer_error * s.d_activate(s.neurons_activated)
        
        s.prev_layer_error = np.dot(s.delta, s.weights.T)
        return s.prev_layer_error
        
    def update_weights(s):
        s.input_t = s.input.T
        #print("input_t: ", input_t.shape)
        #print("delta: ", s.delta.shape)
        
        s.v = s.input_t.reshape((s.input_t.shape[0], 1))
        #print("v: ", v.shape)

        s.d = s.delta.reshape((1, s.delta.shape[0]))
        s.weights += s.lr * np.dot(s.v, s.d)
        
    def print_info(s):
        print("my size: ", s.size)
        print("w: ", s.weights, "\n")
        
    def print_pic(s):
        print_size = min(2, s.size)
        print("\n╻...\nv...")
        for i in range(print_size):
            #if (i == print_size/2 and print_size != s.size):
                #print ("--", s.size, "--\t", end='')
            print('O\t', end='')
        print ("--", format(s.size, ' 5d') , "--\t", end='')

    def add_neuron(s):     
        add_w = np.zeros(shape=(s.prev_size, 1), dtype=float) + 0.1
        s.weights = np.concatenate((s.weights, add_w.T))
        s.size+=1


class input_layer:
    def __init__(s, size):
        s.size = size
        s.values = np.zeros(shape=(size), dtype = float)

    def print_info(s):
        print("IN LAYER\nsize: ", s.size)
        print(s.values)

    def print_pic(s):
        print_size = min(2, s.size)

        for i in range(print_size):
            #if (i == print_size/2 and print_size != s.size):
                #print ("--", s.size, "--\t", end='')
                
            print("| |\t", end='')
        print("")
        for i in range(print_size):
            #if (i == print_size/2 and print_size != s.size):
                #print ("--", s.size, "--\t", end='')
            print(" v \t", end='')
        print("")
        for i in range(print_size):
            #if (i == print_size/2 and print_size != s.size):
                #print ("--", s.size, "--\t", end='')
            print(' @\t', end='')
        print ("--", format(s.size, ' 5d') , "--\t", end='')

    def forward(s, x):
        return x
        
        


## NN

In [22]:

class plastic_nn:
    def __init__(s):
        s.layers = []
        s.n_of_layers = 0
        
        pass

    def forward(s, data):
        for lay in layers:
            data = lay.forward(data)
        s.last_result = data
        return s.last_result

    def backprop(s, correct):
        err = correct-s.last_result

        for lay in reversed(layers[1:]):
            err = lay.backprop(err)

    def update_w(s):
        for lay in reversed(layers[1:]):
            lay.update_weights()

    def learn_one(s, in_data, target_data):
        pnn.forward(in_data)

        pnn.backprop(target_data)
        pnn.update_w()   
        
    def append_one(s, new_layer, check = False):
        if check and s.n_of_layers!=0:
            last_layer_size = s.layers[-1].size
            if last_layer_size != new_layer.prev_size:
                print("size not match, layer ", s.n_of_layers)
                return
        s.layers.append(new_layer)
        s.n_of_layers+=1
        pass

    def check_layers_sizes(s, check_layers):
        #print("CHECK")
        for i in range(1, len(check_layers)):
            #print(i)
            #print("check_layers[i-1].size ",check_layers[i-1].size)
            #print("check_layers[i].prev_size ",check_layers[i].prev_size)
            if (check_layers[i-1].size != check_layers[i].prev_size):
                print("error between ", i-1, "and ", i)
                return False
        return True
        
    def append(s, new_layers):
        test_layers = np.array([])
        if s.n_of_layers != 0:
            test_layers = s.layers[-1]
            
        test_layers = np.append(test_layers, new_layers)
                
        if (s.check_layers_sizes(test_layers)):
            for lay in new_layers:
                s.append_one(lay)

            print("added succesfully")
            return True
        else:
            print("ERROR adding layers, check info above")
            return False

    def print_info(s):
        print("Num of layers: ", s.n_of_layers)
        for cnt in range(s.n_of_layers):
            print("#", cnt)
            s.layers[cnt].print_info()
            print("")
    
    def print_pic(s):
        print("Num of layers: ", s.n_of_layers)
        cnt = 0
        for lay in s.layers:
            lay.print_pic()
            print("#", cnt, end='')
            cnt+=1
        print("\nOUT |#|\nOUT  v")




# memory

In [4]:
def cart_pole_action_to_human(val):
    if (val):
        return "right";
    else:
        return "left";

def cart_pole_state_to_human(val):
    return "Cart Position ", val[0], "Cart Velocity ", val[1], "Pole Angle ", val[2], "Pole Angular Velocity ", val[3]



class replay_memory:
    def __init__(s, maxlen):
        s.memory = deque([], maxlen=maxlen)

    def append(s, val):
        s.memory.append(val)
        
    def clear(s):
        s.memory.clear()

    def get_sample(s, sample_size):
        return random.sample(s.memry, sample_size)

    def __len__(s):
        return len(s.memory)

    #cur_state, action, next_state, reward, terminated
    def print(s):
        for m in s.memory:
            print("state: ", cart_pole_state_to_human(m[0]))
            print("action: ", cart_pole_action_to_human(m[1]))
            print("next state: ", cart_pole_state_to_human(m[2]))
            print("reward: ", m[3])
            print("termninated?: ", m[4])
            print("\n")


# env run

In [61]:
class test_env():
    def __init__(s, env_name, render_mode = ''): 
        s.brain = plastic_nn()
        
        s.render_mode = render_mode
        s.env = gym.make(env_name, render_mode="human")
    
        s.memory = replay_memory(10)
        s.explore_prob = 0.5 # epsilon
        
    def set_brain_layers(s, layers):
        s.brain.append(layers)
        
    def close_env(s):
        s.env.close()
        
    def choose_action(s, state):
        if (random.random() < s.explore_prob): 
            action = s.env.action_space.sample() # explore
        else:
            action = s.brain.forward(state)
            # this is where you would insert your policy
            #action = 1 # right
        return action

    def save_memory(s, prev_state, action, state, reward, terminated):
        s.memory.append((prev_state, action, state, reward, terminated)) 

    def run_one_game(s):
        prev_state, info = s.env.reset() # seed=42
        s.memory.clear()
        terminated = False
        truncated = False
        while (not terminated and not truncated):
            action = s.choose_action()
            
            state, reward, terminated, truncated, info = s.env.step(action) 
            s.save_memory(prev_state, action, state, reward, terminated)
            prev_state = state


    def learn(s):
        epochs = 20
        for e in range(epochs):
            s.run_one_game()
            







            
            

In [62]:
np.random.seed(5)
num_of_inputs = 4
hidden1 = 8
hidden2 = 8
#hidden3 = 5
out_n = 2

lr = 0.01


layers = [input_layer(num_of_inputs), 
          layer(lr = lr, prev_size = num_of_inputs, my_size=hidden1), 
          layer(lr = lr, prev_size = hidden1, my_size=hidden2), 
          layer(lr = lr, prev_size = hidden2, my_size=out_n)]


model = test_env('CartPole-v1')

model.set_brain_layers(layers)

model.learn()
model.close_env()


added succesfully


TypeError: choose_action() missing 1 required positional argument: 'state'