Задача 1

Построить поле из нулей размером 10×10. Тигр обозначается цифрой 1 и стартует из левого верхнего угла. В координатах (3,5), (4,5), (5,5), (6,5) находятся враги тигра — их необходимо обойти (врагов можно обозначить цифрой 3). В координатах (7,9), (8,9), (9,9) находятся зайцы (обозначаются цифрой 2). Тигру необходимо дойти до зайцев, съесть одного из них и вернуться назад, в начальную точку. Тигра, зайцев и врагов реализовать в виде классов, аналогично заданию 1 с семинара 1.

In [42]:
import numpy as np
import pandas as pd

In [43]:
# Fix the seed of NumPy's global generator so that everything drawn through
# np.random below (random layouts, attack/escape rolls) is reproducible.
np.random.seed(0)

In [45]:
class Point:
    """A grid cell addressed by column ``x`` and row ``y``."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def dist(self, other):
        """Return the Manhattan (taxicab) distance between this point and ``other``."""
        # Manhattan rather than Euclidean: movement on the grid happens in
        # single axis-aligned steps.
        return abs(self.x - other.x) + abs(self.y - other.y)

    def __eq__(self, other):
        """Two points are equal when both coordinates match."""
        try:
            return self.x == other.x and self.y == other.y
        except AttributeError:
            # Not point-like: let Python fall back to "not equal" instead of
            # crashing on e.g. ``point == -1``.
            return NotImplemented

    def __lt__(self, other):
        """Return True when this point is strictly smaller in BOTH coordinates.

        This is a partial order (neither of (0, 1) and (1, 0) is less than the
        other); it is what makes ``low < p < high`` a bounding-box test.
        """
        try:
            return self.x < other.x and self.y < other.y
        except AttributeError:
            return NotImplemented

    def __repr__(self):
        return '({}, {})'.format(self.x, self.y)

In [46]:
class Environment:
    """
    Rectangular grid world for the tiger hunt.

    Cell codes stored in ``field``:
    0 - empty cell,
    1 - tiger,
    2 - prey,
    3 - enemy.

    ``place`` maps 'Tiger' to its Point and 'Enemy' / 'Pray' to lists of
    Points. A Point(x, y) lives at ``field[y][x]`` (x is the column, y the row).
    """

    # Fixed layout used when random=False, as (x, y) pairs.
    _FIXED_ENEMIES = ((3, 5), (4, 5), (5, 5), (6, 5))
    _FIXED_PRAY = ((7, 9), (8, 9), (9, 9))
    # Text colour per cell code; anything else is rendered white.
    _CELL_COLORS = {1: 'blue', 2: 'green', 3: 'red'}

    def __init__(self, size_x=10, size_y=10, n_enemy=4, n_pray=3, random=True):
        """Build the world.

        size_x, size_y -- number of columns / rows.
        n_enemy, n_pray -- how many enemies / prey to scatter (random mode only).
        random -- True for a random layout, False for the fixed one.
        """
        self.size_x = size_x
        self.size_y = size_y
        self.n_enemy = n_enemy
        self.n_pray = n_pray
        self.place = {'Tiger': Point(0, 0), 'Enemy': [], 'Pray': []}
        if random:
            self.create_field_rnd()
        else:
            self.create_field()

    def _empty_field(self):
        """Return a zero-filled size_y x size_x grid with the tiger in the top-left cell."""
        field = np.zeros((self.size_y, self.size_x)).astype(int)
        field[0][0] = 1
        return field

    def _scatter(self, field, count, code, kind):
        """Put ``count`` cells of ``code`` on random empty cells and record them in place[kind]."""
        n, m = field.shape
        for _ in range(count):
            i = np.random.randint(0, n)
            j = np.random.randint(0, m)
            # Re-draw until an empty cell is hit.
            while field[i][j] != 0:
                i = np.random.randint(0, n)
                j = np.random.randint(0, m)
            field[i][j] = code
            self.place[kind].append(Point(j, i))

    def create_field_rnd(self):
        """Create a random layout: prey first, then enemies.

        Raises ValueError when the requested entities do not fit on the board
        (the rejection sampling above would otherwise never terminate).
        """
        free_cells = self.size_x * self.size_y - 1  # the tiger occupies one cell
        wanted = max(self.n_pray, 0) + max(self.n_enemy, 0)
        if wanted > free_cells:
            raise ValueError(
                'cannot place %d entities on a board with %d free cells'
                % (wanted, free_cells)
            )
        field = self._empty_field()
        self._scatter(field, self.n_pray, 2, 'Pray')
        self._scatter(field, self.n_enemy, 3, 'Enemy')
        self.field = field

    def create_field(self):
        """Create the fixed layout (enemies on row 5, prey on row 9).

        Raises ValueError when the board is too small for the fixed coordinates.
        """
        for x, y in self._FIXED_ENEMIES + self._FIXED_PRAY:
            if x >= self.size_x or y >= self.size_y:
                raise ValueError(
                    'fixed layout needs cell (%d, %d), board is %dx%d'
                    % (x, y, self.size_x, self.size_y)
                )
        field = self._empty_field()
        for x, y in self._FIXED_ENEMIES:
            field[y][x] = 3
        for x, y in self._FIXED_PRAY:
            field[y][x] = 2

        self.place['Enemy'] = [Point(x, y) for x, y in self._FIXED_ENEMIES]
        self.place['Pray'] = [Point(x, y) for x, y in self._FIXED_PRAY]
        self.field = field

    def cell_color(self, val):
        """Return the CSS colour declaration used to render a cell holding ``val``."""
        return 'color: %s' % self._CELL_COLORS.get(val, 'white')

    def __repr__(self):
        return str(pd.DataFrame(self.field))

    def print(self):
        """Return a pandas Styler of the field with every cell coloured by its code."""
        styler = pd.DataFrame(self.field).style
        # Styler.applymap was renamed to Styler.map in pandas 2.1; prefer the
        # new name when it exists and fall back for older pandas.
        paint = styler.map if hasattr(styler, 'map') else styler.applymap
        return paint(self.cell_color)

In [47]:
class Rabbit:
    """Prey whose escape chance is a fresh random number on every call."""

    def __init__(self):
        # Keep the generator function itself, not a sample: each
        # ``rabbit.escape()`` call draws a new value from np.random.rand.
        self.escape = np.random.rand

In [48]:
class Tiger:
    """
    Greedy hunter living on an Environment grid.

    Behaviour stages listed by the author (the code itself tracks them only
    through the ``pray`` and ``home`` flags):
    0 - "Search for prey",
    1 - "Track the prey",
    2 - "Attack the prey",
    3 - "Run away from an enemy",
    4 - "Run home"
    """

    # Candidate moves in the order they are tried: right, left, down, up.
    _NEIGHBOURS = ((1, 0), (-1, 0), (0, 1), (0, -1))

    def __init__(self, environment):
        self.env = environment
        self.pray = False   # True once a prey has been caught
        self.home = False   # True once the tiger is back at (0, 0) with a prey
        self.moment = np.random.rand
        self.attack = np.random.rand
        self.hist = [Point(0, 0)]  # visited cells, used to avoid pacing in circles

    def pray_loc(self, loc):
        """Return the prey closest to ``loc`` (first one on ties), or None if none is left."""
        pray_list = self.env.place['Pray']
        if not pray_list:
            return None
        return min(pray_list, key=loc.dist)

    def home_loc(self):
        """Return the tiger's home cell (the top-left corner)."""
        return Point(0, 0)

    def go_attack(self, pray_chance):
        """Roll for an attack; on success set ``pray`` and forget the visited cells.

        The attack succeeds when the "moment" roll exceeds 0.5 and the attack
        roll beats ``pray_chance``.
        """
        moment = self.moment()
        attack = self.attack()
        print({'Moment': moment, 'Attack': attack, 'Pray': pray_chance})
        if moment > 0.5 and pray_chance < attack:
            self.pray = True
            # Clear the history so the way home may reuse cells visited on the hunt.
            self.hist = []

    def check_location(self, loc):
        """Return True when the tiger may step onto ``loc``.

        A cell is rejected when it is off the board, holds an enemy (code 3),
        or already appears more than once in the history.
        """
        inside = -1 < loc.x < self.env.size_x and -1 < loc.y < self.env.size_y
        if not inside:
            return False
        if self.env.field[loc.y][loc.x] == 3:
            return False
        if self.hist.count(loc) > 1:
            return False
        return True

    def go(self, t, p):
        """Return the allowed neighbour of ``t`` that is closest to the target ``p``.

        Returns ``t`` itself (after printing 'Ops...') when no neighbour is allowed.
        """
        locations = []
        for dx, dy in self._NEIGHBOURS:
            loc = Point(t.x + dx, t.y + dy)
            if self.check_location(loc):
                locations.append((loc.dist(p), loc))

        if not locations:
            print('Ops...')
            return t

        # sorted() on (distance, Point) tuples is kept on purpose: ties are
        # broken through Point.__lt__, exactly as before.
        return sorted(locations)[0][1]

    def step(self, pray_chance=-1):
        """Advance the simulation by one move.

        Returns 'Win!' / 'Lose!' when the game is over, 'Attack' after an
        attack attempt, 'Home' on arrival at home with a prey, otherwise the
        styled field after moving one cell.
        """
        if self.home and self.pray:
            return 'Win!'

        if not self.pray and len(self.env.place['Pray']) == 0:
            return 'Lose!'

        cur_location = self.env.place['Tiger']

        if not self.pray:
            # The nearest prey is only needed (and only guaranteed to exist)
            # while hunting.
            pray_location = self.pray_loc(cur_location)
            if pray_location == cur_location:
                self.go_attack(pray_chance)
                # Eaten or escaped - either way this prey is gone. Previously
                # an eaten prey stayed listed in place['Pray'] although it had
                # already vanished from the field.
                self.env.place['Pray'].remove(pray_location)
                self.env.field[pray_location.y][pray_location.x] = 1
                return 'Attack'
            pur_location = pray_location
        else:
            home_location = self.home_loc()
            if cur_location == home_location:
                self.home = True
                return 'Home'
            pur_location = home_location

        nex_location = self.go(cur_location, pur_location)
        self.env.place['Tiger'] = nex_location
        self.env.field[cur_location.y][cur_location.x] = 0
        self.env.field[nex_location.y][nex_location.x] = 1
        self.hist.append(nex_location)
        return self.env.print()

In [91]:
# Random 10x10 world with 20 enemies and the default number of prey,
# plus the two actors living in it.
env = Environment(random=True, n_enemy=20)
tiger = Tiger(env)
rabbit = Rabbit()
# Last expression of the cell: the notebook renders the coloured field.
env.print()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,3,0,0,0,0,3,0
1,0,0,0,2,0,0,3,0,0,3
2,0,0,0,3,0,2,0,3,0,0
3,0,0,0,0,0,0,0,0,0,2
4,0,3,0,0,0,0,0,0,0,0
5,3,0,0,0,0,0,0,3,0,0
6,0,3,0,0,0,0,0,3,0,0
7,3,0,0,0,3,0,0,0,0,0
8,0,0,0,3,3,0,3,0,3,3
9,0,0,3,0,0,0,3,0,0,0


In [100]:
# Play one move; the rabbit's fresh escape roll is the chance the tiger's
# attack roll has to beat. Re-run the cell to advance the game step by step.
tiger.step(rabbit.escape())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,3,0,0,0,0,3,0
1,0,0,0,0,0,0,3,0,0,3
2,0,0,0,3,0,2,0,3,0,0
3,0,0,0,0,0,0,0,0,0,2
4,0,3,0,0,0,0,0,0,0,0
5,3,0,0,0,0,0,0,3,0,0
6,0,3,0,0,0,0,0,3,0,0
7,3,0,0,0,3,0,0,0,0,0
8,0,0,0,3,3,0,3,0,3,3
9,0,0,3,0,0,0,3,0,0,0


____
Задача 2  
Выполнять действия тигром на основе базового уравнения Q-learning. https://habr.com/ru/post/443240/ 

In [101]:
import numpy as np
import pandas as pd

In [102]:
class Point:
    """A grid cell addressed by column ``x`` and row ``y``."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def dist(self, other):
        """Return the Manhattan (taxicab) distance between this point and ``other``."""
        # Manhattan rather than Euclidean: movement on the grid happens in
        # single axis-aligned steps.
        return abs(self.x - other.x) + abs(self.y - other.y)

    def __eq__(self, other):
        """Two points are equal when both coordinates match."""
        try:
            return self.x == other.x and self.y == other.y
        except AttributeError:
            # Not point-like: let Python fall back to "not equal" instead of
            # crashing on e.g. ``point == -1``.
            return NotImplemented

    def __lt__(self, other):
        """Return True when this point is strictly smaller in BOTH coordinates.

        This is a partial order (neither of (0, 1) and (1, 0) is less than the
        other); it is what makes ``low < p < high`` a bounding-box test.
        """
        try:
            return self.x < other.x and self.y < other.y
        except AttributeError:
            return NotImplemented

    def __repr__(self):
        return '({}, {})'.format(self.x, self.y)

In [103]:
class Environment:
    """
    Grid world with a reset/step interface for tabular Q-learning.

    actions: 0 - right, 1 - left, 2 - down, 3 - up, 4 - attack

    Cell codes in ``field``: 0 empty, 1 tiger, 2 prey, 3 enemy.
    A Point(x, y) lives at ``field[y][x]``; ``n`` is the number of rows and
    ``m`` the number of columns.
    """

    # (dx, dy) offset of every movement action.
    _MOVES = {0: (1, 0), 1: (-1, 0), 2: (0, 1), 3: (0, -1)}
    # Fixed layout used when random=False, as (x, y) pairs.
    _FIXED_ENEMIES = ((3, 5), (4, 5), (5, 5), (6, 5))
    _FIXED_PRAY = ((7, 9), (8, 9), (9, 9))
    # Text colour per cell code; anything else is rendered white.
    _CELL_COLORS = {1: 'blue', 2: 'green', 3: 'red'}

    def __init__(self, size_x=10, size_y=10, n_enemy=4, n_pray=3, random=False):
        """Build the world.

        The size and count arguments are used only when ``random`` is True;
        the fixed layout is always 10x10.
        """
        self.actions = np.array([0, 1, 2, 3, 4])
        self.state = Point(0, 0)
        self.place = {'Enemy': [], 'Pray': []}
        self.field = np.zeros((10, 10)).astype(int)
        if random:
            self.create_rnd(size_y, size_x, n_enemy, n_pray)
        else:
            self.create()

    def create(self):
        """Create the fixed 10x10 layout (enemies on row 5, prey on row 9)."""
        self.n = 10
        self.m = 10
        field = np.zeros((self.n, self.m)).astype(int)
        field[0][0] = 1
        for x, y in self._FIXED_ENEMIES:
            field[y][x] = 3
        for x, y in self._FIXED_PRAY:
            field[y][x] = 2
        self.place['Enemy'] = [Point(x, y) for x, y in self._FIXED_ENEMIES]
        self.place['Pray'] = [Point(x, y) for x, y in self._FIXED_PRAY]
        self.field = field

    def _scatter(self, field, count, code, kind):
        """Put ``count`` cells of ``code`` on random empty cells and record them in place[kind]."""
        n, m = field.shape
        for _ in range(count):
            i = np.random.randint(0, n)
            j = np.random.randint(0, m)
            # Re-draw until an empty cell is hit.
            while field[i][j] != 0:
                i = np.random.randint(0, n)
                j = np.random.randint(0, m)
            field[i][j] = code
            self.place[kind].append(Point(j, i))

    def create_rnd(self, n, m, n_enemy, n_pray):
        """Create a random n x m layout: prey first, then enemies.

        Raises ValueError when the requested entities do not fit on the board
        (the rejection sampling would otherwise never terminate).
        """
        wanted = max(n_pray, 0) + max(n_enemy, 0)
        if wanted > n * m - 1:  # one cell belongs to the tiger
            raise ValueError(
                'cannot place %d entities on a board with %d free cells'
                % (wanted, n * m - 1)
            )
        self.n = n
        self.m = m
        field = np.zeros((n, m)).astype(int)
        field[0][0] = 1
        self._scatter(field, n_pray, 2, 'Pray')
        self._scatter(field, n_enemy, 3, 'Enemy')
        self.field = field

    def reset(self):
        """Put the tiger back at (0, 0), redraw the field and return the start state index."""
        self.state = Point(0, 0)
        field = np.zeros((self.n, self.m)).astype(int)
        field[0][0] = 1
        for target in self.place['Pray']:
            field[target.y, target.x] = 2
        for enemy in self.place['Enemy']:
            field[enemy.y, enemy.x] = 3
        self.field = field
        return self.num_state(self.state)

    def check(self, location):
        """Return a truthy value when ``location`` is on the board and not an enemy cell."""
        # Bounds come from the actual board size (they used to be a
        # hard-coded 10x10, which broke other random board sizes).
        return (
            -1 < location.x < self.m
            and -1 < location.y < self.n
            and self.field[location.y][location.x] != 3
        )

    def pray(self, location):
        """Return the prey closest to ``location`` by Manhattan distance."""
        # min() over (distance, Point) tuples: ties fall through to Point.__lt__.
        ranked = [(location.dist(target), target) for target in self.place['Pray']]
        return min(ranked)[1]

    def num_state(self, state):
        """Return the row-major index of ``state`` (row * columns + column)."""
        return state.y * self.m + state.x

    def step(self, action, purpose):
        """Apply ``action`` and return ``(state_index, reward, done)``.

        purpose 0 -- hunt: the target is the nearest prey; reaching it pays 100
                     and a following attack on it pays 100 and ends the phase.
        purpose 1 -- return: the target is (0, 0); reaching it pays 100 and
                     ends the phase.
        Illegal moves and pointless attacks cost 100 and leave the state
        unchanged; every other move costs the distance left to the target.

        Raises ValueError for an action outside 0..4.
        """
        target = self.pray(self.state) if purpose == 0 else Point(0, 0)

        if action == 4:
            if purpose == 0 and target == self.state:
                return self.num_state(self.state), 100, True
            return self.num_state(self.state), -100, False

        if action not in self._MOVES:
            raise ValueError('unknown action: %r' % (action,))
        dx, dy = self._MOVES[action]
        state = Point(self.state.x + dx, self.state.y + dy)

        if not self.check(state):
            return self.num_state(self.state), -100, False

        self.field[self.state.y, self.state.x] = 0
        self.field[state.y, state.x] = 1
        self.state = state

        if state == target:
            if purpose == 0:
                return self.num_state(state), 100, False
            if purpose == 1:
                return self.num_state(state), 100, True

        return self.num_state(state), -target.dist(state), False

    def sample(self):
        """Return a uniformly random action."""
        idx = np.random.randint(0, len(self.actions))
        return self.actions[idx]

    def cell_color(self, val):
        """Return the CSS colour declaration used to render a cell holding ``val``."""
        return 'color: %s' % self._CELL_COLORS.get(val, 'white')

    def print(self):
        """Return a pandas Styler of the field with every cell coloured by its code."""
        styler = pd.DataFrame(self.field).style
        # Styler.applymap was renamed to Styler.map in pandas 2.1; prefer the
        # new name when it exists and fall back for older pandas.
        paint = styler.map if hasattr(styler, 'map') else styler.applymap
        return paint(self.cell_color)

    def __repr__(self):
        return str(pd.DataFrame(self.field))

In [172]:
# Board dimensions; the Q-table below has one row per board cell.
size_x = size_y = 10
env = Environment(n_enemy=30, random=True, size_x=size_x, size_y=size_y)
# One table per purpose passed to env.step (0 = reach the prey, 1 = go home),
# each of shape (number of cells, number of actions).
q_table = np.zeros(shape=(2, size_x * size_y, 5))
display(env.print())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,0,3,0,0,3,0,3
1,0,3,0,0,0,0,3,3,0,3
2,0,3,3,0,0,0,0,0,0,3
3,0,0,0,0,3,0,0,3,0,0
4,3,3,3,0,0,0,0,0,3,3
5,2,3,0,0,3,3,0,0,0,0
6,3,3,0,0,0,0,0,0,0,0
7,0,3,3,0,0,2,0,3,0,0
8,0,0,0,0,0,3,3,0,0,0
9,0,0,3,2,0,0,3,0,0,3


In [173]:
# clear_output is needed here, but the file imports it only in a later cell;
# import it locally so this cell runs on its own.
from IPython.display import clear_output

alpha = 0.1     # learning rate
gamma = 0.6     # discount factor
epsilon = 0.2   # probability of taking a random (exploratory) action
n_epochs = 100

for i in range(n_epochs):
    # Redraw a one-line text progress bar.
    clear_output()
    print('Epoch %d/%d: |%s%s|' % (i, n_epochs - 1, '-' * i, ' ' * (n_epochs - i - 1)))

    state = env.reset()
    # Each episode has two phases with their own table: hunt (0), then return (1).
    for purpose, table in zip([0, 1], q_table):
        done = False
        while not done:
            # Epsilon-greedy action selection.
            if np.random.uniform(0, 1) < epsilon:
                action = env.sample()
            else:
                action = np.argmax(table[state])
            next_state, reward, done = env.step(action, purpose)

            # Q-learning update:
            # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
            old_value = table[state, action]
            next_max = np.max(table[next_state])
            table[state, action] = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            state = next_state
    

Epoch 99/99: |---------------------------------------------------------------------------------------------------|


In [175]:
from IPython.display import display
from IPython.display import clear_output
from time import sleep


# Replay the learned policy greedily, one frame per second.
state = env.reset()
display(env.print())
sleep(1)

for purpose, table in zip([0, 1], q_table):
    done = False
    # The replay is fully deterministic (greedy actions, no learning), so
    # coming back to an already seen state means the tiger is stuck in a
    # cycle that would never end.
    visited = {state}
    while not done:
        clear_output()
        action = np.argmax(table[state])
        state, reward, done = env.step(action, purpose)
        display(env.print())
        sleep(1)
        if not done and state in visited:
            print('The greedy policy loops at state %d; the Q-table needs more training.' % state)
            break
        visited.add(state)
    if not done:
        # The phase did not finish, so the next phase makes no sense either.
        break

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,0,3,0,0,3,0,3
1,0,3,0,0,0,0,3,3,0,3
2,0,3,3,0,0,0,0,0,0,3
3,0,0,0,0,3,0,0,3,0,0
4,3,3,3,0,0,0,0,0,3,3
5,2,3,0,0,3,3,0,0,0,0
6,3,3,0,0,0,0,0,0,0,0
7,0,3,3,0,0,0,0,3,0,0
8,0,0,0,0,0,3,3,0,0,0
9,0,0,3,2,0,0,3,0,0,3
