<a href="https://colab.research.google.com/github/cardstdani/practica-par/blob/main/PracticaParadigmas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Práctica Paradigmas 1**

In [None]:
!pip install pyvis==0.3.1

In [None]:
import random
import math
import copy
import pandas as pd
import numpy as np
from IPython.display import display, HTML
import matplotlib.pyplot as plt

class TrieNode:
    def __init__(self, inputChar):
        self.char = inputChar
        self.end = False
        self.children = {}
 
class Trie():
    def __init__(self, startingElements=None):
        self.root = TrieNode("")
        if startingElements!=None:
          for i in startingElements: self.insert(i)
    def insert(self, word):
        node = self.root
        for char in word:
            if char in node.children:
                node = node.children[char]
            else:
                new_node = TrieNode(char)
                node.children[char] = new_node
                node = new_node
        node.end = True      
    def searchAndSplit(self, x):
        node = self.root
        output = ["", ""]
        for char in x:
            if char in node.children:
                node = node.children[char]            
            else:
                return []        
            output[1 if node.end else 0] += node.char
        return output if node.end else []
    def toGraph(self):
      from pyvis.network import Network
      g = Network(directed =True)      
      g.show_buttons()

      nodeIndex = 1
      currentNode = 0
      q = [self.root]      
      g.add_node(currentNode, label="", color="red")
      tempLabels = {0:""}
      while q!=[]:
        n = q.pop(0)                     
        for i in n.children.values():
          tempLabels[nodeIndex] = tempLabels[currentNode]+i.char
          g.add_node(nodeIndex, label=tempLabels[currentNode]+i.char, color="#48e073" if i.end else "blue")
          g.add_edge(currentNode, nodeIndex)
          nodeIndex+=1
          q.append(i)
        currentNode+=1
      g.show('nx.html')

class MainGame:
  def __init__(self):
    #random.seed(368) #311
    self.objects = {".":[".",0],"a":["b",1],"b":["c",5],"c":["d",25],"d":["e",125],"e":["e",625],"1":["1",-25],"2":["3",-5],"3":["4",50],"4":["4",500],"x":["x",-50]}
    self.checkAndLoadFiles()    
    self.turn = 0
    self.score = []
    self.storage = "."
    self.bigFoots = [[(i,j), 0, False] for i in range(len(self.matrix)) for j in range(len(self.matrix[0])) if self.matrix[i][j]=="1"]
    self.updateActual()
    self.tr = Trie((''.join(chr(97+int(j)) for j in str(i))+str(k) for k in range(len(self.matrix[0])) for i in range(len(self.matrix))))
    self.tr.insert("exit")
    self.tr.insert("hint")
    self.tr.insert("*")
    self.tr.toGraph()

  def checkAndLoadFiles(self):
    try:
      with open("tablero.txt", "r") as f:
        self.matrix=[]        
        for i in f.readlines():
          self.matrix.append([])
          for j in i.replace("\n", ""):
            if not j in self.objects: raise
            self.matrix[-1].append(j)
    except:
      self.matrix = [random.sample(["."]*45+["a"]*18+["b"]*4+["c"]*3+["1"]*2, 6) for i in range(6)]
      print("Error al cargar el fichero tablero, usando tablero aleatorio...")      
    try:
      with open("secuencia.txt", "r") as f:
        self.seq=""
        for i in f.readline():
          if i not in self.objects and i != "w": raise
          self.seq += i
    except:
      self.seq = ""
      print("Error al cargar el fichero secuencia, usando secuencia aleatoria...")      

  def main(self):
    print("Que empiece el juego:\U0001F609")
    self.showGame()    
    while any("." in x for x in self.matrix):
      message = self.validarEntrada(input("Mover a casilla: ").lower().replace(" ", ""))
      while not message[0]: message = self.validarEntrada(input("Jugada errónea\nMover a casilla: ").lower().replace(" ", ""))

      if "".join(message[1])=="exit": break
      if "".join(message[1])=="*": self.storage=self.actual; self.updateActual(); self.showGame(); continue;
      coordinates = (lambda l: (int("".join([str(ord(i)%97) for i in l[0]])), int(l[1])))(message[1]) if "".join(message[1])!="hint" else self.getHint()
      if not ((self.matrix[coordinates[0]][coordinates[1]]==".") ^ (self.actual=="w")): print("Jugada errónea"); continue;

      self.updateMatrix(coordinates)
      self.updateActual()
      self.turn+=1
      self.bigFoots = [[i[0], i[1]+1, i[2]] for i in self.bigFoots]
      
      self.showGame()
    print("Partida terminada, GG:\U0001F44F")
    self.plotScore()

  def getHint(self):
    prevObjs = sum(1 - self.objects[b][1] for a in self.matrix for b in a if b!=".")
    tempValues=[-math.inf, (0,0)]
    #debugValues = [[0]*len(self.matrix[0]) for i in range(len(self.matrix))]
    for i in range(len(self.matrix)):
      for j in range(len(self.matrix[0])):
        if (self.matrix[i][j] == ".") ^ (self.actual=="w"): 
          newObj = copy.deepcopy(g)
          newObj.updateMatrix((i,j))
          newScore = sum(sum(self.objects[b][1] - (b != "." or 0) for b in a) for a in newObj.matrix) + prevObjs - 12*newObj.minDistanceToElement((i,j), ["1", "2", newObj.matrix[i][j]])
          if newScore>tempValues[0]: tempValues = [newScore, (i,j)]
          #debugValues[i][j] = newScore
    #plt.imshow(debugValues, cmap='hot', interpolation='nearest')
    #plt.show()
    return tempValues[1]
  
  def plotScore(self):
    x = np.array(list(range(len(self.score))))
    m,c = self.linReg(x, self.score)
    fig = plt.figure(figsize=(12, 12))
    plt.plot(x, self.score)
    plt.plot(x, m*x + c)
    plt.show()
  
  def linReg(self, x, y):
    return np.linalg.lstsq(np.vstack([x, np.ones(len(x))]).T, y)[0]

  def minDistanceToElement(self, coordinates, elements):
    visited = set()
    q = [(coordinates, 0)]
    while q:
        n, distance = q.pop(0)
        if n in visited:
            continue
        visited.add(n)
        for i, j in ((n[0]-1, n[1]), (n[0]+1, n[1]), (n[0], n[1]-1), (n[0], n[1]+1)):
            if i < 0 or i >= len(self.matrix) or j < 0 or j >= len(self.matrix[0]): continue
            if self.matrix[i][j] in elements: return distance+1
            q.append(((i,j), distance+1))
    return 0   
  
  def updateActual(self):
    self.actual = random.choice(["a"]*30+["b"]*5+["c"]*1+["1"]*6+["w"]*1) if self.seq=="" else self.seq[self.turn%len(self.seq)]
  
  def updateMatrix(self, coordinates):
    if self.actual=="w": self.matrix[coordinates[0]][coordinates[1]] = "."; self.deleteBigFoot(coordinates); return;
    self.matrix[coordinates[0]][coordinates[1]] = self.actual
    if self.actual=="1": self.bigFoots.append([coordinates, 0, False]); self.updateBigFoots(); return;

    self.checkAndColapse(coordinates)
    self.updateBigFoots()

  def checkAndColapse(self, coordinates):
    g = (self.getGroup(coordinates), self.matrix[coordinates[0]][coordinates[1]])
    if g[1]=="2": coordinates = max(g[0], key=lambda x:[k for k in self.bigFoots if k[0]==x][0][1])
    while len(g[0])>2:
      for i in g[0]: 
        if self.matrix[i[0]][i[1]] == "2": self.deleteBigFoot(i);
        self.matrix[i[0]][i[1]] = "."
      self.matrix[coordinates[0]][coordinates[1]] = self.objects[g[1]][0]
      g = (self.getGroup(coordinates), self.matrix[coordinates[0]][coordinates[1]])
  
  def deleteBigFoot(self, coordinates):
    for i in range(len(self.bigFoots)):
      if self.bigFoots[i][0] == coordinates: del self.bigFoots[i]; break;

  def updateBigFoots(self):
    for i in enumerate(self.bigFoots):
      n = i[1][:]
      if not n[2] and n[1]>0:
        for j in (lambda l:[k for k in l if k[0]>=0 and k[1]>=0])(((n[0][0]-1, n[0][1]), (n[0][0], n[0][1]+1), (n[0][0]+1, n[0][1]), (n[0][0], n[0][1]-1))):
          try:
            if self.matrix[j[0]][j[1]]==".": self.matrix[j[0]][j[1]]="1"; self.matrix[n[0][0]][n[0][1]]="x" if n[1]>10 else "."; self.bigFoots[i[0]][0]=j[:]; break;
          except: pass

      if n[0]==self.bigFoots[i[0]][0]: 
        g = self.getGroup(n[0], True)
        if not "." in (self.matrix[k[0]][k[1]] for k in g):
          for j in g: self.matrix[j[0]][j[1]] = "2"; bfIndex = [k for k in range(len(self.bigFoots)) if self.bigFoots[k][0]==j][0]; self.bigFoots[bfIndex][2]=True;  
          self.checkAndColapse(n[0])

  def getGroup(self, coordinates, bigFootMode=False):    
    visited = set()
    output = [coordinates]
    q = [coordinates]
    while q!=[]:
      n = q.pop(0)
      visited.add(n)
      for i in ((n[0]-1, n[1]), (n[0], n[1]+1), (n[0]+1, n[1]), (n[0], n[1]-1)):
        if i[0] < 0 or i[0] >= len(self.matrix) or i[1] < 0 or i[1] >= len(self.matrix[0]): continue
        if (not i in visited) and (self.matrix[i[0]][i[1]]==self.matrix[coordinates[0]][coordinates[1]] or (bigFootMode and self.matrix[i[0]][i[1]]==".")): q.append(i); output.append(i);
    return output

  def validarEntrada(self, s):
    s = self.tr.searchAndSplit(s)
    return (True if s else False, s)

  def showGame(self):
    df = pd.DataFrame(self.matrix, columns=list(range(len(self.matrix[0]))), index=[''.join(chr(65+int(j)) for j in str(i)) for i in range(len(self.matrix))])
    display(HTML(df.to_html()))
    self.score.append(sum(self.objects[j][1] for i in self.matrix for j in i))
    print(f"\nTurno: {self.turn} Puntos:{self.score[-1]}\nAlmacen: [{self.storage}] Actual: [{self.actual}]")

if __name__=="__main__":
  g = MainGame()
  g.main()  

**Bot Encoding**

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import random
import numpy as np
from collections import deque

BATCH_SIZE = 500

class QNet(nn.Module):
    def __init__(self, input_size=37, hidden_size=62, output_size=36):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.softmax(self.linear2(x))
        return x

    def save(self, file_name='model.pth'):
        model_folder_path = './model'
        if not os.path.exists(model_folder_path):
            os.makedirs(model_folder_path)
        file_name = os.path.join(model_folder_path, file_name)
        torch.save(self.state_dict(), file_name)


class QTrainer:
    def __init__(self, model, lr, gamma):
        self.lr = lr
        self.gamma = gamma
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def train_step(self, state, action, reward, next_state, done):
        state = torch.tensor(state, dtype=torch.float)
        next_state = torch.tensor(next_state, dtype=torch.float)
        action = torch.tensor(action, dtype=torch.long)
        reward = torch.tensor(reward, dtype=torch.float)

        if len(state.shape) == 1:
            state = torch.unsqueeze(state, 0)
            next_state = torch.unsqueeze(next_state, 0)
            action = torch.unsqueeze(action, 0)
            reward = torch.unsqueeze(reward, 0)
            done = (done, )

        # 1: predicted Q values with current state
        pred = self.model(state)
        target = pred.clone()
        for idx in range(len(done)):
            Q_new = reward[idx]
            if not done[idx]:
                Q_new = reward[idx] + self.gamma * torch.max(self.model(next_state[idx]))
            target[idx][torch.argmax(action[idx]).item()] = Q_new
    
        self.optimizer.zero_grad()
        loss = self.criterion(target, pred)
        loss.backward()

        self.optimizer.step()

class Agent:
    def __init__(self):
        self.n_games = 0
        self.epsilon = 0 # randomness
        self.gamma = 0.9 # discount rate
        self.memory = deque(maxlen=100000) # popleft()
        self.model = QNet()
        self.trainer = QTrainer(self.model, lr=0.001, gamma=self.gamma)

    def get_state(self, game):        
        return np.array([ord(i) for i in np.array(game.matrix).reshape(36).tolist()]+[ord(game.actual)])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done)) # popleft if MAX_MEMORY is reached

    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE) # list of tuples
        else:
            mini_sample = self.memory
        states, actions, rewards, next_states, dones = zip(*mini_sample)
        self.trainer.train_step(states, actions, rewards, next_states, dones)

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done)

    def get_action(self, state):
        # random moves: tradeoff exploration / exploitation
        self.epsilon = 80 - self.n_games
        final_move = [0,0]
        if random.randint(0, 200) < self.epsilon:
            final_move = [random.randint(0, 5), random.randint(0, 5)]
        else:
            prediction = self.model(torch.tensor(state, dtype=torch.float))
            final_move = torch.argmax(prediction).item()
            final_move = [final_move//6 if final_move!=36 else 5, final_move%6]
        return final_move

def train():
    total_score = 0
    record = 0
    agent = Agent()
    game = MainGame()
    while True:
        # get old state
        state_old = agent.get_state(game)
        # get move
        coordinates = tuple(agent.get_action(state_old))
        # perform move and get new state
        reward = sum(game.objects[j][1] for i in game.matrix for j in i)
        if not (not ((game.matrix[coordinates[0]][coordinates[1]]==".") ^ (game.actual=="w"))):
          game.updateMatrix(coordinates)
          game.updateActual()
          game.turn+=1
          game.bigFoots = [[i[0], i[1]+1, i[2]] for i in game.bigFoots]
          game.score.append(sum(game.objects[j][1] for i in game.matrix for j in i))
          reward = game.score[-1]-reward+game.linReg(np.array(list(range(len(game.score)))), game.score)[0]*len(game.score)
        else:
          reward = -50                
        done = not (any("." in x for x in game.matrix))
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, coordinates, reward, state_new, done)
        # remember
        agent.remember(state_old, coordinates, reward, state_new, done)

        if done:
            # train long memory, plot result, RESET GAME
            random.seed()
            game.checkAndLoadFiles()
            game.turn = 0
            game.score = []
            game.bigFoots = [[(i,j), 0, False] for i in range(len(game.matrix)) for j in range(len(game.matrix[0])) if game.matrix[i][j]=="1"]
            game.updateActual()
            
            agent.n_games += 1
            agent.train_long_memory()
            agent.model.save()

            print('Game', agent.n_games, 'Record:', record)

if __name__ == '__main__':
    train()

Error al cargar el fichero tablero, usando tablero aleatorio...
Error al cargar el fichero secuencia, usando secuencia aleatoria...
(5, 1) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['a', '.', '.', 'b', '.', '.'], ['.', '.', 'a', '.', '.', 'b'], ['.', 'b', '.', 'a', '.', '.'], ['.', '.', 'b', '.', 'c', '.']]
(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['a', '.', '.', 'b', '.', '.'], ['.', '.', 'a', '.', '.', 'b'], ['.', 'b', '.', 'a', '.', '.'], ['.', 'a', 'b', '.', 'c', '.']]
(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['a', '.', '.', 'b', '.', '.'], ['.', '.', 'a', '.', 'a', 'b'], ['.', 'b', '.', 'a', '.', '.'], ['.', 'a', 'b', '.', 'c', '.']]
(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['a', '.', '.', 'b', '.', '.'], ['.', '.', 'a', '.', 'a', 'b'], ['.', 'b', '.', 'a', '.', '.'], ['.', 'a', 'b', '.', 'c', '.']]
(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.

  return np.linalg.lstsq(np.vstack([x, np.ones(len(x))]).T, y)[0]
  x = F.softmax(self.linear2(x))


(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['.', '.', '.', 'b', '.', '.'], ['b', '.', '.', 'b', 'a', 'b'], ['.', 'b', '.', '.', 'a', '1'], ['.', 'a', 'b', '.', 'c', '.']]
(2, 5) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['.', '.', '.', 'b', '.', '.'], ['b', '.', '.', 'b', 'a', 'b'], ['.', 'b', '.', '.', 'a', '1'], ['.', 'a', 'b', '.', 'c', '.']]
(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['.', '.', '.', 'b', '.', 'a'], ['b', '.', '.', 'b', 'a', 'b'], ['.', 'b', '.', '.', 'a', '.'], ['.', 'a', 'b', '.', 'c', '1']]
(3, 4) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['.', '.', '.', 'b', '.', 'a'], ['b', '.', '.', 'b', 'a', 'b'], ['.', 'b', '.', '.', 'a', '.'], ['.', 'a', 'b', '.', 'c', '1']]
(1, 1) [['b', 'a', '.', 'c', '.', 'a'], ['.', '.', '.', 'a', 'a', '.'], ['.', '.', '.', 'b', '.', 'a'], ['b', '.', '.', 'b', 'a', 'b'], ['.', 'b', '.', '.', 'a', '.'], ['.', 'a', 'b', '.', 'c', '1']]


  state = torch.tensor(state, dtype=torch.float)


Game 1 Record: 0
(3, 4) [['c', '1', '.', '1', '.', 'a'], ['.', '.', '.', '.', 'a', '.'], ['.', '.', '.', '.', '.', 'c'], ['.', 'b', 'c', 'a', '.', '.'], ['a', '.', '.', 'a', '.', 'a'], ['a', '.', '.', 'a', 'a', 'a']]
(3, 4) [['c', '1', '.', '1', '.', 'a'], ['.', '.', '.', '.', 'a', '.'], ['.', '.', '.', '.', '.', 'c'], ['.', 'b', 'c', '.', 'b', '.'], ['a', '.', '.', '.', '.', '.'], ['a', '.', '.', '.', '.', '.']]
(4, 3) [['c', '1', '.', '1', '.', 'a'], ['.', '.', '.', '.', 'a', '.'], ['.', '.', '.', '.', '.', 'c'], ['.', 'b', 'c', '.', 'b', '.'], ['a', '.', '.', '.', '.', '.'], ['a', '.', '.', '.', '.', '.']]
(3, 4) [['c', '.', '1', '.', '1', 'a'], ['.', '.', '.', '.', 'a', '.'], ['.', '.', '.', '.', '.', 'c'], ['.', 'b', 'c', '.', 'b', '.'], ['a', '.', '.', 'a', '.', '.'], ['a', '.', '.', '.', '.', '.']]
(3, 4) [['c', '.', '1', '.', '1', 'a'], ['.', '.', '.', '.', 'a', '.'], ['.', '.', '.', '.', '.', 'c'], ['.', 'b', 'c', '.', 'b', '.'], ['a', '.', '.', 'a', '.', '.'], ['a', '.', '.',

KeyboardInterrupt: ignored