In [1]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import math

# Side length of the square board; reset() allocates a BOARD_SIZE x BOARD_SIZE grid.
BOARD_SIZE = 4
# Direction codes accepted by move(): 0 = up, 1 = down, 2 = left, 3 = right.
ACTIONS = [0, 1, 2, 3]

In [3]:
# GAME ENVIRONMENT

# Notes:
# 0 is an empty cell; every other tile is stored as log2(tile value) for readability (e.g. 1 -> 2, 2 -> 4)


def add_tile(board):
    """Spawn one new tile in a randomly chosen empty cell of ``board``.

    The board is modified in place and the same array object is returned so
    calls can be chained. A full board (no zeros) is returned untouched.

    Args:
        board: 2-D array of tile exponents, where 0 marks an empty cell.

    Returns:
        The same ``board`` object that was passed in.
    """
    # Coordinates of every empty cell, listed in row-major order.
    free_cells = np.argwhere(board == 0).tolist()
    if not free_cells:
        return board

    row, col = random.choice(free_cells)
    # Exponent 1 (a "2" tile) with probability 0.9, otherwise exponent 2 (a "4").
    roll = random.random()
    board[row, col] = 2 if roll >= 0.9 else 1
    return board

def move_right(board):
    """Slide and merge every row of ``board`` toward column 0.

    NOTE: despite the name, tiles are packed against index 0 of each row
    (the left edge). ``move`` mirrors or rotates the board around this helper
    to obtain the other directions, so the name is kept for compatibility.

    Tiles are stored as exponents: 0 is an empty cell and ``k`` stands for the
    tile ``2**k``. Each row is scanned from index 0; two equal neighbours
    (ignoring empty cells) merge into a single tile of exponent ``k + 1``, and
    a tile created by a merge is not merged again within the same call.

    The number of rows is taken from ``board`` itself rather than from a
    module-level constant, so boards of any height and width are handled.

    Args:
        board: 2-D integer array of tile exponents. It is not modified.

    Returns:
        ``(new_board, reward)`` where ``new_board`` is a new array with the
        shape and dtype of ``board`` and ``reward`` is the sum of ``2**(k+1)``
        over all merges performed (0 when nothing merged).
    """
    new_board = np.zeros_like(board)
    reward = 0
    for row_idx, row in enumerate(board):
        # Non-empty tiles of this row, in order, as plain Python numbers so
        # the reward stays a Python int whether or not a merge happens.
        tiles = row[row != 0].tolist()
        merged = []
        i = 0
        while i < len(tiles):
            if i + 1 < len(tiles) and tiles[i] == tiles[i + 1]:
                merged.append(tiles[i] + 1)
                reward += 2 ** (tiles[i] + 1)
                i += 2  # both tiles of the pair are consumed
            else:
                merged.append(tiles[i])
                i += 1
        if merged:
            new_board[row_idx, :len(merged)] = merged
    return new_board, reward

def move(board, direction):
    """Apply a single move in ``direction`` and return the resulting board.

    All sliding and merging is delegated to ``move_right``, which packs tiles
    toward column 0. For every direction except 2 the board is first rotated
    or mirrored, passed through ``move_right``, and then transformed back to
    its original orientation.

    Args:
        board: 2-D array of tile exponents.
        direction: 0 = up, 1 = down, 2 = left, 3 = right.

    Returns:
        ``(new_board, reward)`` as produced by ``move_right``, with
        ``new_board`` mapped back to the orientation of ``board``.

    Raises:
        ValueError: if ``direction`` is not one of the four codes above.
    """
    if direction == 0:
        # Up: quarter turn counter-clockwise, slide, then turn back.
        slid, gained = move_right(np.rot90(board, 1))
        return np.rot90(slid, -1), gained
    if direction == 1:
        # Down: quarter turn clockwise, slide, then turn back.
        slid, gained = move_right(np.rot90(board, -1))
        return np.rot90(slid, 1), gained
    if direction == 2:
        # Left: the helper's native direction, no transform needed.
        slid, gained = move_right(board)
        return slid, gained
    if direction == 3:
        # Right: mirror horizontally, slide, mirror back.
        slid, gained = move_right(np.fliplr(board))
        return np.fliplr(slid), gained
    raise ValueError("Invalid direction")

def is_game_over(board):
    """Return True when none of the moves in ``ACTIONS`` changes ``board``.

    Every action is tried with ``move``; evaluation stops at the first action
    whose result differs from ``board``.
    """
    return all(
        np.array_equal(move(board, action)[0], board) for action in ACTIONS
    )

class Game2048Env:
    """Small 2048 environment exposing ``reset`` / ``step`` / ``get_state``.

    The grid lives in ``self.board`` as tile exponents (0 = empty). The
    attribute is created by ``reset``, so ``reset`` has to be called before
    ``step`` or ``get_state``.
    """

    def reset(self):
        """Start a new game with two random tiles and return the initial state."""
        blank = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
        with_first_tile = add_tile(blank)
        self.board = add_tile(with_first_tile)
        return self.get_state()

    def step(self, action):
        """Play ``action`` and return ``(state, reward, done)``.

        A new tile is spawned only when the move actually changed the board.
        ``done`` is the result of ``is_game_over`` on the board after the move
        (and after the spawn, if any).
        """
        previous = np.copy(self.board)
        result, reward = move(self.board, action)
        if np.array_equal(result, previous):
            # Nothing moved, so no new tile is added.
            self.board = result
        else:
            self.board = add_tile(result)
        done = is_game_over(self.board)
        return self.get_state(), reward, done

    def get_state(self):
        """Return the board as a flat array with every exponent divided by 17."""
        max_exponent = 17.0  # normalize by log2(2^17), treated as the max tile
        return self.board.ravel() / max_exponent