# 00 Init

## Mount

In [1]:
# Mount Google Drive at /content/drive (uses the Colab-specific `google.colab` module).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Set the working directory so the project's .py files can be imported

In [2]:
import os

In [3]:
# Switch to the project folder on the mounted Drive; presumably this is what lets the
# `codes.*` imports below resolve, and the relative CSV path written at the end of the
# notebook also lands here.
# NOTE(review): absolute, user-specific path — adjust for a different Drive layout.
os.chdir('/content/drive/MyDrive/Minesweeper [RL]')

In [4]:
# Sanity check: display the current working directory to confirm the chdir took effect.
os.getcwd()

'/content/drive/MyDrive/Minesweeper [RL]'

In [5]:
! pip install codes

Collecting codes
  Downloading codes-0.1.5-py3-none-any.whl.metadata (582 bytes)
Downloading codes-0.1.5-py3-none-any.whl (5.5 kB)
Installing collected packages: codes
Successfully installed codes-0.1.5


## Import py files

In [6]:
# baseline : Env, Agent
# from codes.environment.reward5 import *
from codes.environment.reward5 import *
from codes.agent.vectorDQN import *
from codes.net.basic import *
from codes.trainer.validShutDown import *
from codes.tester.basic import *
# import codes.trainer.trainerWithValidShutDown as Trainer


# 01 Train

## level dictionary

In [7]:
# Difficulty presets: each entry carries the `map_size` and `n_mines` values that are
# handed to `MinesweeperEnv` when the environment is built.
level = {
    'easy': dict(map_size=(9, 9), n_mines=10),
    'medium': dict(map_size=(16, 16), n_mines=40),
    'expert': dict(map_size=(16, 30), n_mines=99),
}

## HYPER PARAMETERS

In [8]:
# All constants except CONV_UNITS are forwarded as keyword arguments to the agent
# constructors further down; CONV_UNITS goes to `Net`.

# Memory sizing
# NOTE(review): names suggest a replay buffer capacity / warm-up threshold — confirm in the agent code.
MEM_SIZE = 200_000
MEM_SIZE_MIN = 1_000

# Learning
BATCH_SIZE = 64
LEARNING_RATE = 1e-3
LEARN_DECAY = 0.9999975
LEARN_MIN = 1e-4  # NOTE(review): the cell originally carried a trailing "# 0.001" here — presumably an earlier value
DISCOUNT = 0.1

# Exploration
EPSILON = 0.95
EPSILON_DECAY = 0.999975
EPSILON_MIN = 0.01

# DQN
CONV_UNITS = 64
UPDATE_TARGET_EVERY = 5

## Agents

In [11]:
class PrefixedAgent(Agent):
    """Agent whose action is always drawn from the fixed tile set {0, 8, 72, 80}.

    Presumably these are the flat indices of the four corners of the 9x9 board — TODO confirm
    the environment's action indexing.

    NOTE(review): this notebook defines `PrefixedAgent` in two cells; whichever cell ran last
    is the class that gets instantiated.
    """

    def __init__(self, env, net, **kwargs):
        # No extra state: construction is delegated entirely to the base Agent.
        super().__init__(env, net, **kwargs)

    def get_action(self, state):
        """Return one of the four fixed tile indices, chosen uniformly at random.

        Receives a single state image; `state` is not consulted by this policy.
        """
        candidate_tiles = [0, 8, 72, 80]
        return np.random.choice(candidate_tiles)

In [10]:
class PrefixedAgent(Agent):
    """Agent whose action is drawn from tile indices 0..80 with {0, 8, 72, 80} removed.

    Presumably the excluded indices are the four corners of the 9x9 board — TODO confirm
    the environment's action indexing.

    NOTE(review): this notebook defines `PrefixedAgent` in two cells; whichever cell ran last
    is the class that gets instantiated.
    """

    def __init__(self, env, net, **kwargs):
        super().__init__(env, net, **kwargs)

        # Tile indices that must never be chosen.
        excluded = {0, 8, 72, 80}

        # Every index in 0..80 that is not excluded, in ascending order.
        self.result = [tile for tile in range(81) if tile not in excluded]

    def get_action(self, state):
        """Return one of the allowed tile indices, chosen uniformly at random.

        Receives a single state image; `state` is not consulted by this policy.
        """
        return np.random.choice(self.result)

In [13]:
class RandomAgent(Agent):
    """Agent that picks any tile index in `range(env.total_tiles)` uniformly at random."""

    def __init__(self, env, net, **kwargs):
        # No extra state: construction is delegated entirely to the base Agent.
        super().__init__(env, net, **kwargs)

    def get_action(self, state):
        """Return a random tile index.

        Receives a single state image; `state` is not consulted by this policy.
        """
        all_tiles = range(self.env.total_tiles)
        return np.random.choice(all_tiles)

In [14]:
# Environment for the 'easy' preset.
env = MinesweeperEnv(
    map_size=level['easy']['map_size'],
    n_mines=level['easy']['n_mines'],
)

# Network sized from the environment's state shape and tile count.
net = Net(
    input_dims=env.state.shape,
    n_actions=env.total_tiles,
    conv_units=CONV_UNITS,
)

# Both agents receive exactly the same keyword arguments, including the same `env` and
# `net` objects, so the argument set is written once and unpacked twice.
_shared_agent_kwargs = dict(
    env=env,
    net=net,
    MEM_SIZE=MEM_SIZE,
    MEM_SIZE_MIN=MEM_SIZE_MIN,
    BATCH_SIZE=BATCH_SIZE,
    LEARNING_RATE=LEARNING_RATE,
    LEARN_DECAY=LEARN_DECAY,
    LEARN_MIN=LEARN_MIN,
    DISCOUNT=DISCOUNT,
    EPSILON=EPSILON,
    EPSILON_DECAY=EPSILON_DECAY,
    EPSILON_MIN=EPSILON_MIN,
    UPDATE_TARGET_EVERY=UPDATE_TARGET_EVERY,
)

# NOTE(review): `PrefixedAgent` is defined in two cells of this notebook; the instance built
# here is of whichever definition was executed last.
prefixed_agent = PrefixedAgent(**_shared_agent_kwargs)

random_agent = RandomAgent(**_shared_agent_kwargs)

# 02 Test

In [20]:
EPISODES = 100000


def count_first_reveals(agent, env, episodes):
    """Play only the first move of `episodes` fresh games and record how much it reveals.

    Args:
        agent: object exposing `get_action(state)`; its return value is passed to `env.step`.
        env: environment exposing `reset()`, `state`, `step(action)` and `unrevealed`.
        episodes: number of one-move games to sample.

    Returns:
        A list with one entry per episode: the number of cells in the post-move state that
        differ from `env.unrevealed`.
    """
    counts = []
    for _ in range(episodes):
        env.reset()
        action = agent.get_action(env.state)
        # Only the state after the first click is of interest; reward and done are ignored.
        next_state, _reward, _done = env.step(action)
        counts.append(np.sum(next_state != env.unrevealed))
    return counts


# The agent under test is an explicit argument instead of a commented-out line:
# pass `prefixed_agent` here to sample that policy instead.
# NOTE(review): no random seed is set in the visible cells, so counts vary between runs.
num_revealed = count_first_reveals(random_agent, env, EPISODES)

In [16]:
# Wrap the sampled first-move reveal counts in a one-column DataFrame and summarise them.
# NOTE(review): `num_revealed` holds results for whichever agent the sampling cell last ran
# with; the name `vertex` presumably means it was run with the corner-clicking agent first.
# Confirm before trusting the label.
vertex = pd.DataFrame(num_revealed)
vertex.describe()

Unnamed: 0,0
count,100000.0
mean,18.8739
std,19.295322
min,1.0
25%,1.0
50%,12.0
75%,34.0
max,71.0


In [19]:
# Fraction of episodes in `vertex` whose first move revealed a number of cells other than 1.
np.sum(vertex[0] != 1) / len(vertex)

0.66643

In [None]:
# Same summary, stored under the name `without_vertex`.
# NOTE(review): this cell has no execution count and its recorded output reports 10000 rows,
# while EPISODES is 100000 — the output presumably comes from an earlier run. Like the other
# summaries it reads the shared `num_revealed`, so its content depends on the last sampled agent.
without_vertex = pd.DataFrame(num_revealed)
without_vertex.describe()

Unnamed: 0,0
count,10000.0
mean,18.4453
std,19.252997
min,1.0
25%,1.0
50%,10.0
75%,33.0
max,69.0


In [21]:
# Same summary, stored under the name `random`.
# NOTE(review): reads the shared `num_revealed`, so it reflects the random agent only if the
# sampling cell was last run with `random_agent`. The name `random` would also shadow the
# stdlib module of the same name if that were imported — consider a more specific name.
random = pd.DataFrame(num_revealed)
random.describe()

Unnamed: 0,0
count,100000.0
mean,15.19361
std,20.070597
min,1.0
25%,1.0
50%,1.0
75%,31.0
max,71.0


In [24]:
# Fraction of episodes in `random` whose first move revealed a number of cells other than 1.
# The denominator is the length of the frame being counted (`random`), not of `vertex`.
np.sum(random[0] != 1) / len(random)

0.40333

In [22]:
# Put the two result columns side by side and give them readable names.
# NOTE(review): 'radom' looks like a typo for 'random'; it ends up in the CSV header, so
# check for readers of that file before renaming.
table = pd.concat([vertex, random], axis=1)
table.columns = ['vertex', 'radom']
table.head()

Unnamed: 0,vertex,radom
0,1,33
1,20,61
2,18,42
3,47,32
4,35,52


In [23]:
# Persist the comparison table; the relative path resolves against the current working
# directory (set via os.chdir near the top of the notebook).
table.to_csv('firstReveal.csv')