# Samurai Gunn map tile classification

#### First we define the window capture function
On its own, it has barely any performance impact: it reaches 800-900 fps when capturing the windowed version of Samurai Gunn, a 320x240 px window. 

In [51]:
import cv2 as cv
import numpy as np
from time import time
import win32gui
import win32ui
import win32con

def window_capture():
    '''
    Grabs one frame of the 'Samurai Gunn' window and returns it as a numpy array.

    The window is looked up by its title, a 320 x 240 px region is copied out of
    the window's device context (skipping a hard-coded title bar and border), and
    the raw bitmap bytes are wrapped in a numpy array.

    Returns:
    numpy 3D array: (240 x 320 x 4) uint8 array, 4 bytes per pixel. The rest of
                    this file treats the channels as B, G, R, A (presumably true
                    for a 32-bit display -- confirm on other setups). The array
                    wraps the bytes object returned by GetBitmapBits, so it is
                    read-only.

    Raises:
    RuntimeError: if no window titled 'Samurai Gunn' is found.
    '''
    hwnd = win32gui.FindWindow(None, 'Samurai Gunn')
    if not hwnd:
        # A null handle would make GetWindowDC return the DC of the whole
        # screen, so we would silently capture the desktop instead of the game.
        raise RuntimeError("Window 'Samurai Gunn' not found; is the game running?")

    # Getting the window's size and accounting for window screenshot borders
    # with win32gui.GetWindowRect(hwnd) did not work consistently, so the
    # decoration sizes are hard-coded.
    # Dimensions of titlebar=31 and border=8 for fer's computer
    titlebar_px = 31
    border_px = 8

    # For Samurai Gunn, the non-fullscreen dimensions should be:
    w = 320
    h = 240

    # Top-left corner of the game area inside the window's DC
    crop_x = border_px
    crop_y = titlebar_px

    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (w, h), dcObj, (crop_x, crop_y), win32con.SRCCOPY)

        # To save the screenshot to a file, call here:
        # dataBitMap.SaveBitmapFile(cDC, "prueba.jpg")

        # Raw bytes of the bitmap, to be converted to a format useful for opencv
        signedIntsArray = dataBitMap.GetBitmapBits(True)
    finally:
        # Free resources even when the copy above fails, otherwise GDI handles
        # leak on every failed frame.
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)

    # Dropping the alpha channel may be useful for some applications, like
    # cv.matchTemplate(), which may throw an error otherwise:
    # img = img[..., :3]

    return img

## Defining Tile classification functions
We can notice that each map tile sits in a grid of 15x20 tiles, with each tile being (16x16)px, with some exceptions that are offset vertically by half a tile.
Taking advantage of this, we can simplify the input given to our neural network by reducing each tile to a single pixel that represents it.

It's important to note that even if the map tiles can only ever be on this discrete grid, the players are free to move in the full continuous plane. This must be taken into consideration when classifying tiles that contain players.


In [52]:

# Sampling stride inside a tile: only every row_step-th row and every
# col_step-th column of the tile is read when averaging.
row_step = 2
col_step = 2
def classify_tile(x, y, img, sliced):
    '''
    Reduces one tile to a single per-channel average colour.
    Tiles are (16 x 16)px; half tiles are (8 rows x 16 columns)px.

    Parameters:
    x, y  : coords of the upper left corner of the tile. Note that x is the ROW
            (first array axis) and y is the COLUMN (second array axis) of img.
    img: input image, indexed as img[row, col, channel]
    sliced: int, either 0 or 1, letting us know if it's a full tile or a half tile at the 
             upper/lower edges of the map. 
             0 means it's a full tile
             1 means it's a half tile
    Returns:
    numpy 2D array: (1 x num_channels) float array with the mean value of the
                    sampled pixels for each channel. The caller is expected to
                    cast it to the dtype it needs.
    '''
    tile_height = 16 // (1 + sliced)

    # Index arrays (instead of slices) keep plain-integer indexing semantics:
    # negative coordinates wrap around and out-of-range ones raise IndexError.
    rows = np.arange(x, x + tile_height, row_step)
    cols = np.arange(y, y + 16, col_step)
    samples = img[np.ix_(rows, cols)]           # (n_rows, n_cols, channels)

    px_count = samples.shape[0] * samples.shape[1]
    # Accumulate in uint32 so that summing uint8 pixels cannot overflow
    total = samples.sum(axis=(0, 1), dtype='uint32').reshape(1, -1)

    # px_count already accounts for the reduced height of a half tile, so the
    # mean needs no extra (1 + sliced) factor; that factor doubled the value of
    # half tiles and made it wrap around when later cast to uint8.
    result = total / px_count

    return result
    

In [53]:

def simplify(img, offset):
    '''Reduces the input image resolution by classifying the tiles on the screen and 
    reducing them to 1 px per tile.
    The tiles in every map can be aligned with a 20 x 15 grid of (16 x 16)px cells.
    Iterates over each tile, calling classify_tile() for each of them
    
    Parameters:
    img: input image, indexed as img[row, col, channel]
    offset: vertical shift of the tile grid in px. Use 0 when the map aligns with
            the grid and 8 when the map is shifted half a tile down. With
            offset 8 the first output row is the half tile at the top edge and
            the remaining rows are the full tiles below it; the half tile at the
            bottom edge does not fit in the 15 output rows and is not sampled.
            (Maps with different offsets have yet to be found, and offsets in
            the horizontal direction are not supported.)
            
    Returns:
    numpy 3D array: (15 x 20 x num_channels) uint8 numpy array
    '''
    simple_img = np.zeros((15, 20, img.shape[2]))
    for x in range(15):
        # Only the top row can be a half tile. The previous extra check
        # `x == 19` could never be true because x only goes up to 14.
        sliced = 1 if (offset == 8 and x == 0) else 0

        # Full-tile rows are moved up by the offset to line up with the shifted
        # grid; the half tile at the top already starts at row 0.
        row_px = x*16 - offset*(1 - sliced)
        for y in range(20):
            # The offset is vertical only. Subtracting it from the column too
            # made column 0 start at -8, which wraps around to the right edge
            # of the image.
            simple_img[x, y] = classify_tile(row_px, y*16, img, sliced)
    return simple_img.astype('uint8')

In [54]:
def lifes(img):
    '''
    Counts the remaining lifes of the green player (player 1).

    The agent always plays as player 1 with the green character. Its lifes are
    drawn as static "leaf" icons in the top left area of the map. One pixel per
    leaf is probed, 3 pixels down and 3 pixels right of the leaf's top left
    pixel, so the agent knows how many lifes it has at every moment. The count
    is also meant to be used for rewarding in the future.

    Probed (row, col) coordinates of the 10 lifes:

    (7,8)       (7,15)
    (14,8)      (14,15)
    (21,8)      (21,15)
    (28,8)      (28,15)
    (35,8)      (35,15)

    A life is counted when the green channel at its coordinate is exactly 208,
    which is the green intensity of the leafs of the green character.

    Known issue: some maps are drawn shifted by half a tile (8 pixels,
    presumably), which these fixed coordinates do not take into account.

    Parameters:
    img: input image indexed as img[row, col, channel], with green in channel 1

    Returns:
    int: number of probed pixels whose green value is 208 (0 to 10)
    '''
    leaf_rows = np.arange(7, 36, 7)     # 7, 14, 21, 28, 35
    leaf_cols = np.arange(8, 16, 7)     # 8, 15

    # Read the green channel directly; splitting the whole frame into three
    # channel copies just to look at ten pixels is wasted work on every frame.
    green = img[..., 1]
    probes = green[np.ix_(leaf_rows, leaf_cols)]

    # TODO: losing a life should probably produce a negative response, such as
    # a negative reward (?)
    return int(np.count_nonzero(probes == 208))

In [4]:
def enemy_lifes(img):
    '''
    Counts the enemy lifes shown on screen and prints the count.

    The enemy lifes in survival mode are drawn as white skulls. They are found
    with cv.matchTemplate on a gray scale version of the image, using a template
    that was created manually (white skull on a black background) and is read
    from "template_mini.bmp". Only the 20 x 80 px area where the enemy lifes
    stay static (rows 215-234, columns 120-199) is searched instead of the
    entire image. To avoid multiple detections the matchTemplate threshold is
    0.6, chosen arbitrarily according to various scenarios.

    Parameters:
    img: input image (screenshot of the full game window)

    Returns:
    numpy 2D array: (20 x 80) uint8 gray scale crop of the searched area, meant
                    to be displayed by the caller. The number of lifes itself is
                    only printed, not returned.

    Raises:
    FileNotFoundError: if "template_mini.bmp" cannot be read.
    '''
    gray_img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    template = cv.imread("template_mini.bmp", 0)    # 0 = load as gray scale
    if template is None:
        # cv.imread does not raise on a missing/unreadable file, it returns
        # None, which would only fail later inside matchTemplate.
        raise FileNotFoundError('Could not read template image "template_mini.bmp"')

    # Area of the screenshot where the enemy lifes are drawn. Copy it so the
    # returned image owns its data and is contiguous.
    line = gray_img[215:235, 120:200].copy()

    locations = cv.matchTemplate(line, template, cv.TM_CCOEFF_NORMED)
    thresh = 0.6
    # Every position scoring at least the threshold counts as one life
    count_enemy = int(np.count_nonzero(locations >= thresh))

    # For debugging, the matches can be outlined on `line` with cv.rectangle,
    # using np.where(locations >= thresh) as the top-left corners and
    # template.shape as the box size.

    print('Enemy lifes:', count_enemy)

    # Return the searched area itself. Previously an always-black placeholder
    # array was returned, so the caller's preview window showed nothing.
    return line

## Main Loop

In [68]:
# Number of frames whose FPS has been added to cumulative_fps since the last
# reset. It starts at 0 so that cumulative_fps / frame_count is a true average.
frame_count = 0
cumulative_fps = 0

# offset will depend on the map
# for Ice cube, offset is 0
offset = 0

try:
    while True:
        prev_time = time()

        screenshot = window_capture()
        # To test with a saved image instead: screenshot = cv.imread('sample0.jpg')
        img = simplify(screenshot, offset)

        # Scale the 1 px per tile image back up (16x) so it can be inspected
        dim = (img.shape[1] * 16, img.shape[0] * 16)
        resized = cv.resize(img, dim, interpolation=cv.INTER_AREA)

        cv.imshow('Simplified', resized)
        cv.imshow('Screenshot', screenshot)

        # Clamp the elapsed time: on a coarse system clock two readings can be
        # equal, which would otherwise raise ZeroDivisionError.
        elapsed = max(time() - prev_time, 1e-6)
        cumulative_fps += 1 / elapsed
        frame_count += 1

        pj_lifes = lifes(screenshot)

        # Report (and look for enemy lifes) only once every 32 frames
        if frame_count % 32 == 0:
            print('Avg FPS {}'.format(cumulative_fps / frame_count))
            print("Player lifes:", pj_lifes)

            enemy = enemy_lifes(screenshot)
            cv.imshow('Area of enemy lifes', enemy)

        # Restart the average periodically so it reflects recent performance
        if frame_count >= 1000:
            frame_count = 0
            cumulative_fps = 0

        # Press 'q' on any of the OpenCV windows to stop
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview windows on normal exit and on errors/interrupts alike
    cv.destroyAllWindows()

Avg FPS 9.032481508497185
vidas: 3
vidas enemigas 5
Avg FPS 9.467683128525843
vidas: 3
vidas enemigas 5
Avg FPS 9.648191695369865
vidas: 2
vidas enemigas 5
Avg FPS 9.728714163824604
vidas: 2
vidas enemigas 5
Avg FPS 9.803666883440862
vidas: 2
vidas enemigas 4
Avg FPS 9.804062649824585
vidas: 2
vidas enemigas 4
Avg FPS 9.842448970076829
vidas: 2
vidas enemigas 3
Avg FPS 9.71653642829965
vidas: 2
vidas enemigas 3
Avg FPS 9.690022615780993
vidas: 2
vidas enemigas 2
Avg FPS 9.701228150560265
vidas: 1
vidas enemigas 2
Avg FPS 9.715955350745194
vidas: 1
vidas enemigas 1
Avg FPS 9.729636710682604
vidas: 0
vidas enemigas 1
Avg FPS 9.727434173999464
vidas: 8
vidas enemigas 1
Avg FPS 9.74768014431393
vidas: 4
vidas enemigas 1
Avg FPS 9.74139440482786
vidas: 0
vidas enemigas 1
Avg FPS 9.775566893720415
vidas: 0
no se llama
vidas enemigas 0
Avg FPS 9.806556730781
vidas: 0
no se llama
vidas enemigas 0
Avg FPS 9.787297034091964
vidas: 5
vidas enemigas 5
Avg FPS 9.792344527010371
vidas: 5
vidas enemi