In [1]:
import os
import cv2
import time
import keyboard
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from tqdm import tqdm
from PIL import ImageGrab, Image
import matplotlib.pyplot as plt
from pynput.keyboard import Key, Controller

# Virtual keyboard (pynput) used to send key presses.
kb = Controller()
# Class index -> key name. Indices 0-3 are pressable keys; index 4 ('nop')
# is the "do nothing" class.
lst = dict(enumerate(('w', 'a', 's', 'd', 'nop')))

In [2]:
class Net(nn.Module):
    """CNN mapping a single-channel square image to class probabilities.

    Architecture: three 5x5 convolutions (1 -> 32 -> 64 -> 128 channels), each
    followed by ReLU and 2x2 max pooling, then a 512-unit fully connected
    layer and an output layer with ``n_classes`` units.

    Args:
        img_size: Side length of the square input image. Defaults to 50,
            which yields 512 flattened features after the conv stack.
        n_classes: Number of output classes. Defaults to 5.
    """

    def __init__(self, img_size=50, n_classes=5):
        # Run the init of the parent class (nn.Module).
        super().__init__()
        # 1 input channel, 32 output channels, 5x5 kernel.
        self.conv1 = nn.Conv2d(1, 32, 5)
        # 32 in (output of conv1), 64 out, 5x5 kernel.
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        # Push one dummy image through the conv stack so that convs() records
        # the flattened feature count in self._to_linear. No gradients are
        # needed for this shape probe.
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(img_size, img_size).view(-1, 1, img_size, img_size))

        self.fc1 = nn.Linear(self._to_linear, 512)
        # 512 in, one output per class.
        self.fc2 = nn.Linear(512, n_classes)

    def convs(self, x):
        """Apply the three conv -> ReLU -> 2x2 max-pool stages.

        On the first call this also stores the per-sample flattened feature
        count in ``self._to_linear``.
        """
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Return per-class probabilities (softmax over dim 1) for a batch.

        Args:
            x: Tensor of shape (batch, 1, img_size, img_size).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not match the
                ``img_size`` the network was built for.
        """
        x = self.convs(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, self._to_linear), this never silently folds extra features
        # of a wrong-sized input into additional batch rows; fc1 raises a
        # shape error instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # output layer: no activation before the softmax
        return F.softmax(x, dim=1)

In [3]:
# Load trained model

PATH = "Jul30_0940_model.pt"

model = Net()
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# machine without CUDA; inference below runs on CPU tensors anyway.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source.
model.load_state_dict(torch.load(PATH, map_location="cpu"))
# Switch to inference mode; as the last expression this also displays the
# model summary.
model.eval()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=5, bias=True)
)

In [4]:
def move(opt):
    """Press the key mapped to class index ``opt``.

    Indices below 4 are looked up in ``lst`` and sent via ``hit_key``; any
    other index (4 is 'nop') does nothing.
    """
    if not opt < 4:
        return
    hit_key(lst[opt])

def hit_key(key, hold=0.1):
    """Tap ``key`` on the virtual keyboard: press, hold, release.

    Args:
        key: Key to send; passed straight to ``kb.press`` / ``kb.release``.
        hold: Seconds to keep the key down. Defaults to 0.1.
    """
    kb.press(key)
    try:
        time.sleep(hold)
    finally:
        # Always release, even if the sleep is interrupted (e.g. by a kernel
        # interrupt), so the key is never left stuck down.
        kb.release(key)

# def roi(img, vertices):
#     mask = np.zeros_like(img)
#     cv2.fillPoly(mask, vertices, 255)
#     masked = cv2.bitwise_and(img, mask)
#     return masked

# Corners of the source quadrilateral in the padded screenshot and the
# corners of the rectangle they are mapped onto.
_PTS_SRC = np.array([[440, 37], [1229, 247], [158, 597], [947, 807]])
_PTS_DST = np.array([[0, 0], [817, 0], [0, 627], [817, 627]])
_WARP_SIZE = (817, 627)  # (width, height) of the warped output
_HOMOGRAPHY = None  # filled in lazily by _get_homography()


def _get_homography():
    """Return the _PTS_SRC -> _PTS_DST homography, computing it only once."""
    global _HOMOGRAPHY
    if _HOMOGRAPHY is None:
        # The point sets are constant, so the matrix never changes between
        # frames; cache it instead of re-solving on every call.
        _HOMOGRAPHY, _status = cv2.findHomography(_PTS_SRC, _PTS_DST)
    return _HOMOGRAPHY


def process_img(original_image):
    """Convert a captured frame to a straightened grayscale image.

    Steps: convert to grayscale, append 144 rows of zeros at the bottom, then
    apply a perspective warp that maps the quadrilateral ``_PTS_SRC`` onto an
    817x627 rectangle.

    Args:
        original_image: 3-channel image array (as required by
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        2-D array with 627 rows and 817 columns.
    """
    # NOTE(review): the caller passes PIL ImageGrab output, which is RGB, so
    # BGR2GRAY swaps the red/blue weights. Left as-is because the saved model
    # was presumably trained on frames produced this way - confirm before
    # changing.
    gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
    # Zero-pad the bottom; presumably needed so the lowest source corner
    # (y=807) lies inside the image - TODO confirm against the capture size.
    padded = np.pad(gray, ((0, 144), (0, 0)), 'constant')
    # Straighten the image.
    return cv2.warpPerspective(padded, _get_homography(), _WARP_SIZE)

In [8]:
# Short countdown before any keys are sent.
for remaining in range(3, 0, -1):
    print('{}...'.format(remaining))
    time.sleep(1)

try:
    while True:
        # Grab the screen region and straighten it.
        screen = np.array(ImageGrab.grab(bbox=(0, 33, 1280, 699)))
        new_screen = process_img(screen)
        cv2.imshow('monitor', new_screen)

        # Downscale to the 50x50 single-channel input the network expects.
        # NOTE(review): pixel values are passed unscaled; presumably this
        # matches how the training data was prepared - confirm.
        frame = torch.Tensor(cv2.resize(new_screen, (50, 50))).view(-1, 1, 50, 50)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = model(frame)

        # Pick the most probable class. The previous `output == 1` test only
        # matched when the softmax saturated to exactly 1.0 and otherwise
        # fell back to 'nop' through a swallowed exception.
        prediction = int(torch.argmax(output, dim=1)[0])
        move(prediction)

        # Press 'q' in the monitor window to stop.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window on any exit, including errors and kernel
    # interrupts.
    cv2.destroyAllWindows()

3...
2...
1...
