In [5]:
#canny training
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import cv2
import random

# 특정 키들
valid_keys = ['w', 'a', 's', 'd']
special_labels = ['no_keys_pressed']


# 데이터셋 클래스 정의
class KeypressDataset(Dataset):
    def __init__(self, base_folders, transform=None, w_ratio=0.1, nokey_ratio = 0.3):
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.speeds = []

        for base_folder in base_folders:
            # 속도 값을 저장할 경로
            speed_file_path = os.path.join(base_folder, 'text', 'speed.txt')

            # 속도 값 읽기
            with open(speed_file_path, 'r') as f:
                speed_values = [float(line.strip()) for line in f]

            # 유효한 폴더 이름만 필터링
            label_folders = [f for f in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, f)) and self.is_valid_label(f)]

            # 각 폴더 내의 모든 이미지 경로를 수집
            all_image_paths = []
            for label_folder in label_folders:
                folder_path = os.path.join(base_folder, label_folder)
                image_paths = glob.glob(os.path.join(folder_path, '*.png'))
                for img_path in image_paths:
                    all_image_paths.append((img_path, label_folder))

            # 파일 이름을 기준으로 정렬
            all_image_paths.sort(key=lambda x: int(os.path.basename(x[0]).split('.')[0]))

            # 정렬된 이미지 경로와 라벨, 속도 값을 각각 저장
            for i, (img_path, label_folder) in enumerate(all_image_paths):
                label_array = self.label_to_array(label_folder)
                # 'w' 데이터의 비율을 조정
                if label_array == [1, 0, 0, 0]:
                    if random.random() > w_ratio:
                        continue
                # if label_array == [1, 1, 0, 0]:
                #     if random.random() > w_ratio:
                #         continue
                # if label_array == [1, 0, 0, 1]:
                #     if random.random() > w_ratio:
                #         continue
                if label_array == [0, 0, 0, 0]:
                    if random.random() > nokey_ratio:
                        continue
                self.image_paths.append(img_path)
                self.labels.append(label_array)
                self.speeds.append(speed_values[i])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')

        # Canny Edge Detection 적용
        image = np.array(image)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 150)
        image = Image.fromarray(edges).convert('L')

        if self.transform:
            image = self.transform(image)

        label = self.labels[idx]
        speed = self.speeds[idx]

        return image, torch.tensor(label, dtype=torch.float), torch.tensor(speed, dtype=torch.float).unsqueeze(0)


    def is_valid_label(self, label):
        if label in special_labels:
            return True
        for char in label.split('_'):
            if char not in valid_keys:
                return False
        return True

    def label_to_array(self, label):
        if label == 'no_keys_pressed':
            return [0, 0, 0, 0]
        array = [0, 0, 0, 0]
        for char in label.split('_'):
            if char in valid_keys:
                index = valid_keys.index(char)
                array[index] = 1
        return array


# 데이터 변환 정의
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # 이미지 크기 조정
    transforms.ToTensor(),  # 텐서로 변환
    transforms.Normalize(mean=[0.5], std=[0.5])  # 정규화
])

# CNN 모델 정의 (속도 값 추가)
class ModifiedCNNModelWithSpeed(nn.Module):
    def __init__(self):
        super(ModifiedCNNModelWithSpeed, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(256 * 8 * 8, 512)
        self.fc2 = nn.Linear(512 + 1, 4)  # 속도 값을 추가한 노드 수 (512 + 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()  # 시그모이드 활성화 함수

    def forward(self, x, speed):
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        x = self.pool(self.relu(self.bn3(self.conv3(x))))
        x = self.pool(self.relu(self.bn4(self.conv4(x))))
        x = x.view(-1, 256 * 8 * 8)
        x = self.relu(self.fc1(x))

        # 속도 값을 추가하여 입력으로 사용
        x = torch.cat((x, speed), dim=1)
        x = self.dropout(x)
        x = self.sigmoid(self.fc2(x))  # 시그모이드 활성화
        return x

# 학습 루프 정의
num_epochs = 100

def train_model(base_folders):
    # 데이터셋 및 데이터로더 생성
    dataset = KeypressDataset(base_folders=base_folders, transform=transform, w_ratio=0.5)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)  # num_workers=2로 설정

    # 모델, 손실 함수, 최적화 알고리즘 설정
    model = ModifiedCNNModelWithSpeed()
    criterion = nn.BCELoss()  # 이진 크로스 엔트로피 손실 함수
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels, speeds in dataloader:
            # 모델 예측 초기화
            optimizer.zero_grad()

            # 순전파 + 역전파 + 최적화
            outputs = model(images, speeds)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # 손실 누적
            running_loss += loss.item()

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(dataloader)}')

    print('Finished Training')

    # 모델 저장
    torch.save(model.state_dict(), 'canny_cnn_model_with_speed.pth')
    print('Model saved to modified_cnn_model_with_speed.pth')

def predict_image(image_path, speed, model, transform):
    image = Image.open(image_path).convert('RGB')  # 흑백 이미지로 변환
    image = np.array(image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    image = Image.fromarray(edges).convert('L')
    
    image = transform(image)
    image = image.unsqueeze(0)  # 배치 차원 추가

    speed = torch.tensor([[speed]], dtype=torch.float)  # 속도 값을 텐서로 변환

    with torch.no_grad():
        output = model(image, speed)
        output = output.squeeze().numpy()

    return output

# 모델 로드 및 추론
def load_model_and_predict(image_path, speed):
    # 모델 인스턴스 생성 및 가중치 로드
    model = ModifiedCNNModelWithSpeed()
    model.load_state_dict(torch.load('canny_cnn_model_with_speed.pth'))
    model.eval()

    # 이미지 예측
    prediction = predict_image(image_path, speed, model, transform)
    print(f'Prediction for {image_path} with speed {speed}: {prediction}')

if __name__ == '__main__':
    # 현재 디렉토리의 모든 폴더를 base_folders로 설정
    current_directory = os.getcwd()
    base_folders = [os.path.join(current_directory, name) for name in os.listdir(current_directory) if os.path.isdir(os.path.join(current_directory, name))]

    # 학습 모델
    train_model(base_folders)


Epoch 1/100, Loss: 5.426036196947098
Epoch 2/100, Loss: 1.2774269104003906
Epoch 3/100, Loss: 0.6213228866457939
Epoch 4/100, Loss: 0.459464368224144
Epoch 5/100, Loss: 0.4292375653982162
Epoch 6/100, Loss: 0.4215790256857872
Epoch 7/100, Loss: 0.41119040697813036
Epoch 8/100, Loss: 0.4082477703690529
Epoch 9/100, Loss: 0.41265960335731505
Epoch 10/100, Loss: 0.3925954535603523
Epoch 11/100, Loss: 0.3911722630262375
Epoch 12/100, Loss: 0.3917572647333145
Epoch 13/100, Loss: 0.37806845009326934
Epoch 14/100, Loss: 0.366902656853199
Epoch 15/100, Loss: 0.3634263128042221
Epoch 16/100, Loss: 0.34569570422172546
Epoch 17/100, Loss: 0.3394492655992508
Epoch 18/100, Loss: 0.34503253996372224
Epoch 19/100, Loss: 0.3301431715488434
Epoch 20/100, Loss: 0.33617582321166994
Epoch 21/100, Loss: 0.31042805761098863
Epoch 22/100, Loss: 0.3053630590438843
Epoch 23/100, Loss: 0.2968664221465588
Epoch 24/100, Loss: 0.29202721118927
Epoch 25/100, Loss: 0.2867175221443176
Epoch 26/100, Loss: 0.2703568287

In [7]:


# test
example_image_path = 'screenshots_20240607-024658/a/19.png'
example_speed = 80
load_model_and_predict(example_image_path, example_speed)
example_image_path = 'screenshots_20240607-024658/a_w/33.png'
example_speed = 88
load_model_and_predict(example_image_path, example_speed)
example_image_path = 'screenshots_20240607-024658/d_w/12.png'
example_speed = 94
load_model_and_predict(example_image_path, example_speed)
example_image_path = 'screenshots_20240607-024058/w/15.png'
example_speed = 86
load_model_and_predict(example_image_path, example_speed)
example_image_path = 'screenshots_20240607-024058/s/77.png'
example_speed = 35
load_model_and_predict(example_image_path, example_speed)
example_image_path = 'screenshots_20240607-024822/d/50.png'
example_speed = 83
load_model_and_predict(example_image_path, example_speed)
example_image_path = 'screenshots_20240607-024822/no_keys_pressed/6.png'
example_speed = 47


Prediction for screenshots_20240607-024658/a/19.png with speed 80: [9.9588267e-09 9.9997604e-01 1.8373146e-07 9.4943038e-12]
Prediction for screenshots_20240607-024658/a_w/33.png with speed 88: [9.7978961e-01 8.6037475e-01 1.7116236e-09 1.0352658e-08]
Prediction for screenshots_20240607-024658/d_w/12.png with speed 94: [9.9998951e-01 1.9691676e-14 1.0033234e-13 9.9999988e-01]
Prediction for screenshots_20240607-024058/w/15.png with speed 86: [5.1354098e-01 9.1393513e-06 2.6515499e-07 5.7524145e-01]
Prediction for screenshots_20240607-024058/s/77.png with speed 35: [4.5626420e-16 1.2318348e-07 9.9990678e-01 2.5492294e-11]
Prediction for screenshots_20240607-024822/d/50.png with speed 83: [1.9615683e-04 3.9473969e-05 2.2515645e-03 9.8425722e-01]


In [21]:
#eval
from keras.models import load_model
from pynput.keyboard import Key, Controller, Listener
from PIL import ImageGrab, Image
# 속도 예측 모델 로드
speed_model = load_model('speed_rec.h5')

# 키보드 컨트롤러
keyboard = Controller()

# 화면 및 속도 영역 정의
screen_region = (320, 170, 1650, 1080)
speed_region = (145, 905, 202, 940)

# ESC 키 종료 플래그
exit_flag = False

def on_press(key):
    global exit_flag
    if key == Key.esc:
        exit_flag = True
        return False

def get_speed(speed_image):
    speed_image = cv2.cvtColor(speed_image, cv2.COLOR_BGR2GRAY)  # Grayscale로 변환
    speed_image = speed_image.astype('float32') / 255  # 정규화
    speed_image = np.expand_dims(speed_image, axis=0)
    speed_image = np.expand_dims(speed_image, axis=-1)  # 채널 차원 추가
    # speed = speed_model.predict(speed_image)[0][0]
    # return speed
    prediction = speed_model.predict(speed_image)
    predicted_label = np.argmax(prediction, axis=1)[0]

    return predicted_label

with Listener(on_press=on_press) as listener:
    while not exit_flag:
        # 화면 캡처
        screen = ImageGrab.grab(bbox=screen_region)
        screen = np.array(screen)
        screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(screen, 50, 150)
        image = Image.fromarray(edges).convert('L')
        screen_image = Image.fromarray(screen)
        
        # 속도 캡처
        speed_screen = ImageGrab.grab(bbox=speed_region)
        speed_screen = np.array(speed_screen)
        speed_screen = cv2.cvtColor(speed_screen, cv2.COLOR_BGR2RGB)
        
        # 속도 예측
        speed = get_speed(speed_screen)
        speed_tensor = torch.tensor([[speed]], dtype=torch.float)

        # 이미지 변환
        screen_image = transform(screen_image)
        screen_image = screen_image.unsqueeze(0)  # 배치 차원 추가

        # 모델 예측
        # with torch.no_grad():
        #     output = model(screen_image, speed_tensor)
        #     output = output.squeeze().numpy()

        model = ModifiedCNNModelWithSpeed()
        model.load_state_dict(torch.load('canny_cnn_model_with_speed.pth'))
        model.eval()
    
        # 이미지 예측
        with torch.no_grad():
            output = model(screen_image, speed_tensor)
            output = output.squeeze().numpy()

        # 예측 확률에 따른 키 입력
        pressed_keys = []
        for i, prob in enumerate(output):
            if prob >= 0.7:
                key = valid_keys[i]
                pressed_keys.append(key)
                keyboard.press(key)
            else:
                key = valid_keys[i]
                keyboard.release(key)

        # 콘솔에 속도 값과 입력된 키 표시
        print(f'Speed: {speed}, Keys: {" ".join(pressed_keys)}')
        print(f'[ {output[0]:.4f} ] [ {output[1]:.4f} ] [ {output[2]:.4f} ] [ {output[3]:.4f} ]')

        # ESC 키로 프로그램 종료
        if exit_flag:
            break

    listener.join()



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Speed: 94, Keys: w
[ 1.0000 ] [ 0.0000 ] [ 0.0000 ] [ 0.0000 ]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m