Полная репетиция использования разгадывателя капчи. 15 классов.

In [1]:
import torch
import torchvision
import numpy as np
import pandas as pd
import os
import cv2
import time
from google.colab.patches import cv2_imshow
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.ops.boxes import nms

import pickle
from PIL import Image
from torchvision import transforms
from torch import nn
from sklearn.preprocessing import LabelEncoder

from google.colab import drive
drive.mount ('/content/gdrive')

Mounted at /content/gdrive


In [35]:
# Path to the saved state_dict that is loaded into the AlexNet classifier below.
# Despite the "_dir" suffix this is a single .pth file, not a directory.
weights_dir_1 = '/content/gdrive/MyDrive/Weights_ML/smartsolver_weights_1_2.pth'

class AlexNet(nn.Module):
    """AlexNet-style image classifier.

    Three convolutional stages (conv -> batch norm -> ReLU -> max-pool) are
    followed by three fully connected stages.  The first linear layer expects
    9216 features, i.e. 256 * 6 * 6, which is what the convolutional stages
    produce for a 3 x 227 x 227 input.

    The attribute names (``layer1`` .. ``layer3``, ``fc``, ``fc1``, ``fc2``)
    and the order of modules inside each ``nn.Sequential`` determine the
    state_dict keys, so they must stay as they are for saved weights to load.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes=15):
        super().__init__()

        def conv_stage(in_channels, out_channels, **conv_kwargs):
            # Every convolutional stage shares the same tail:
            # batch norm, ReLU and a 3x3 max-pool with stride 2.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, **conv_kwargs),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2),
            )

        self.layer1 = conv_stage(3, 96, kernel_size=11, stride=4, padding=0)
        self.layer2 = conv_stage(96, 256, kernel_size=5, stride=1, padding=2)
        self.layer3 = conv_stage(256, 256, kernel_size=3, stride=1, padding=1)

        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(9216, 2048),
            nn.ReLU(),
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(2048, 1024),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)`` for ``x``."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        # Flatten everything except the batch dimension for the linear layers.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(self.fc(flat)))

def predict_one_sample(model, inputs):
    """Run ``model`` on ``inputs`` and return softmax probabilities.

    The model is switched to evaluation mode (and left in it) and the forward
    pass runs with gradient tracking disabled.

    Args:
        model: Module that maps ``inputs`` to a tensor of logits.
        inputs: Tensor passed to the model unchanged.

    Returns:
        NumPy array of probabilities, normalised over the last axis.
    """
    model.eval()
    with torch.no_grad():
        scores = model(inputs).cpu()
        probabilities = torch.nn.functional.softmax(scores, dim=-1)
    return probabilities.numpy()


# Class names the classifier distinguishes; the encoder fitted on them is what
# classify_image_path uses to turn a predicted index back into a name.
your_classes = ['bucket', 'clock', 'face', 'factory', 'fire', 'flag', 'hand', 'key',
                'lock', 'monitor', 'paper', 'person', 'scissors', 't-shirt', 'wrench']
label_encoder = LabelEncoder()
label_encoder.fit(your_classes)
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

# Load the trained classifier weights (mapped to CPU) and switch to eval mode.
alexnet = AlexNet()
alexnet.load_state_dict(torch.load(weights_dir_1, map_location='cpu'))
alexnet.eval()

# Reload the label encoder from disk.  The pickle was written a few lines above
# by this same script, so it is trusted input; the context manager makes sure
# the file handle is closed (the bare open() call used previously leaked it).
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

def classify_image_path(image_path, model, label_encoder=None):
    """Classify the image stored at ``image_path``.

    The image is converted to 3-channel RGB, resized to 227 x 227, normalised
    with the mean/std constants below and passed through ``model`` via
    ``predict_one_sample``.

    Args:
        image_path: Path of the image file to classify.
        model: Classifier passed on to ``predict_one_sample``.
        label_encoder: Optional object with a ``classes_`` sequence used to
            map the predicted index to a class name.

    Returns:
        ``label_encoder.classes_[index]`` when an encoder is given, otherwise
        the predicted index as a string.
    """
    preprocess = transforms.Compose([
        transforms.Resize((227, 227)),  # adjust to the input size the model expects
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Close the file deterministically, and force RGB so that grayscale,
    # palette or RGBA images do not break the 3-channel normalisation.
    with Image.open(image_path) as image:
        input_tensor = preprocess(image.convert("RGB")).unsqueeze(0)

    probs = predict_one_sample(model, input_tensor)
    predicted_class_idx = int(np.argmax(probs, axis=1)[0])

    # Compare against None explicitly rather than relying on object truthiness.
    if label_encoder is not None:
        return label_encoder.classes_[predicted_class_idx]
    return str(predicted_class_idx)

In [36]:
# Path to the saved state_dict that is loaded into the detection model below.
# Despite the "_dir" suffix this is a single .pth file, not a directory.
weights_dir_2 = '/content/gdrive/MyDrive/Weights_ML/smartsolver_weights_2_4.pth'

def create_model(num_classes, pretrained=False):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a custom box head.

    Args:
        num_classes: Number of outputs of the replacement box predictor.
        pretrained: Forwarded unchanged to torchvision's
            ``fasterrcnn_resnet50_fpn`` constructor.

    Returns:
        The detector with ``roi_heads.box_predictor`` replaced by a fresh
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pretrained)
    roi_heads = detector.roi_heads
    # Reuse the existing head's input width so the new predictor plugs in directly.
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector

# The detector checkpoint is mapped onto the CPU when it is loaded.
device = torch.device('cpu')
# 15 object classes + 1 extra slot: detect() keeps an empty name at index 0,
# presumably the background class of the detector — confirm against training.
model = create_model(15 + 1)
model.load_state_dict(torch.load(weights_dir_2, map_location=device))
# Put the detector into evaluation mode before it is used for inference.
model.eval()

def detect(class_name, path,iou_threshold=0.1, threshold=0.8):
    """Find the centre of the best detection of ``class_name`` in an image.

    The module-level detector ``model`` is run on the image at ``path``; its
    boxes are filtered with class-agnostic NMS and the first surviving box
    whose label equals ``class_name`` and whose score exceeds ``threshold``
    is returned.  ``nms`` returns kept indices in decreasing score order, so
    the first match is the highest-scoring one.

    Args:
        class_name: One of the 15 class names listed in ``class_names``.
        path: Path of the image file to search.
        iou_threshold: IoU above which overlapping boxes are suppressed.
        threshold: Minimum score (exclusive) for a detection to count.

    Returns:
        ``(center_x, center_y)`` as ints in pixel coordinates, or
        ``(None, None)`` when no detection qualifies.

    Raises:
        ValueError: If ``class_name`` is not a known class.
        FileNotFoundError: If the image cannot be read from ``path``.
    """
    class_names = ["", "bucket", "clock", "face", "factory", "fire", "flag", "hand", "key", "lock",
                   "monitor", "paper", "person", "scissors", "t-shirt", "wrench"]
    # Resolve the label first so an unknown class fails before the costly forward pass.
    class_idx = class_names.index(class_name)

    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError(f"Could not read image: {path}")

    model.eval()
    # NOTE(review): cv2 loads images as BGR; the channel order is left as-is
    # on the assumption that the detector was trained the same way — confirm.
    img_ = torch.from_numpy(img / 255.).permute(2, 0, 1).unsqueeze(0).to(torch.float)
    with torch.no_grad():  # inference only, no autograd graph needed
        predict = model(img_)[0]

    keep = nms(predict['boxes'], predict['scores'], iou_threshold)

    # Index boxes, scores and labels with the SAME kept indices.  Previously the
    # position inside the filtered box list was used to index the unfiltered
    # scores/labels, pairing a box with another detection's score and label.
    kept_boxes = predict['boxes'][keep]
    kept_scores = predict['scores'][keep]
    kept_labels = predict['labels'][keep]

    for box, score, label in zip(kept_boxes, kept_scores, kept_labels):
        if score > threshold and int(label) == class_idx:
            center_x = int((box[0] + box[2]) / 2)
            center_y = int((box[1] + box[3]) / 2)
            return center_x, center_y

    return None, None



In [37]:
# Icon images to classify; each predicted class is then located on the captcha.
pics_path = ['/content/img68.jpg', '/content/img14.jpg', '/content/img6.jpg']

# perf_counter is monotonic, so the measured duration cannot be distorted by
# system clock adjustments the way time.time() differences can.
start = time.perf_counter()
# Example run on captcha 0042; the icons are presumably
# flag (img68), monitor (img14) and factory (img6).
for pic_path in pics_path:
    predicted_class = classify_image_path(pic_path, alexnet, label_encoder)
    coords = detect(predicted_class, '/content/0042.jpg')
    print(coords)
end = time.perf_counter()
print(f"Потрачено {round((end - start) , 1)} секунд")

(313, 329)
(221, 700)
(1261, 350)
Потрачено 18.3 секунд
