In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K

# 데이터셋 로딩 및 전처리
def load_data(captcha_folder):
    """Load every ``.jpg`` file of a folder as a normalized grayscale image.

    Args:
        captcha_folder: Directory to scan (non-recursive).

    Returns:
        Tuple ``(images, labels)``. ``images`` is a NumPy array of shape
        ``(N, 40, 150, 1)`` holding the pixel values divided by 255, and
        ``labels`` is a list of ``N`` strings, each being the file name up to
        its first ``.``. Files that OpenCV cannot decode are skipped.
    """
    images = []
    labels = []

    for filename in os.listdir(captcha_folder):
        if not filename.endswith('.jpg'):  # only image files are processed
            continue

        image_path = os.path.join(captcha_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable/corrupt file by returning None
            # rather than raising; without this guard cv2.resize fails with an
            # opaque error and one bad file aborts the whole load.
            print(f"Skipping unreadable image: {image_path}")
            continue

        image = cv2.resize(image, (150, 40))  # dsize is (width, height)
        image = image / 255.0  # normalize

        # The label is the file name without its extension.
        labels.append(filename.split('.')[0])
        images.append(image)

    images = np.array(images)
    images = images.reshape(-1, 40, 150, 1)  # add the channel axis expected by the CNN

    return images, labels

def encode_label(label):
    """Map each character of a label to its class index.

    Digits ``0-9`` map to 0-9, lowercase ``a-z`` to 10-35 and uppercase
    ``A-Z`` to 36-61.

    Args:
        label: String made of ASCII letters and digits.

    Returns:
        List of ints, one per character of ``label``.

    Raises:
        ValueError: If ``label`` contains a character outside ``0-9a-zA-Z``.
    """
    encoded_label = []
    for c in label:
        # Explicit ASCII range checks: str.isdigit()/str.islower() also accept
        # non-ASCII characters, and a catch-all "uppercase" branch would turn
        # e.g. ' ' into class 3 or '_' into the out-of-range class 66.
        if '0' <= c <= '9':
            encoded_label.append(ord(c) - ord('0'))  # digit
        elif 'a' <= c <= 'z':
            encoded_label.append(ord(c) - ord('a') + 10)  # lowercase
        elif 'A' <= c <= 'Z':
            encoded_label.append(ord(c) - ord('A') + 36)  # uppercase
        else:
            raise ValueError(f"Unsupported character {c!r} in label {label!r}")
    return encoded_label

def preprocess_labels(y):
    """Turn label strings into one one-hot target array per character position.

    Args:
        y: Iterable of label strings; every label must be exactly 5
            characters long.

    Returns:
        List of 5 arrays. Entry ``i`` has shape ``(len(y), 62)`` and holds the
        one-hot encoding of the ``i``-th character of every label.

    Raises:
        ValueError: If a label is not exactly 5 characters long.
    """
    y_encoded = []
    for label in y:
        encoded_label = encode_label(label)
        if len(encoded_label) != 5:
            # Short labels used to be padded with index 62, which is out of
            # range for num_classes=62 and made to_categorical fail; longer
            # labels were silently cut to 5 characters. Fail early and clearly.
            raise ValueError(
                f"Label {label!r} has {len(encoded_label)} characters, expected exactly 5"
            )
        y_encoded.append(encoded_label)

    # reshape keeps the (N, 5) layout even when there are no labels at all
    y_encoded = np.array(y_encoded, dtype=int).reshape(-1, 5)

    # One-hot encode a whole column (character position) per call instead of
    # one call per character.
    y_split = [to_categorical(y_encoded[:, i], num_classes=62) for i in range(5)]
    return y_split

def build_model():
    """Build and compile a CNN with five softmax heads, one per character.

    Returns:
        A compiled Keras ``Model`` that takes ``(40, 150, 1)`` inputs and
        returns a list of five 62-way softmax outputs.
    """
    input_layer = Input(shape=(40, 150, 1))

    # First convolutional block
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
    x = MaxPooling2D((2, 2))(x)

    # Second convolutional block
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Third convolutional block
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Flatten and dropout
    x = Flatten()(x)
    x = Dropout(0.2)(x)

    # Output heads: one 62-class prediction for each of the 5 characters
    outputs = [Dense(62, activation='softmax')(x) for _ in range(5)]

    model = Model(inputs=input_layer, outputs=outputs)

    optimizer = Adam(learning_rate=0.001)

    # A single loss is applied by Keras to every output separately and the
    # per-output losses are summed. The previous hand-written loss indexed
    # y_true[i] / y_pred[i] for i in range(5); since the loss receives one
    # output's (batch, 62) tensors, that picked batch samples 0-4 rather than
    # the five outputs, so only five samples per batch contributed.
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'] * 5,  # one accuracy metric per output
    )

    return model

# 데이터 로딩
captcha_folder = './CAPTCHA'
X, y = load_data(captcha_folder)

# Label preprocessing
y_split = preprocess_labels(y)

# Keras takes the validation_split fraction from the *last* samples of the
# arrays, before any shuffling, and the samples arrive in directory-listing
# order (frequently sorted by file name, i.e. by label). Shuffle once with a
# fixed seed so training and validation share the same label distribution.
shuffle_idx = np.random.default_rng(42).permutation(len(X))
X = X[shuffle_idx]
y = [y[i] for i in shuffle_idx]
y_split = [targets[shuffle_idx] for targets in y_split]

# Train the model
model = build_model()
model.fit(X, y_split, epochs=20, batch_size=32, validation_split=0.2, verbose=1)



Epoch 1/20
[1m2827/2827[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m544s[0m 190ms/step - dense_1_accuracy: 0.0157 - dense_1_loss: 4.1108 - dense_2_accuracy: 0.0170 - dense_2_loss: 4.1101 - dense_3_accuracy: 0.0168 - dense_3_loss: 4.1113 - dense_4_accuracy: 0.0171 - dense_4_loss: 4.1109 - dense_accuracy: 0.0217 - dense_loss: 3.9252 - loss: 20.3683 - val_dense_1_accuracy: 0.0175 - val_dense_1_loss: 4.0985 - val_dense_2_accuracy: 0.0169 - val_dense_2_loss: 4.0986 - val_dense_3_accuracy: 0.0157 - val_dense_3_loss: 4.1025 - val_dense_4_accuracy: 0.0168 - val_dense_4_loss: 4.0963 - val_dense_accuracy: 0.0000e+00 - val_dense_loss: 8.3722 - val_loss: 24.7680
Epoch 2/20
[1m2827/2827[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m525s[0m 186ms/step - dense_1_accuracy: 0.0265 - dense_1_loss: 4.0583 - dense_2_accuracy: 0.0238 - dense_2_loss: 4.0676 - dense_3_accuracy: 0.0244 - dense_3_loss: 4.0682 - dense_4_accuracy: 0.0256 - dense_4_loss: 4.0546 - dense_accuracy: 0.0331 - dense_loss: 3.823

KeyboardInterrupt: 

In [3]:
# modelv2

import os
import cv2
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization, LSTM, TimeDistributed, Bidirectional, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K

# 데이터셋 로딩 및 전처리
def load_data(captcha_folder):
    """Read all ``.jpg`` images of a directory into a normalized array.

    Args:
        captcha_folder: Directory to scan (non-recursive).

    Returns:
        Tuple ``(images, labels)``: an array of shape ``(N, 40, 150, 1)`` with
        pixel values divided by 255, and a list of ``N`` label strings taken
        from each file name up to its first ``.``. Files OpenCV cannot decode
        are skipped.
    """
    images = []
    labels = []

    for filename in os.listdir(captcha_folder):
        if filename.endswith('.jpg'):  # only image files are processed
            image_path = os.path.join(captcha_folder, filename)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

            # cv2.imread returns None (it does not raise) for files it cannot
            # decode; skip those so a single bad file does not abort the load
            # with an opaque cv2.resize error.
            if image is None:
                print(f"Skipping unreadable image: {image_path}")
                continue

            image = cv2.resize(image, (150, 40))  # dsize is (width, height)
            image = image / 255.0  # normalize

            # The label is the file name without its extension.
            label = filename.split('.')[0]
            labels.append(label)
            images.append(image)

    images = np.array(images)
    images = images.reshape(-1, 40, 150, 1)  # add the channel axis expected by the CNN

    return images, labels

def encode_label(label):
    """Convert a label string into a list of class indices.

    The class index of a character is its position in the sequence
    ``0-9`` (0-9), ``a-z`` (10-35), ``A-Z`` (36-61).

    Args:
        label: String made of ASCII letters and digits.

    Returns:
        List of ints, one per character of ``label``.

    Raises:
        ValueError: If ``label`` contains a character outside ``0-9a-zA-Z``.
    """
    alphabet = (
        '0123456789'
        'abcdefghijklmnopqrstuvwxyz'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    )
    encoded_label = []
    for c in label:
        index = alphabet.find(c)
        if index < 0:
            # Previously any character that was neither a digit nor lowercase
            # was treated as uppercase, producing a wrong or out-of-range
            # class (e.g. ' ' -> 3, '_' -> 66) instead of an error.
            raise ValueError(f"Unsupported character {c!r} in label {label!r}")
        encoded_label.append(index)
    return encoded_label

def preprocess_labels(y):
    """Build the five one-hot target arrays (one per character position).

    Args:
        y: Iterable of label strings; every label must be exactly 5
            characters long.

    Returns:
        List of 5 arrays. Entry ``i`` has shape ``(len(y), 62)`` and holds the
        one-hot encoding of the ``i``-th character of every label.

    Raises:
        ValueError: If a label is not exactly 5 characters long.
    """
    y_encoded = []
    for label in y:
        encoded_label = encode_label(label)
        # Padding short labels with index 62 (as done before) cannot work with
        # num_classes=62: valid indices are 0-61, so to_categorical failed.
        # Labels longer than 5 were silently truncated. Reject both up front.
        if len(encoded_label) != 5:
            raise ValueError(
                f"Label {label!r} has {len(encoded_label)} characters, expected exactly 5"
            )
        y_encoded.append(encoded_label)

    # reshape keeps the (N, 5) layout even when there are no labels at all
    y_encoded = np.array(y_encoded, dtype=int).reshape(-1, 5)

    # One to_categorical call per character position (column) rather than one
    # call per individual character.
    return [to_categorical(y_encoded[:, i], num_classes=62) for i in range(5)]

def build_model():
    """Build and compile the v2 network: 4 conv blocks, a BiLSTM and 5 heads.

    Returns:
        A compiled Keras ``Model`` that takes ``(40, 150, 1)`` inputs and
        returns five 62-way softmax outputs named ``char_1`` .. ``char_5``.
    """
    inputs = Input(shape=(40, 150, 1))

    # Four Conv -> BatchNorm -> MaxPool blocks with a growing filter count
    features = inputs
    for n_filters in (32, 64, 128, 256):
        features = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(features)
        features = BatchNormalization()(features)
        features = MaxPooling2D((2, 2))(features)
    features = Dropout(0.3)(features)

    # Collapse the spatial axes into one sequence axis, keeping the channels
    n_channels = features.shape[-1]
    sequence = Reshape((-1, n_channels))(features)

    # Bidirectional LSTM that returns only its final state
    encoded = Bidirectional(LSTM(128, return_sequences=False))(sequence)
    encoded = Dropout(0.2)(encoded)

    # Output heads: one 62-class prediction for each of the 5 characters
    heads = []
    for position in range(1, 6):
        head = Dense(62, activation='softmax', name=f'char_{position}')
        heads.append(head(encoded))

    model = Model(inputs=inputs, outputs=heads)

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy' for _ in range(5)],
    )

    return model

# 데이터 로딩
captcha_folder = './cap'
X, y = load_data(captcha_folder)

# Label preprocessing
y_split = preprocess_labels(y)

# validation_split uses the *last* 20% of the arrays as given (Keras selects
# it before shuffling), and the samples arrive in directory-listing order,
# which is frequently sorted by file name and therefore by label. Shuffle once
# with a fixed seed so the validation set is representative.
shuffle_idx = np.random.default_rng(42).permutation(len(X))
X = X[shuffle_idx]
y = [y[i] for i in shuffle_idx]
y_split = [targets[shuffle_idx] for targets in y_split]

# Train the model
model = build_model()
model.fit(X, y_split, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

model.save('captcha_model_v2.h5')




Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 546ms/step - char_1_accuracy: 0.6554 - char_1_loss: 2.4175 - char_2_accuracy: 0.0360 - char_2_loss: 4.0379 - char_3_accuracy: 0.0073 - char_3_loss: 4.2480 - char_4_accuracy: 0.0118 - char_4_loss: 4.2570 - char_5_accuracy: 0.0184 - char_5_loss: 4.2722 - loss: 19.2556 - val_char_1_accuracy: 0.4478 - val_char_1_loss: 4.0605 - val_char_2_accuracy: 0.0000e+00 - val_char_2_loss: 4.1503 - val_char_3_accuracy: 0.0000e+00 - val_char_3_loss: 4.1380 - val_char_4_accuracy: 0.0000e+00 - val_char_4_loss: 4.1289 - val_char_5_accuracy: 0.0299 - val_char_5_loss: 4.1181 - val_loss: 20.5838
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 350ms/step - char_1_accuracy: 1.0000 - char_1_loss: 0.0859 - char_2_accuracy: 0.1554 - char_2_loss: 2.8469 - char_3_accuracy: 0.0182 - char_3_loss: 4.0954 - char_4_accuracy: 0.0446 - char_4_loss: 4.1248 - char_5_accuracy: 0.0317 - char_5_loss: 4.0728 - loss: 15.2278 - val_c

<keras.src.callbacks.history.History at 0x22f65c8a210>

In [5]:
# Print a summary of the `model` object currently held by the kernel.
model.summary()


In [13]:
# Save the in-memory `model` to 'captcha_model_v2.h5' (same path the training cell writes to).
model.save('captcha_model_v2.h5')




In [15]:
# Save the in-memory `model` under a '.keras' file name as well.
model.save('captcha_model_v2.keras')

In [172]:
# 숫자 라벨을 알파벳과 숫자로 변환하는 함수
def label_to_char(label):
    """Convert a class index back to its character.

    Indices below 10 map onto ``'0'..'9'``, indices from 10 up to 35 onto
    ``'a'..'z'``, and indices from 36 upward onto ``'A'`` and the characters
    that follow it.

    Args:
        label: Integer class index.

    Returns:
        The single-character string for ``label``.
    """
    if label >= 36:
        offset = ord('A') - 36  # uppercase range starts at index 36
    elif label >= 10:
        offset = ord('a') - 10  # lowercase range starts at index 10
    else:
        offset = ord('0')  # digits start at index 0
    return chr(label + offset)

# 이미지 예측 후 알파벳/숫자 변환
def predict_image(image_path):
    """Predict the 5-character text of an image using the global ``model``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        String of 5 characters, one decoded from each model output.

    Raises:
        ValueError: If the image cannot be read or decoded.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising for a missing or
        # undecodable file; fail here with the offending path rather than
        # with an opaque error inside cv2.resize.
        raise ValueError(f"Could not read image: {image_path}")

    image = cv2.resize(image, (150, 40))  # dsize is (width, height)
    image = image / 255.0  # normalize
    image = image.reshape(1, 40, 150, 1)  # add batch and channel axes

    predictions = model.predict(image)

    # predictions[i] is the output for character position i; [0] selects the
    # only image in the batch.
    predicted_labels = [np.argmax(predictions[i][0]) for i in range(5)]

    # Convert the class indices back to characters
    return ''.join(label_to_char(label) for label in predicted_labels)

# 예시: 이미지 예측
predicted_text = predict_image('./CAPTCHA/QSbwI.jpg')  # path of the image to predict
print(f"Predicted text for the image: {predicted_text}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Predicted text for the image: 1KAHZ


In [25]:
from tensorflow.keras.models import load_model
import cv2
import numpy as np

# 숫자 라벨을 알파벳과 숫자로 변환하는 함수
def label_to_char(label):
    """Translate a class index into the character it stands for.

    Indices from 36 upward are offset from ``'A'``, indices from 10 to 35
    from ``'a'``, and anything below 10 from ``'0'``.

    Args:
        label: Integer class index.

    Returns:
        The single-character string for ``label``.
    """
    # (first index of the range, first character of the range), highest first
    for range_start, first_char in ((36, 'A'), (10, 'a')):
        if label >= range_start:
            return chr(label - range_start + ord(first_char))
    return chr(label + ord('0'))

# 이미지 예측 후 알파벳/숫자 변환
def predict_image(image_path, model):
    """Predict the 5-character text of an image with the given model.

    Args:
        image_path: Path of the image file to classify.
        model: Object whose ``predict`` method returns one prediction array
            per character position for a ``(1, 40, 150, 1)`` input batch.

    Returns:
        String of 5 characters, one decoded from each model output.

    Raises:
        ValueError: If the image cannot be read or decoded.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising for a missing or
        # undecodable file; report the path instead of letting cv2.resize
        # fail with an opaque error.
        raise ValueError(f"Could not read image: {image_path}")

    image = cv2.resize(image, (150, 40))  # dsize is (width, height)
    image = image / 255.0  # normalize
    image = image.reshape(1, 40, 150, 1)  # add batch and channel axes

    predictions = model.predict(image)

    # predictions[i] is the output for character position i; [0] selects the
    # only image in the batch.
    predicted_labels = [np.argmax(predictions[i][0]) for i in range(5)]

    # Convert the class indices back to characters
    return ''.join(label_to_char(label) for label in predicted_labels)

# 저장된 모델 로드
model = load_model('captcha_model_v2.h5')  # rebinds the global `model` to the model loaded from disk

# Example: predict a single image
predicted_text = predict_image('./22222.png', model)  # path of the image to predict
print(f"Predicted text for the image: {predicted_text}")








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 896ms/step
Predicted text for the image: swrEk


In [None]:
# Old code (70%) - earlier version, kept commented out for reference only.

# import os
# import cv2
# import numpy as np
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, GlobalAveragePooling2D
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.preprocessing.image import img_to_array

# # 데이터셋 로딩 및 전처리
# def load_data(captcha_folder):
#     images = []
#     labels = []
    
#     for filename in os.listdir(captcha_folder):
#         if filename.endswith('.jpg'):  # 이미지 파일만 처리
#             image_path = os.path.join(captcha_folder, filename)
#             # 이미지 읽기
#             image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
#             image = cv2.resize(image, (150, 40))  # 크기 조정
#             image = image / 255.0  # 정규화
            
#             # 레이블 추출 (파일명에서 확장자를 제거하고, 문자를 레이블로 사용)
#             label = filename.split('.')[0]  # 파일명에서 확장자 제거
#             labels.append(label)
#             images.append(image)
    
#     images = np.array(images)
#     images = images.reshape(-1, 40, 150, 1)  # CNN 입력 형태에 맞게 차원 변경
    
#     return images, labels

# # 모델 정의
# def build_model():
#     input_img = Input(shape=(40, 150, 1))

#     x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
#     x = BatchNormalization()(x)
#     x = MaxPooling2D(pool_size=(2, 2))(x)

#     x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
#     x = BatchNormalization()(x)
#     x = MaxPooling2D(pool_size=(2, 2))(x)

#     x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
#     x = BatchNormalization()(x)
#     x = MaxPooling2D(pool_size=(2, 2))(x)
#     x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
#     x = BatchNormalization()(x)
#     x = MaxPooling2D(pool_size=(2, 2))(x)
#     x = GlobalAveragePooling2D()(x)
#     x = Dense(256, activation='relu')(x)
#     x = Dropout(0.5)(x)

#     # 각 문자를 별도로 예측하기 위한 5개의 출력층
#     output = []
#     for _ in range(5):
#         output.append(Dense(62, activation='softmax')(x))

#     model = Model(inputs=input_img, outputs=output)
#     model.compile(optimizer=Adam(learning_rate=0.0001 ), 
#                   loss='categorical_crossentropy', 
#                   metrics=['accuracy'] * 5)

#     return model
# # 데이터 로딩
# captcha_folder = './CAPTCHA'
# X, y = load_data(captcha_folder)

# # 레이블을 one-hot 인코딩
# y_encoded = []
# for label in y:
#     encoded_label = []
#     for c in label:
#         if c.isdigit():
#             encoded_label.append(ord(c) - ord('0'))  # 숫자
#         elif c.islower():
#             encoded_label.append(ord(c) - ord('a') + 10)  # 소문자
#         else:
#             encoded_label.append(ord(c) - ord('A') + 36)  # 대문자
#     # 레이블을 길이가 5로 고정
#     while len(encoded_label) < 5:
#         encoded_label.append(62)  # 공백을 나타내는 62번 인덱스를 사용
#     y_encoded.append(encoded_label)

# y_encoded = np.array(y_encoded)
# # 각 문자의 레이블을 one-hot 인코딩
# y_split = [to_categorical(y_encoded[:, i], num_classes=62) for i in range(5)]

# # 모델 학습
# model = build_model()
# model.fit(X, y_split, epochs=15, batch_size=32, validation_split=0.2, verbose=1)

# The current code is as follows