# Treinamento de modelo próprio para reconhecimento de texto manuscrito



<a href="https://colab.research.google.com/github/fabiobento/ocr-sis-emb-2024-2/blob/main/4-OCR_Reconhecimento_de_texto_manuscrito_Treinamento_do_modelo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Detect the runtime by probing for the `google.colab` package; the rest of
# the notebook branches on IN_COLAB.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
except ImportError:
    IN_COLAB = False
    print("Não executando no Google Colab")
else:
    IN_COLAB = True
    print("Executando no Google Colab")

In [367]:
if IN_COLAB:
    # Download the image archive used in this lab.
    !wget https://github.com/fabiobento/ocr-sis-emb-2024-2/raw/refs/heads/main/imagens.zip
    # Unpack it: -n never overwrites files that already exist, -q keeps the output quiet.
    !unzip -n -q imagens.zip

# Importando as bibliotecas

In [None]:
import tensorflow
tensorflow.__version__

In [369]:
import numpy as np
import zipfile
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
if IN_COLAB:
    from google.colab.patches import cv2_imshow

In [370]:
# Helper for displaying images both inside and outside Colab
def imshow(imagem):
    """Display an image in the notebook.

    In Colab the image is handed to ``cv2_imshow`` unchanged. Elsewhere it is
    drawn with matplotlib, with the axes hidden.

    Args:
        imagem: Image array exposing ``.shape``. Two-dimensional arrays are
            drawn as grayscale. Arrays with 3 or 4 channels are assumed to be
            in OpenCV's BGR/BGRA channel order (this notebook loads its
            images with ``cv2.imread``) and are reordered to RGB/RGBA,
            because matplotlib interprets colour arrays as RGB(A).
    """
    if IN_COLAB:
        cv2_imshow(imagem)
        return

    if len(imagem.shape) == 2:
        # Single-channel image: render with a gray colormap.
        plt.imshow(imagem, cmap='gray')
    elif len(imagem.shape) == 3 and imagem.shape[2] == 3:
        # BGR -> RGB; without this, red and blue appear swapped.
        plt.imshow(imagem[..., ::-1])
    elif len(imagem.shape) == 3 and imagem.shape[2] == 4:
        # BGRA -> RGBA; the alpha channel stays last.
        plt.imshow(imagem[..., [2, 1, 0, 3]])
    else:
        # Any other layout is passed through untouched, as before.
        plt.imshow(imagem)
    plt.axis('off')  # hide the axes
    plt.show()

# Carregando as bases de dados

## Conjunto MNIST 0-9

In [371]:
from tensorflow.keras.datasets import mnist

In [372]:
(train_data, train_labels), (test_data, test_labels) = mnist.load_data()

In [None]:
train_data.shape, test_data.shape

In [None]:
28 * 28

In [None]:
train_labels.shape, test_labels.shape

In [None]:
train_data[0]

In [None]:
train_data[0].shape

In [None]:
train_labels[0]

In [379]:
digitos_data = np.vstack([train_data, test_data])
digitos_labels = np.hstack([train_labels, test_labels])

In [None]:
digitos_data.shape

In [None]:
digitos_labels.shape

In [None]:
digitos_labels

In [None]:
np.random.randint(0, digitos_data.shape[0])

In [None]:
indice = np.random.randint(0, digitos_data.shape[0])
plt.imshow(digitos_data[indice], cmap='gray')
plt.title('Classe: ' + str(digitos_labels[indice]));

In [None]:
sns.countplot(x=digitos_labels,hue=digitos_labels, palette="Set2", legend=False)

## Conjunto Kaggle A-Z

In [386]:
if IN_COLAB:
  # Baixar imagens do conjunto de dados
  %pip install gdown
  !gdown --id 1udg4POqbFlozb9OemBNYKw4prQ4Dmh9s -O alfabeto_A-Z.zip
  !unzip -n -q alfabeto_A-Z.zip

In [387]:
dataset_az = pd.read_csv('./Datasets/A_Z Handwritten Data.csv').astype('float32')

In [388]:
#dataset_az

In [389]:
alfabeto_data = dataset_az.drop('0', axis = 1)
alfabeto_labels = dataset_az['0']

In [None]:
alfabeto_data.shape, alfabeto_labels.shape

In [391]:
#alfabeto_labels

In [392]:
alfabeto_data = np.reshape(alfabeto_data.values, (alfabeto_data.shape[0], 28, 28))

In [None]:
alfabeto_data.shape

In [None]:
indice = np.random.randint(0, alfabeto_data.shape[0])
plt.imshow(alfabeto_data[indice], cmap = 'gray')
plt.title('Classe ' + str(alfabeto_labels[indice]))

In [None]:
alfabeto_labels_int = alfabeto_labels.astype(int)
plt.figure(figsize=(12, 6))
sns.countplot(x=alfabeto_labels_int,hue=alfabeto_labels_int, palette="Set2", legend=False)

## Junção das bases de dados

In [None]:
digitos_labels, np.unique(digitos_labels)

In [None]:
alfabeto_labels, np.unique(alfabeto_labels)

In [398]:
# Offset the letter labels by 10 so they come after the digit labels 0-9 in
# the merged label space. This is in place: re-running the cell adds another 10.
alfabeto_labels += 10

In [None]:
alfabeto_labels, np.unique(alfabeto_labels)

In [400]:
data = np.vstack([alfabeto_data, digitos_data])
labels = np.hstack([alfabeto_labels, digitos_labels])

In [None]:
data.shape, labels.shape

In [402]:
data = np.array(data, dtype='float32')

In [None]:
data.shape

In [404]:
data = np.expand_dims(data, axis = -1)

In [None]:
data.shape

# Pré-processamento dos dados

In [None]:
data[0].min(), data[0].max()

In [407]:
# Scale the pixel values in place by 1/255 (presumably from 0-255 down to 0-1).
# Because `data` is mutated, running this cell a second time divides again.
data /= 255.0

In [None]:
data[0].min(), data[0].max()

In [None]:
np.unique(labels)

In [410]:
le = LabelBinarizer()
labels = le.fit_transform(labels)

In [None]:
np.unique(labels)

In [None]:
labels

In [None]:
labels[0]

In [None]:
labels[30000], len(labels[30000]), 26 + 10

In [415]:
# OneHotEncoder
# A, B, C, Z
# 0, 1, 2, 25

# A, B, C
# 1, 0, 0
# 0, 1, 0
# 0, 0, 1

In [None]:
classes_total = labels.sum(axis=0)
classes_total

In [None]:
classes_total.max()

In [None]:
plt.imshow(data[30000].reshape(28,28), cmap='gray')
plt.title(str(labels[30000]));

In [419]:
# Weight each class by (size of the largest class) / (size of this class), so
# the most frequent class gets weight 1 and rarer classes get larger weights.
maior_total = classes_total.max()
classes_peso = {
    indice_classe: maior_total / total_classe
    for indice_classe, total_classe in enumerate(classes_total)
}

In [None]:
classes_peso

In [421]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, stratify=labels, random_state = 1)

In [None]:
X_train.shape, X_test.shape

In [None]:
y_train.shape, y_test.shape

In [424]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [425]:
augmentation = ImageDataGenerator(rotation_range=10, zoom_range=0.05, width_shift_range=0.1,
                                  height_shift_range=0.1, horizontal_flip = False)

# Criação da estrutura da rede neural

- Padding: https://www.pico.net/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow

In [426]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# CNN classifier for 28x28 single-channel images: three Conv2D + MaxPool2D
# stages, a flatten step, two dense layers and a 36-unit softmax output.
rede_neural = Sequential([
    # Stage 1: default padding.
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)),
    MaxPool2D(pool_size=(2,2)),

    # Stage 2: 'same' padding.
    Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2,2)),

    # Stage 3: 'valid' padding.
    Conv2D(filters=128, kernel_size=(3,3), activation='relu', padding='valid'),
    MaxPool2D(pool_size=(2,2)),

    Flatten(),

    # Classifier head.
    Dense(64, activation='relu'),
    Dense(128, activation='relu'),
    Dense(36, activation='softmax'),
])

# Targets are one-hot encoded, hence categorical cross-entropy.
rede_neural.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [None]:
2 * 2 * 128

In [None]:
rede_neural.summary()

In [430]:
# Display name for each class index: the ten digits first, then the letters
# A-Z, one single-character string per entry.
nomes_labels = list('0123456789' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

In [None]:
print(nomes_labels)

# Treinamento da rede neural

In [432]:
arquivo_modelo = 'manuscrito.model'
epochs = 20
batch_size = 128

In [433]:
checkpointer = ModelCheckpoint(arquivo_modelo + '.keras', monitor='val_loss', verbose=1, save_best_only=True) 

In [None]:
len(X_train) // batch_size

In [None]:
# Perguntar ao usuário
opcao = input("Deseja treinar o modelo do zero ou carregar um modelo pré-treinado? (treinar/carregar): ").lower()

if opcao == 'treinar':
    # Treinar o modelo do zero
    history = rede_neural.fit(
        augmentation.flow(X_train, y_train, batch_size=batch_size),
        validation_data=(X_test, y_test),
        steps_per_epoch=len(X_train) // batch_size,
        epochs=epochs,
        class_weight=classes_peso,
        verbose=1,
        callbacks=[checkpointer]
    )
    print("Modelo treinado com sucesso!")

elif opcao == 'carregar':
    if IN_COLAB:
        # Instalar gdown no Colab se necessário
        %pip install gdown
        import gdown
        # Baixar o arquivo do modelo
        print("Baixando o modelo pré-treinado...")
        gdown.download('https://drive.google.com/uc?id=1-8PIRKmba7_p1DGDIvzWCcPZiT-4g6C-', 'rede_neural.keras', quiet=False)
    else:
      !wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1-8PIRKmba7_p1DGDIvzWCcPZiT-4g6C-' -O rede_neural.keras   
    # Carregar o modelo pré-treinado
    rede_neural = load_model('rede_neural.keras')
    print("Modelo pré-treinado carregado com sucesso!")

else:
    print("Opção inválida. Por favor, escolha 'treinar' ou 'carregar'.")


# Avaliação da rede neural

In [None]:
X_test.shape

In [None]:
previsoes = rede_neural.predict(X_test, batch_size=batch_size)

In [None]:
previsoes

In [None]:
previsoes[0]

In [None]:
len(previsoes[0])

In [None]:
np.argmax(previsoes[0])

In [None]:
# Look up the name of the class predicted for the first test sample, using
# the argmax of its prediction rather than a hard-coded index.
nomes_labels[np.argmax(previsoes[0])]

In [None]:
y_test[0]

In [None]:
np.argmax(y_test[0])

In [None]:
nomes_labels[np.argmax(y_test[0])]

In [None]:
rede_neural.evaluate(X_test, y_test)

In [None]:
print(classification_report(y_test.argmax(axis=1), previsoes.argmax(axis=1), target_names=nomes_labels))

In [None]:
history.history.keys()

In [None]:
plt.plot(history.history['val_loss']);

In [None]:
plt.plot(history.history['val_accuracy']);

# Salvando o modelo

In [451]:
# Save the current model to rede_neural.keras in the working directory.
rede_neural.save('rede_neural.keras')    
if IN_COLAB:
    # In Colab, also copy the saved file into a folder on the mounted Google Drive.
    from google.colab import drive
    drive.mount('/content/drive')
    !mkdir -p /content/drive/MyDrive/sistemas_embarcados
    !cp rede_neural.keras /content/drive/MyDrive/sistemas_embarcados/rede_neural.keras

# Teste com imagens

In [452]:
from tensorflow.keras.models import load_model

In [453]:
rede_neural_carregada = load_model('rede_neural.keras')

In [None]:
rede_neural_carregada

In [None]:
rede_neural_carregada.summary()

In [None]:
img = cv2.imread('./Imagens/letra-m.jpg')
imshow(img)

In [None]:
img.shape

In [None]:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray.shape

In [None]:
imshow(gray)

In [None]:
val, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
imshow(thresh)

In [None]:
val

In [None]:
thresh.shape

In [None]:
img = cv2.resize(thresh, (28, 28))
imshow(img)

In [None]:
img.shape

In [None]:
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis = -1)
img.shape

In [None]:
img = np.reshape(img, (1,28,28,1))
img.shape

In [None]:
previsao = rede_neural_carregada.predict(img)

In [None]:
previsao

In [None]:
np.argmax(previsao)

In [None]:
# Show the name of the class the loaded model predicted for this image,
# using the argmax of the prediction rather than a hard-coded index.
nomes_labels[np.argmax(previsao)]