<a href="https://colab.research.google.com/github/boussouralisa/IA-JALON4/blob/main/IA_projet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importer les bibliothèques nécessaires
import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Reshape, TimeDistributed, Bidirectional, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

# Dataset locations: word-image crops first, XML annotation files second
word_images_path, xml_path = "/content/data/words", "/content/data/xml"

# Load word images together with their transcriptions
def _load_word_texts(xml_file_path):
    """Return a ``{word id: text}`` mapping read from one annotation file.

    A missing file yields an empty mapping. ``<word>`` elements lacking an
    ``id`` or ``text`` attribute are skipped. When an id occurs more than
    once, the first occurrence wins.
    """
    if not os.path.exists(xml_file_path):
        return {}
    texts = {}
    for elem in ET.parse(xml_file_path).getroot().iter('word'):
        word_id = elem.get('id')
        text = elem.get('text')
        if word_id is not None and text is not None:
            # setdefault keeps the first match, like a linear scan that
            # stops at the first hit.
            texts.setdefault(word_id, text)
    return texts


def load_images_and_labels(word_images_path, xml_path, image_size=(128, 32), limit=None):
    """Walk ``word_images_path`` and load every readable ``.png`` with its label.

    Each image is read as grayscale, resized with ``cv2.resize(image,
    image_size)`` and transposed. Its label is the ``text`` attribute of the
    ``<word>`` element whose ``id`` equals the file name up to the first dot,
    looked up in ``<xml_path>/<first two dash-separated name parts>.xml``.

    Args:
        word_images_path: Root directory searched recursively for ``.png`` files.
        xml_path: Directory holding the XML annotation files.
        image_size: Size passed to ``cv2.resize``.
        limit: Stop as soon as this many images were collected; a falsy
            value means no limit.

    Returns:
        A ``(images, labels)`` tuple of equally long lists. An image whose
        transcription cannot be found gets the empty string as label, so
        callers can filter those pairs out.

    Raises:
        xml.etree.ElementTree.ParseError: If an annotation file is malformed.
    """
    images = []
    labels = []
    # Each annotation file describes many words: parse it once and reuse the
    # id -> text table instead of re-reading the XML for every image.
    texts_by_xml = {}
    for root_dir, _dirs, files in os.walk(word_images_path):
        for file in files:
            if not file.endswith(".png"):
                continue
            image = cv2.imread(os.path.join(root_dir, file), cv2.IMREAD_GRAYSCALE)
            if image is None:
                # Unreadable file: skip it without recording a label.
                continue
            # Transposed so the array layout matches what the rest of the
            # pipeline expects.
            images.append(np.transpose(cv2.resize(image, image_size)))

            label = ""
            name_parts = file.split('-')
            # A name without two dash-separated parts cannot be mapped to an
            # annotation file; treat it as unlabelled instead of crashing.
            if len(name_parts) >= 2:
                xml_file = name_parts[0] + "-" + name_parts[1] + ".xml"
                if xml_file not in texts_by_xml:
                    texts_by_xml[xml_file] = _load_word_texts(
                        os.path.join(xml_path, xml_file)
                    )
                label = texts_by_xml[xml_file].get(file.split('.')[0], "")
            labels.append(label)

            if limit and len(images) >= limit:
                return images, labels
    return images, labels

# Cap the number of images loaded to keep RAM usage bounded
images, labels = load_images_and_labels(word_images_path, xml_path, limit=10000)

# Drop the (image, label) pairs whose label is empty
filtered_images = [img for img, lbl in zip(images, labels) if lbl]
filtered_labels = [lbl for lbl in labels if lbl]
if not filtered_labels:
    # Fail early with an explicit message instead of an obscure max() error below.
    raise ValueError(
        "No labelled image was loaded from %r / %r" % (word_images_path, xml_path)
    )

# Convert to NumPy arrays and scale pixels to [0, 1].
# float32 halves the memory footprint compared with the float64 array that
# dividing an integer array by 255.0 would produce.
images = np.array(filtered_images, dtype=np.float32) / 255.0
images = np.expand_dims(images, axis=-1)  # add the channel axis
labels = np.array(filtered_labels)

# Encode the labels character by character.
# NOTE(review): only char_level is set, so the Tokenizer's other defaults apply;
# Keras lowercases text by default, which would merge upper and lower case —
# confirm this is intended.
tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(labels)
sequences = tokenizer.texts_to_sequences(labels)
max_label_length = max(len(seq) for seq in sequences)

# Fixed length of the model's output sequences
output_sequence_length = 32  # for example

# Pad the label sequences (with trailing zeros) to the model's output length.
# truncating='post' keeps the beginning of a label longer than the output
# length, so characters stay aligned with the time steps from position 0.
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    sequences, maxlen=output_sequence_length, padding='post', truncating='post'
)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(images, padded_sequences, test_size=0.2, random_state=42)

# Per-sample augmentation applied inside the tf.data pipeline
def augment_image(image, label):
    """Apply label-preserving photometric jitter to one training sample.

    Only brightness and contrast are perturbed. No flip is applied: mirroring
    a handwritten word produces an image that no longer matches its
    transcription, so it would teach the model wrong image/label pairs.

    Args:
        image: Image tensor; pixel values are scaled to [0, 1] upstream
            (division by 255).
        label: Encoded label, passed through unchanged.

    Returns:
        The ``(augmented_image, label)`` pair.
    """
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
    # The jitter can overshoot the normalised range; bring values back into it.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image, label

def create_dataset(X, y, batch_size=32, augment=False):
    """Wrap ``(X, y)`` in a shuffled, batched and prefetched ``tf.data.Dataset``.

    Args:
        X: Input samples, sliced along the first axis.
        y: Targets, sliced along the first axis in step with ``X``.
        batch_size: Number of samples per batch.
        augment: When true, ``augment_image`` is mapped over every sample
            before shuffling.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    autotune = tf.data.experimental.AUTOTUNE
    pipeline = tf.data.Dataset.from_tensor_slices((X, y))
    if augment:
        pipeline = pipeline.map(augment_image, num_parallel_calls=autotune)
    pipeline = pipeline.shuffle(buffer_size=1000)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(buffer_size=autotune)

# Batch size used for both the training and the validation dataset
batch_size = 32

# Give the labels a trailing axis of size 1 (one class index per time step).
# This has to happen BEFORE the datasets are built: they capture the arrays at
# creation time, so reshaping y_train / y_val afterwards never reaches fit().
y_train = np.expand_dims(y_train, axis=-1)
y_val = np.expand_dims(y_val, axis=-1)

train_dataset = create_dataset(X_train, y_train, batch_size, augment=True)
val_dataset = create_dataset(X_val, y_val, batch_size)

# Model: convolutional feature extractor followed by bidirectional LSTMs,
# with dropout and batch normalisation as regularisation
input_data = Input(name='inputs', shape=(128, 32, 1))
conv_1 = Conv2D(32, (3,3), activation='relu', padding='same')(input_data)
pool_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2 = Conv2D(64, (3,3), activation='relu', padding='same')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3 = Conv2D(128, (3,3), activation='relu', padding='same')(pool_2)
pool_3 = MaxPooling2D(pool_size=(2, 2))(conv_3)
flattened = Flatten()(pool_3)
dense_flat = Dense(32*128, activation='relu')(flattened)
dropout_1 = Dropout(0.5)(dense_flat)
reshaped = Reshape((32, 128))(dropout_1)  # 32 time steps of 128 features for the LSTMs
dense_1 = Dense(64, activation='relu')(reshaped)
dropout_2 = Dropout(0.5)(dense_1)
lstm_1 = Bidirectional(LSTM(128, return_sequences=True))(dropout_2)
lstm_2 = Bidirectional(LSTM(128, return_sequences=True))(lstm_1)
batch_norm = BatchNormalization()(lstm_2)
# One softmax per time step over the tokenizer vocabulary, plus one extra class
# for index 0, which is the padding value of the label sequences
dense_2 = TimeDistributed(Dense(len(tokenizer.word_index) + 1, activation='softmax'))(batch_norm)

model = Model(inputs=input_data, outputs=dense_2)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model for 5 epochs
history = model.fit(train_dataset, epochs=5, validation_data=val_dataset)

# Run inference on a single word image
def predict_image(image_path, model, tokenizer, image_size=(128, 32)):
    """Predict the text shown in the image stored at ``image_path``.

    The image is read as grayscale, resized with ``cv2.resize(image,
    image_size)``, transposed, scaled to [0, 1] and given a batch and a
    channel axis before being passed to ``model.predict``. The prediction is
    decoded by taking the arg-max class of every time step, dropping index 0
    and mapping the remaining indices through ``tokenizer.index_word``.

    Args:
        image_path: Path of the image file to read.
        model: Model exposing ``predict``.
        tokenizer: Tokenizer exposing the ``index_word`` mapping.
        image_size: Size passed to ``cv2.resize``; defaults to the size
            previously hard-coded here.

    Returns:
        The decoded string (possibly empty).

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but cannot be decoded as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # Report the real cause instead of an obscure failure inside resize.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"Could not decode image: {image_path}")
    image = np.transpose(cv2.resize(image, image_size))
    # Add the batch axis in front and the channel axis at the end.
    batch = (image / 255.0)[np.newaxis, ..., np.newaxis]
    prediction = model.predict(batch)
    predicted_indices = np.argmax(prediction, axis=-1)[0]
    return ''.join(
        tokenizer.index_word.get(idx, '') for idx in predicted_indices if idx != 0
    )

# Run the model on one sample word image and print the decoded text
test_image_path = os.path.join(
    word_images_path,
    "a01/a01-000u/a01-000u-01-01.png",
)
predicted_label = predict_image(
    image_path=test_image_path,
    model=model,
    tokenizer=tokenizer,
)
print("Texte prédit:", predicted_label)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Texte prédit: wad


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive
from google.colab import drive
drive.mount('/content/drive')