  **OmniVec-like**
  This section builds a more advanced model architecture that is meant to approximate the functionality and complexity of OmniVec. Because OmniVec is not available as a standard, off-the-shelf model, we implement a deep Convolutional Neural Network (CNN) with several stacked convolutional blocks, structured in a way similar to how OmniVec would be.


I'll provide a detailed model architecture using TensorFlow/Keras.

In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, concatenate

# Locations of the training and test image folders.
train_dir = '/Users/asmae/Documents/GitHub/detecting-dyslexia/Gambo/Train'
test_dir = '/Users/asmae/Documents/GitHub/detecting-dyslexia/Gambo/Test'

# Settings shared by all three directory iterators below.
flow_options = dict(target_size=(150, 150), batch_size=32, class_mode='binary')

# Both generators multiply pixel values by 1/255; the training one also
# reserves 20% of its folder for the 'validation' subset.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training and validation batches come from the same folder, separated by `subset`.
train_generator = train_datagen.flow_from_directory(
    train_dir, subset='training', **flow_options)
validation_generator = train_datagen.flow_from_directory(
    train_dir, subset='validation', **flow_options)

# Test batches come from their own folder.
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

# Define the OmniVec-like model: four convolutional stages followed by a
# dense classifier head.
input_layer = Input(shape=(150, 150, 3))

# (filters, number of stacked 3x3 convolutions) for each stage, in order.
stage_specs = [(64, 2), (128, 2), (256, 3), (512, 3)]

x = input_layer
for filters, depth in stage_specs:
    for _ in range(depth):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    # Every stage ends with a 2x2 max pooling.
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Global Average Pooling replaces a Flatten before the dense layers.
x = GlobalAveragePooling2D()(x)

# Fully connected layers, each followed by 50% dropout.
for units in (1024, 512):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.5)(x)

# Output layer: a single sigmoid unit for the binary label.
output_layer = Dense(1, activation='sigmoid')(x)

# Create and compile the model.
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
# The step counts are left for Keras to infer from the length of each
# generator. The previous `samples // 32` repeated the batch size as a magic
# number, skipped the final partial batch, and evaluated to 0 steps whenever a
# split held fewer than 32 images.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=3
)

# Evaluate the model on the test data
loss, accuracy = model.evaluate(test_generator)
print(f'Test accuracy: {accuracy}')


In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Set the correct paths for train_dir and test_dir
train_dir = '/Users/asmae/Documents/GitHub/detecting-dyslexia/Gambo/Train'
test_dir = '/Users/asmae/Documents/GitHub/detecting-dyslexia/Gambo/Test'

# Prepare data generators. Both multiply pixel values by 1/255; the training
# generator also reserves 20% of its folder for the 'validation' subset.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='training')

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='validation')

# shuffle=False keeps the batches in the same order as
# `test_generator.classes`. The evaluation code compares predictions with that
# attribute, so a shuffled iterator would pair predictions with wrong labels.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False)

def _conv_stage(tensor, filters, depth):
    """Apply `depth` 3x3 same-padded ReLU convolutions, then a 2x2 max pooling."""
    for _ in range(depth):
        tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
    return MaxPooling2D(pool_size=(2, 2))(tensor)


# Define the OmniVec model
input_layer = Input(shape=(150, 150, 3))

# Convolutional stages one to four: 64x2, 128x2, 256x3 and 512x3.
x = _conv_stage(input_layer, 64, 2)
x = _conv_stage(x, 128, 2)
x = _conv_stage(x, 256, 3)
x = _conv_stage(x, 512, 3)

# Global Average Pooling
x = GlobalAveragePooling2D()(x)

# Fully connected head: Dense(1024) and Dense(512), each followed by dropout.
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)

# Output layer: one sigmoid unit for the binary label.
output_layer = Dense(1, activation='sigmoid')(x)

# Create and compile the model.
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping watches the validation loss and keeps the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


In [None]:
# Train the model.
# The step counts are left for Keras to infer from the length of each
# generator. The previous `samples // 32` repeated the batch size as a magic
# number, skipped the final partial batch, and evaluated to 0 steps whenever a
# split held fewer than 32 images.
# NOTE(review): the EarlyStopping callback is created with patience=5, so it
# cannot stop a run of only 3 epochs; raise `epochs` if early stopping is
# meant to take effect.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=3,
    callbacks=[early_stopping]
)

# Save the model
model.save('my_model.keras')

In [None]:
# Evaluate the model on the test data
loss, accuracy = model.evaluate(test_generator)
print(f'Test accuracy: {accuracy}')

# Per-sample metrics need predictions in the same order as the labels.
# `flow_from_directory` shuffles unless told otherwise, so a dedicated
# unshuffled iterator is built here; its `.classes` attribute then lines up
# with the rows returned by `model.predict`.
eval_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False)

# No `steps` argument: Keras runs exactly one pass over the iterator, so every
# image is predicted once. A hand-computed `samples // batch_size + 1` asks
# for one batch too many when the sample count is a multiple of the batch size.
predictions = model.predict(eval_generator)
# Flatten the (N, 1) sigmoid outputs to a 1-D vector of 0/1 labels.
predicted_classes = (predictions > 0.5).astype("int32").ravel()
true_classes = eval_generator.classes

# Class names ordered by their integer label, plus the label ids themselves so
# the matrix and the report always cover every class, even one never predicted.
class_names = sorted(eval_generator.class_indices, key=eval_generator.class_indices.get)
class_labels = list(range(len(class_names)))

# Confusion Matrix
conf_matrix = confusion_matrix(true_classes, predicted_classes, labels=class_labels)
print("Confusion Matrix:")
print(conf_matrix)

# Classification Report
report = classification_report(
    true_classes,
    predicted_classes,
    labels=class_labels,
    target_names=class_names)
print("Classification Report:")
print(report)

# Plot Confusion Matrix
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)

# Write each count into its cell, switching to white text on dark cells.
fmt = 'd'
thresh = conf_matrix.max() / 2.
for i, j in np.ndindex(conf_matrix.shape):
    plt.text(j, i, format(conf_matrix[i, j], fmt),
             ha="center", va="center",
             color="white" if conf_matrix[i, j] > thresh else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.show()

In [None]:
# Inference function
def predict_image(model, img_path, target_size=(150, 150), threshold=0.5):
    """Classify a single image file with a trained binary model.

    The image is loaded as RGB, resized to ``target_size``, given a leading
    batch dimension and divided by 255 before being passed to
    ``model.predict``.

    Args:
        model: Object exposing ``predict``; it must return exactly one score
            for the single-image batch.
        img_path: Path of the image file to classify.
        target_size: ``(height, width)`` the image is resized to. Defaults to
            the 150x150 input size used when the model is built.
        threshold: Scores strictly above this value are reported as dyslexia.

    Returns:
        ``'Dyslexia Detected'`` or ``'No Dyslexia Detected'``.

    Raises:
        ValueError: If the model returns anything other than a single score.
    """
    img = load_img(img_path, target_size=target_size, color_mode='rgb')
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Create batch dimension
    img_array /= 255.0  # Normalize

    # Pull out the one score as a plain float instead of relying on the
    # truthiness of a (1, 1) array; `.item()` raises if there is more than one.
    score = float(np.asarray(model.predict(img_array)).item())

    # NOTE(review): this treats label 1 as the dyslexia class. Per the Keras
    # docs, flow_from_directory numbers class folders in alphanumeric order,
    # so confirm the mapping against `train_generator.class_indices`.
    if score > threshold:
        return 'Dyslexia Detected'
    return 'No Dyslexia Detected'

# Example usage: classify one image from the test folder and show the verdict.
sample_image_path = '/Users/asmae/Documents/GitHub/detecting-dyslexia/Gambo/Test/Normal/A-42.png'
sample_verdict = predict_image(model, sample_image_path)
print(sample_verdict)

# Count images in each directory
def count_images(directory,
                 extensions=('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')):
    """Count the image files in every class sub-folder of *directory*.

    Only regular files whose name ends with one of ``extensions`` (compared
    case-insensitively) are counted. Stray entries such as ``.DS_Store``,
    text files or nested folders therefore do not inflate the totals, which a
    plain ``len(os.listdir(...))`` would allow.

    Args:
        directory: Folder containing one sub-folder per class.
        extensions: File suffixes treated as images. The default mirrors the
            formats Keras' directory iterators accept according to the Keras
            documentation.

    Returns:
        Dict mapping each sub-folder name to its number of image files.
        Entries of *directory* that are not folders are ignored.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    class_counts = {}
    # Sorted so the resulting dict (and anything printed from it) is stable.
    for class_dir in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_dir)
        if not os.path.isdir(class_path):
            continue
        class_counts[class_dir] = sum(
            1
            for name in os.listdir(class_path)
            if name.lower().endswith(suffixes)
            and os.path.isfile(os.path.join(class_path, name))
        )
    return class_counts

# Tally the images per class for both dataset splits.
train_class_counts = count_images(train_dir)
test_class_counts = count_images(test_dir)

# Print one heading per split, followed by an indented line for each class.
for heading, per_class in (
    ("Images per class in train:", train_class_counts),
    ("Images per class in test:", test_class_counts),
):
    print(heading)
    for class_name, count in per_class.items():
        print(f"  {class_name}: {count}")

# Size of the smallest training class.
min_class_count = min(train_class_counts.values())

# Random transformations applied on top of the 1/255 rescaling.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# NOTE: this rebinds `train_datagen`; unlike the generator created earlier in
# the file, this one has no validation_split configured.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

def augment_class(directory, class_name, target_count, batch_size=32):
    """Top up one class folder with augmented images until it reaches *target_count*.

    Augmented copies are produced by a Keras ``ImageDataGenerator`` and written
    as ``aug_*.jpeg`` files into the class folder itself. Nothing happens when
    the folder already holds at least ``target_count`` images.

    The number of images actually produced is tracked batch by batch, because
    a batch is smaller than ``batch_size`` whenever the class has fewer images
    than that (or at the end of a pass over it). The result may exceed
    ``target_count`` by at most ``batch_size - 1`` images.

    Args:
        directory: Dataset split folder containing the class sub-folders.
        class_name: Name of the sub-folder to augment.
        target_count: Desired number of images in that sub-folder.
        batch_size: Upper bound on the images generated per step.

    Returns:
        None.
    """
    # Count only image files so that entries such as .DS_Store do not hide a
    # deficit (these suffixes are the ones Keras' directory iterators accept,
    # per the Keras documentation).
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')
    class_path = os.path.join(directory, class_name)
    current_count = sum(
        1 for name in os.listdir(class_path)
        if name.lower().endswith(image_suffixes)
    )

    missing = target_count - current_count
    if missing <= 0:
        return

    # NOTE(review): horizontal_flip mirrors the images; confirm that mirrored
    # samples still belong to the same class for this dataset.
    datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    # NOTE(review): the generated files land inside `directory`, so any later
    # flow_from_directory call on it (including a validation split) will also
    # pick them up - confirm that this is intended.
    generator = datagen.flow_from_directory(
        directory,
        classes=[class_name],
        target_size=(150, 150),
        # Never request more than is missing, so small deficits are met exactly.
        batch_size=min(batch_size, missing),
        class_mode='binary',
        save_to_dir=class_path,
        save_prefix='aug',
        save_format='jpeg'
    )

    # An empty class yields empty batches forever; there is nothing to augment.
    if generator.samples == 0:
        print(f"No source images found for class {class_name}; skipping augmentation")
        return

    generated = 0
    batch_number = 0
    while generated < missing:
        # Drawing a batch writes its augmented images to `save_to_dir`.
        images, _ = next(generator)
        if len(images) == 0:
            break
        generated += len(images)
        batch_number += 1
        print(f"Augmented batch {batch_number} for class {class_name}")

# Augment data for classes in train if needed.
# Each class is topped up towards the size of the largest class. The smallest
# class size cannot serve as the target: no class is ever below it, so
# augment_class would never generate anything.
if train_class_counts:
    max_class_count = max(train_class_counts.values())
    for class_name in train_class_counts:
        augment_class(train_dir, class_name, max_class_count)

print("Data augmentation completed if needed.")