<a href="https://colab.research.google.com/github/elgharbiyoussef/CNN/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CNN

In [None]:
# Python packages to manipulate files
import os
import shutil
import pathlib
from pathlib import Path
import datetime
# Tensorflow and Numpy packages
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, MaxPool2D, Activation, Flatten, Dropout
import numpy as np

# Display related packages
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import Image
import PIL
import PIL.Image

%matplotlib inline

In [None]:
# List the physical CPU devices and declare that same list as the visible
# CPU devices for TensorFlow.
# NOTE(review): this call only touches devices of type 'CPU'; it does not look
# like it hides any GPU. The commented-out line below is the variant that
# passes an empty GPU list -- confirm which behaviour is intended.
my_devices = tf.config.experimental.list_physical_devices(device_type='CPU')
tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU')
# #tf.config.set_visible_devices([], 'GPU')

In [None]:
! git clone https://github.com/minus--/arabic-letters-tutorial.git

Cloning into 'arabic-letters-tutorial'...
remote: Enumerating objects: 50, done.[K
remote: Counting objects: 100% (50/50), done.[K
remote: Compressing objects: 100% (37/37), done.[K
remote: Total 50 (delta 25), reused 31 (delta 12), pack-reused 0[K
Unpacking objects: 100% (50/50), done.


In [None]:
! [ ! -d ./data ] && tar xvzf ./arabic-letters-tutorial/arabic_handwritten_data.tgz || echo "Dataset folder already exists"

[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
data/train_data/id_8608_label_12.png
data/train_data/id_2106_label_12.png
data/train_data/id_10590_label_8.png
data/train_data/id_2818_label_17.png
data/train_data/id_7349_label_23.png
data/train_data/id_5289_label_18.png
data/train_data/id_3050_label_18.png
data/train_data/id_11114_label_18.png
data/train_data/id_192_label_24.png
data/train_data/id_6258_label_27.png
data/train_data/id_6579_label_11.png
data/train_data/id_9058_label_13.png
data/train_data/id_10945_label_25.png
data/train_data/id_8889_label_20.png
data/train_data/id_4557_label_10.png
data/train_data/id_5785_label_24.png
data/train_data/id_5074_label_19.png
data/train_data/id_7812_label_25.png
data/train_data/id_12978_label_27.png
data/train_data/id_325_label_13.png
data/train_data/id_9146_label_24.png
data/train_data/id_5488_label_14.png
data/train_data/id_7108_label_21.png
data/train_data/id_9379_label_25.png
data/train_data/id_

In [None]:
# Class names in label order: the file label <k> (1-based) corresponds to
# arabic_characters[k - 1].
arabic_characters = [
    'alef', 'beh', 'teh', 'theh', 'jeem', 'hah', 'khah',        # labels 1-7
    'dal', 'thal', 'reh', 'zain', 'seen', 'sheen', 'sad',       # labels 8-14
    'dad', 'tah', 'zah', 'ain', 'ghain', 'feh', 'qaf',          # labels 15-21
    'kaf', 'lam', 'meem', 'noon', 'heh', 'waw', 'yeh',          # labels 22-28
]

In [None]:
# One log folder per run, named after the time this cell was executed.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("./logs", run_stamp)

In [None]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [None]:
# Open TensorBoard inside the notebook, pointed at this run's log directory.
%tensorboard --logdir $logdir

Reusing TensorBoard on port 6006 (pid 104), started 0:00:05 ago. (Use '!kill 104' to kill it.)

<IPython.core.display.Javascript object>

In [None]:
# Mini-batch size used when batching the datasets.
batch_size = 32
# Square input resolution (pixels) expected by the network.
img_height = img_width = 32

def get_dataset(dataset_dir, image_size=None):
    """Build an unbatched ``tf.data.Dataset`` of ``(image, label)`` pairs.

    Args:
        dataset_dir: Folder containing PNG files whose names end with
            ``_label_<k>.png``.
        image_size: Optional ``(height, width)`` the images are resized to.
            Defaults to the notebook-level ``(img_height, img_width)``.

    Returns:
        A dataset yielding ``(img, label)`` where ``img`` is a 3-channel
        float32 tensor divided by 255 and ``label`` is the int32 file label
        minus one (zero-based class index).

    Raises:
        FileNotFoundError: If ``dataset_dir`` contains no ``*.png`` file.
    """
    if image_size is None:
        image_size = (img_height, img_width)

    def process_filename(file_path):
        # Keep only the digits captured after "_label_" and shift to 0-based.
        label = tf.strings.regex_replace(input=file_path,pattern=r".+_label_(\d+)\.png", rewrite=r"\1")
        label = tf.strings.to_number(label, tf.int32)-1
        return label

    def process_img(file_path):
        img = tf.io.read_file(file_path)
        img = tf.image.decode_png(img, channels=3)
        # The resized image is already floating point in the 0-255 range, so
        # one explicit cast plus a division is all that is needed.
        img = tf.image.resize(img, size=image_size)
        img = tf.cast(img, tf.float32) / 255.0
        return img

    data_dir = pathlib.Path(dataset_dir)
    file_list = [str(path.absolute()) for path in data_dir.glob("*.png")]
    if not file_list:
        # An empty list would otherwise surface later as an obscure dtype
        # error inside the mapped function.
        raise FileNotFoundError(f"No PNG files found in {data_dir.absolute()}")

    files_ds = tf.data.Dataset.from_tensor_slices(file_list)
    files_ds = files_ds.map(lambda x: (process_img(x), process_filename(x)))
    return files_ds

In [None]:
# Folders holding the raw PNG files (relative to the working directory).
train_dataset_path = "data/train_data"
test_dataset_path = "data/test_data"

# Training data: shuffled with a buffer of ten batches, then batched.
train_ds = get_dataset(train_dataset_path)
train_ds = train_ds.shuffle(buffer_size=batch_size*10)
train_ds = train_ds.batch(batch_size)

# Validation data: batched only.
valid_ds = get_dataset(test_dataset_path)
valid_ds = valid_ds.batch(batch_size)


In [None]:
# Start from an empty Sequential model; layers are appended in the next cells.
Model=Sequential()

In [None]:
# Two convolution -> ReLU -> 2x2 max-pooling stages (64 then 128 filters).
# The first convolution also fixes the input shape to 32x32 images with 3 channels.
for layer in (
    Conv2D(64, (3, 3), input_shape=(32, 32, 3)),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2)),
):
    Model.add(layer)

In [None]:
# Flatten the pooled feature maps before the dense layers are added.
Model.add(Flatten())

In [None]:
# Classifier head: 512-unit ReLU layer with 50% dropout, then a 28-way softmax.
for layer in (
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(28),
    Activation('softmax'),
):
    Model.add(layer)

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
Model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 64)        1792      
_________________________________________________________________
activation (Activation)      (None, 30, 30, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 128)       73856     
_________________________________________________________________
activation_1 (Activation)    (None, 13, 13, 128)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0

In [None]:
Model.compile(
    # Optimizer
    optimizer=tf.keras.optimizers.RMSprop(),
    # Loss function to minimize. The model's last layer is a softmax
    # Activation, so it outputs probabilities and the loss must not treat
    # them as logits (from_logits=True would apply softmax a second time).
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    # List of metrics to monitor (labels are integer class indices)
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Only TensorBoard logging is active. An EarlyStopping callback
# (monitor="val_loss", min_delta=1e-2, patience=2, verbose=1) was tried
# previously and is currently disabled.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=logdir,
    histogram_freq=0,     # histogram visualizations disabled
    embeddings_freq=0,    # embedding visualizations disabled
    update_freq="epoch",  # write metrics once per epoch
)
callbacks = [tensorboard_callback]

In [None]:
# Train for 50 epochs, validating on valid_ds after each epoch; the returned
# History object is kept in `history`.
print("Fit model on training data")
history = Model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=50,
    callbacks=callbacks,
)


Fit model on training data


NameError: ignored

# CNN ImageDataGenerator

In [None]:
# Make /content/data the working directory of the notebook process.
os.chdir("/content/data")

In [None]:
# Create the "class" folder in the current working directory.
# exist_ok=True keeps the cell re-runnable (os.mkdir raises FileExistsError
# when the folder is already there).
os.makedirs("class", exist_ok=True)

In [None]:
# Make /content/data/class/ the working directory of the notebook process.
os.chdir('/content/data/class/')

In [None]:
# Create one sub-folder per letter in the current working directory.
# exist_ok=True keeps the cell re-runnable instead of failing on the first
# folder that already exists.
for letter_name in arabic_characters:
    os.makedirs(letter_name, exist_ok=True)

In [None]:
# Make the training-image folder the working directory of the notebook process.
os.chdir('/content/data/train_data/')

In [None]:
# Move every training image into the sub-folder of the letter it depicts.
# The label is the last "_"-separated field of the file name (extension
# removed) and is the 1-based position of the letter in `arabic_characters`.
train_src_dir = '/content/data/train_data/'
train_dst_root = '/content/data/class/'

images = os.listdir(train_src_dir)
skipped_files = []
for image_name in images:
    stem, _extension = os.path.splitext(image_name)
    label_text = stem.split('_')[-1]

    # Only labels 1..len(arabic_characters) map onto a letter. Anything else
    # is left in place and reported instead of being filed under 'yeh' by a
    # catch-all branch.
    if not label_text.isdecimal() or not 1 <= int(label_text) <= len(arabic_characters):
        skipped_files.append(image_name)
        continue

    letter_name = arabic_characters[int(label_text) - 1]
    shutil.move(os.path.join(train_src_dir, image_name),
                os.path.join(train_dst_root, letter_name))

if skipped_files:
    print("Skipped files without a valid label:", skipped_files)
    

In [None]:
# Print how many files each training class folder holds, in label order.
for letter_name in arabic_characters:
    class_dir = "/content/data/class/" + letter_name
    print(len(os.listdir(class_dir)))

480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
480
481


In [None]:
# Make /content/data the working directory of the notebook process.
os.chdir("/content/data")

In [None]:
# Create the "class_test" folder in the current working directory.
# exist_ok=True keeps the cell re-runnable (os.mkdir raises FileExistsError
# when the folder is already there).
os.makedirs("class_test", exist_ok=True)

In [None]:
# Make /content/data/class_test/ the working directory of the notebook process.
os.chdir('/content/data/class_test/')

In [None]:
# Create one sub-folder per letter in the current working directory.
# exist_ok=True keeps the cell re-runnable instead of failing on the first
# folder that already exists.
for letter_name in arabic_characters:
    os.makedirs(letter_name, exist_ok=True)

In [None]:
# Make the test-image folder the working directory of the notebook process.
os.chdir('/content/data/test_data/')

In [None]:
# Move every test image into the sub-folder of the letter it depicts.
# The label is the last "_"-separated field of the file name (extension
# removed) and is the 1-based position of the letter in `arabic_characters`.
test_src_dir = '/content/data/test_data/'
test_dst_root = '/content/data/class_test/'

images = os.listdir(test_src_dir)
skipped_files = []
for image_name in images:
    stem, _extension = os.path.splitext(image_name)
    label_text = stem.split('_')[-1]

    # Only labels 1..len(arabic_characters) map onto a letter. Anything else
    # is left in place and reported instead of being filed under 'yeh' by a
    # catch-all branch.
    if not label_text.isdecimal() or not 1 <= int(label_text) <= len(arabic_characters):
        skipped_files.append(image_name)
        continue

    letter_name = arabic_characters[int(label_text) - 1]
    shutil.move(os.path.join(test_src_dir, image_name),
                os.path.join(test_dst_root, letter_name))

if skipped_files:
    print("Skipped files without a valid label:", skipped_files)
    

In [None]:
# Print how many files each test class folder holds, in label order.
for letter_name in arabic_characters:
    class_dir = "/content/data/class_test/" + letter_name
    print(len(os.listdir(class_dir)))

120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120
120


In [None]:
# Augmenting generator for the training images: pixel values are rescaled by
# 1/255 and random rotations, shifts, shears and zooms are applied.
# horizontal_flip is disabled because a mirrored handwritten letter is no
# longer a valid sample of its class. cval is not passed because it is only
# used with fill_mode='constant'.
Train_ds = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False,
    fill_mode='reflect',
)

In [None]:
# Generator for the test images: only the 1/255 rescaling is configured, no
# augmentation arguments are passed.
Test_ds= ImageDataGenerator(rescale=1./255)

In [None]:
# Stream augmented training batches from the per-letter folders.
# `classes` pins the class-index order to `arabic_characters`; labels are
# produced in 'categorical' mode. Note that this rebinds Train_ds from the
# generator to the directory iterator it returns.
Train_ds = Train_ds.flow_from_directory(
    directory='/content/data/class/',
    target_size=(32, 32),
    batch_size=32,
    class_mode='categorical',
    classes=arabic_characters,
)

Found 13440 images belonging to 28 classes.


In [None]:
# Stream test batches from the per-letter folders.
# `classes` must be the same list that the training iterator uses: when it is
# omitted, class indices are inferred from the sub-folder names in
# alphanumeric order (ain=0, alef=1, ...), which does not match the
# `arabic_characters` order used for training and makes the validation
# metrics meaningless.
Test_ds=Test_ds.flow_from_directory(directory='/content/data/class_test/',
                                    batch_size=16,
                                    target_size=(32, 32),
                                    classes=arabic_characters,
                                    class_mode='categorical')

Found 3360 images belonging to 28 classes.


In [None]:
# Start again from an empty Sequential model (rebinds `Model`); layers are
# appended in the next cells.
Model=Sequential()

In [None]:
# Two convolution -> ReLU -> 2x2 max-pooling stages (64 then 128 filters).
# The first convolution also fixes the input shape to 32x32 images with 3 channels.
for layer in (
    Conv2D(64, (3, 3), input_shape=(32, 32, 3)),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2)),
):
    Model.add(layer)

In [None]:
# Flatten the pooled feature maps before the dense layers are added.
Model.add(Flatten())

In [None]:
# Classifier head: 512-unit ReLU layer with 50% dropout, then a 28-way softmax.
for layer in (
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(28),
    Activation('softmax'),
):
    Model.add(layer)

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
Model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 30, 30, 64)        1792      
_________________________________________________________________
activation_28 (Activation)   (None, 30, 30, 64)        0         
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 15, 15, 64)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 13, 13, 128)       73856     
_________________________________________________________________
activation_29 (Activation)   (None, 13, 13, 128)       0         
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 6, 6, 128)         0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 4608)             

In [None]:
# Configure training: Adam with its default settings, categorical
# cross-entropy as the loss, and accuracy as the monitored metric.
adam_optimizer = tf.keras.optimizers.Adam()
Model.compile(
    loss='categorical_crossentropy',
    optimizer=adam_optimizer,
    metrics=["accuracy"],
)

In [None]:
# Train for 50 epochs on the augmented batches, validating on Test_ds.
# One epoch is one full pass over the training iterator; len(Train_ds) is its
# number of batches, so the step count follows the data and batch size
# instead of the hard-coded 13440/32.
Model.fit(Train_ds,
          steps_per_epoch=len(Train_ds),
          validation_data=Test_ds,
          epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f9fe261cef0>