In [4]:
!pip install rarfile

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [5]:
import pandas as pd
import zipfile
import rarfile

In [6]:
rarfile.UNRAR_TOOL = "unrar"
rar = rarfile.RarFile('/content/train.rar')
rar.extractall('/content/train')
print("Extracted to /content/train")

Extracted to /content/train


In [7]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.datasets.folder import make_dataset

hebrew_letters = ['א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י',
                  'כ','ך','ל', 'מ',"ם", 'נ',"ן", 'ס', 'ע', 'פ',"ף", 'צ',"ץ", 'ק', 'ר', 'ש', 'ת']
index_to_letter = {str(i): hebrew_letters[i] for i in range(len(hebrew_letters))}

In [8]:
import os
import pandas as pd
import numpy as np
from PIL import Image

image_size = (28, 28)
data = []

dataset_path = '/content/train/TRAIN'

for folder_name in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder_name)
    if not os.path.isdir(folder_path) or folder_name not in index_to_letter:
        continue

    label = index_to_letter[folder_name]  # Hebrew letter

    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        img = Image.open(img_path).convert('L').resize(image_size)
        img_array = np.array(img).flatten()
        data.append([*img_array, label])

columns = [f'pixel{i}' for i in range(28*28)] + ['label']
df = pd.DataFrame(data, columns=columns)

In [11]:
len(df['label'].unique())

27

In [18]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping

X = df.drop(columns=['label']).values
y = df['label'].values
X = X.reshape(-1, 28, 28, 1)
X = X.astype('float32') / 255
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y = to_categorical(y_encoded)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

input_shape = (28, 28, 1)
num_classes = y.shape[1]

model = Sequential()

model.add(Conv2D(96, kernel_size=(11,11), strides= 4,
                padding= 'valid', activation= 'relu',
                input_shape= input_shape,
                kernel_initializer= 'he_normal'))
model.add(MaxPooling2D(pool_size=(3,3), strides= (2,2),
                       padding= 'valid', data_format= None))
model.add(Conv2D(256, kernel_size=(5,5), strides= 1,
                 padding= 'same', activation= 'relu',
                 kernel_initializer= 'he_normal'))
model.add(MaxPooling2D(pool_size=(2,2), strides= (2,2),
                       padding= 'valid', data_format= None))

model.add(Conv2D(384, kernel_size=(3,3), strides= 1,
                 padding= 'same', activation= 'relu',
                 kernel_initializer= 'he_normal'))

model.add(Conv2D(384, kernel_size=(3,3), strides= 1,
                 padding= 'same', activation= 'relu',
                 kernel_initializer= 'he_normal'))

model.add(Conv2D(256, kernel_size=(3,3), strides= 1,
                 padding= 'same', activation= 'relu',
                 kernel_initializer= 'he_normal'))

model.add(MaxPooling2D(pool_size=(3,3), strides= (2,2),
                       padding= 'same', data_format= None))

model.add(Flatten())
model.add(Dense(4096, activation= 'relu'))
model.add(Dense(4096, activation= 'relu'))
model.add(Dense(1000, activation= 'relu'))
model.add(Dense(num_classes, activation= 'softmax'))

model.compile(optimizer= tf.keras.optimizers.Adam(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
early_stopping = EarlyStopping(
    monitor='val_accuracy',  # Monitor validation accuracy
    patience=15,              # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True # Restore model weights from the epoch with the best value of the monitored quantity.
)
model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping])
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 633ms/step - accuracy: 0.0311 - loss: 3.2926 - val_accuracy: 0.0739 - val_loss: 3.0488
Epoch 2/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 672ms/step - accuracy: 0.1006 - loss: 2.9385 - val_accuracy: 0.1389 - val_loss: 2.8037
Epoch 3/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 693ms/step - accuracy: 0.1232 - loss: 2.7675 - val_accuracy: 0.1379 - val_loss: 2.6912
Epoch 4/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 669ms/step - accuracy: 0.1591 - loss: 2.5595 - val_accuracy: 0.1498 - val_loss: 2.5450
Epoch 5/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 689ms/step - accuracy: 0.1982 - loss: 2.3506 - val_accuracy: 0.2768 - val_loss: 2.3750
Epoch 6/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 680ms/step - accuracy: 0.2885 - loss: 2.0710 - val_accuracy: 0.3143 - val_loss: 2.080