In [36]:
import pandas as pd
import zipfile
import rarfile

In [37]:
rarfile.UNRAR_TOOL = "unrar"  # or full path to unrar if not in PATH

# Open and extract
rar = rarfile.RarFile('/content/train.rar')
rar.extractall('/content/train')
print("Extracted to /content/train")

Extracted to /content/train


In [14]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.datasets.folder import make_dataset

# Define Hebrew letters in correct order
hebrew_letters = ['א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י',
                  'כ','ך','ל', 'מ',"ם", 'נ',"ן", 'ס', 'ע', 'פ',"ף", 'צ',"ץ", 'ק', 'ר', 'ש', 'ת']
index_to_letter = {str(i): hebrew_letters[i] for i in range(len(hebrew_letters))}

In [39]:
import os
import pandas as pd
import numpy as np
from PIL import Image

image_size = (28, 28)
data = []

dataset_path = '/content/train/TRAIN'

for folder_name in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder_name)
    if not os.path.isdir(folder_path) or folder_name not in index_to_letter:
        continue

    label = index_to_letter[folder_name]  # Hebrew letter

    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        try:
            img = Image.open(img_path).convert('L').resize(image_size)
            img_array = np.array(img).flatten()
            data.append([*img_array, label])
        except Exception as e:
            print(f"⚠️ Failed to load {img_path}: {e}")

# Create DataFrame
columns = [f'pixel{i}' for i in range(28*28)] + ['label']
df = pd.DataFrame(data, columns=columns)

In [40]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping


X = df.drop(columns=['label']).values
y = df['label'].values
X = X.reshape(-1, 28, 28, 1)
X = X.astype('float32') / 255
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y = to_categorical(y_encoded)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(
    monitor='val_accuracy',  # Monitor validation accuracy
    patience=15,              # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True # Restore model weights from the epoch with the best value of the monitored quantity.
)

model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping])
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 24ms/step - accuracy: 0.2392 - loss: 2.7096 - val_accuracy: 0.6512 - val_loss: 1.2895
Epoch 2/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.6938 - loss: 1.1082 - val_accuracy: 0.7340 - val_loss: 1.0142
Epoch 3/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - accuracy: 0.7513 - loss: 0.8860 - val_accuracy: 0.7754 - val_loss: 0.8725
Epoch 4/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7842 - loss: 0.7486 - val_accuracy: 0.7783 - val_loss: 0.8384
Epoch 5/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.8174 - loss: 0.6079 - val_accuracy: 0.7842 - val_loss: 0.8407
Epoch 6/100
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - accuracy: 0.8274 - loss: 0.5854 - val_accuracy: 0.7911 - val_loss: 0.8348
Epoch 7/100
[1