Load the Data

In [22]:
from google.colab import drive
drive.mount('/content/drive')

import os
import glob
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score
from PIL import Image

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Mount Drive & set paths

In [23]:
DIR_A   = "/content/drive/MyDrive/Syncrones/Syncrones_A"      # අ images
DIR_NOT = "/content/drive/MyDrive/Syncrones/Syncrones_not"    # NOT අ images

IMG_SIZE   = (64, 64)
BATCH_SIZE = 32
SEED       = 42

Collect image paths & labels[Label අ as 1/ Label not අ as 0]

In [24]:
def get_pngs(folder):
    return sorted(
        glob.glob(os.path.join(folder, "*.png")) +
        glob.glob(os.path.join(folder, "*.PNG"))
    )

a_paths   = get_pngs(DIR_A)
not_paths = get_pngs(DIR_NOT)

print("Number of අ images:     ", len(a_paths))
print("Number of NOT අ images: ", len(not_paths))

if len(a_paths) == 0 or len(not_paths) == 0:
    raise ValueError("One of the folders is empty. Check DIR_A and DIR_NOT.")

Number of අ images:      60
Number of NOT අ images:  22


In [25]:
# Create labels: 1 for අ, 0 for not අ
a_labels   = np.ones(len(a_paths), dtype=np.int32)
not_labels = np.zeros(len(not_paths), dtype=np.int32)

all_paths  = np.array(a_paths + not_paths)
all_labels = np.concatenate([a_labels, not_labels])

print("Total samples:", len(all_paths))
print("Total අ:", (all_labels == 1).sum())
print("Total not අ:", (all_labels == 0).sum())


Total samples: 82
Total අ: 60
Total not අ: 22


Shuffle & train/validation split

In [26]:
rng = np.random.default_rng(SEED)
indices = np.arange(len(all_paths))
rng.shuffle(indices)

all_paths  = all_paths[indices]
all_labels = all_labels[indices]

num_total  = len(all_paths)
train_size = int(0.8 * num_total)

train_paths  = all_paths[:train_size]
train_labels = all_labels[:train_size]

val_paths    = all_paths[train_size:]
val_labels   = all_labels[train_size:]

print("Train samples:", len(train_paths))
print("Val samples:  ", len(val_paths))

Train samples: 65
Val samples:   17


tf.data pipelines

In [27]:
def load_and_preprocess(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_image(image, channels=1, expand_animations=False)
    image = tf.image.resize(image, IMG_SIZE)
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = (
    train_ds
    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(1000, seed=SEED)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
val_ds = (
    val_ds
    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)


Quick sanity check:

In [28]:
for images, labels in train_ds.take(1):
    print("Batch image shape:", images.shape)
    print("Batch labels shape:", labels.shape)
    print("Labels:", labels.numpy())

Batch image shape: (32, 64, 64, 1)
Batch labels shape: (32,)
Labels: [1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1]


Build micro CNN model (with data augmentation)

In [29]:
input_shape = (IMG_SIZE[0], IMG_SIZE[1], 1)

data_augmentation = tf.keras.Sequential([
    layers.RandomRotation(0.1),
    layers.RandomTranslation(0.1, 0.1),
    layers.RandomZoom(0.1),
])

model = models.Sequential([
    layers.Input(shape=input_shape),
    data_augmentation,

    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # binary output
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

Train

In [30]:
EPOCHS = 50

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS
)

Epoch 1/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 248ms/step - accuracy: 0.7442 - loss: 0.6336 - val_accuracy: 0.7059 - val_loss: 0.5982
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 245ms/step - accuracy: 0.7559 - loss: 0.5146 - val_accuracy: 0.7059 - val_loss: 0.6027
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 246ms/step - accuracy: 0.7325 - loss: 0.5742 - val_accuracy: 0.7059 - val_loss: 0.6003
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 284ms/step - accuracy: 0.7325 - loss: 0.5349 - val_accuracy: 0.7059 - val_loss: 0.7372
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 183ms/step - accuracy: 0.7481 - loss: 0.6131 - val_accuracy: 0.7059 - val_loss: 0.7862
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 243ms/step - accuracy: 0.7364 - loss: 0.6681 - val_accuracy: 0.7059 - val_loss: 0.6170
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━

Evaluate on validation set

In [31]:
val_loss, val_acc = model.evaluate(val_ds)
print("Validation loss:", val_loss)
print("Validation accuracy:", val_acc)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304ms/step - accuracy: 0.7059 - loss: 0.6539
Validation loss: 0.6538763046264648
Validation accuracy: 0.7058823704719543


Collect probabilities & true labels for threshold tuning

In [32]:
y_true = []
y_prob = []

for images, labels in val_ds:
    probs = model.predict(images)
    probs = probs.reshape(-1)
    y_prob.extend(probs)
    y_true.extend(labels.numpy())

y_true = np.array(y_true)
y_prob = np.array(y_prob)

print("First 10 probs:", y_prob[:10])
print("First 10 true labels:", y_true[:10])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
First 10 probs: [0.86333853 0.8577392  0.8139435  0.8481167  0.8499319  0.8338654
 0.8139506  0.88708353 0.86143494 0.86577445]
First 10 true labels: [1 0 1 1 1 1 0 1 1 1]


Threshold tuning

In [33]:
thresholds = np.linspace(0.1, 0.9, 17)

best_thr = 0.5
best_acc = 0.0

print("\nThreshold sweep:")
for thr in thresholds:
    y_pred_thr = (y_prob >= thr).astype(int)
    acc  = accuracy_score(y_true, y_pred_thr)
    prec = precision_score(y_true, y_pred_thr, zero_division=0)
    rec  = recall_score(y_true, y_pred_thr, zero_division=0)
    print(f"thr={thr:.2f}  acc={acc:.3f}  precision(අ)={prec:.3f}  recall(අ)={rec:.3f}")

    if acc > best_acc:
        best_acc = acc
        best_thr = thr

print("\nBest threshold by accuracy:", best_thr, "acc:", best_acc)



Threshold sweep:
thr=0.10  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.15  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.20  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.25  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.30  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.35  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.40  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.45  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.50  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.55  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.60  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.65  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.70  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.75  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.80  acc=0.706  precision(අ)=0.706  recall(අ)=1.000
thr=0.85  acc=0.529  precision(අ)=0.700  recall(අ)=0.583
thr=0.90  acc=0.294  precision(අ)=0.000  recall(අ)=0.000

Best thresho

In [34]:
# You can choose to override this if you care more about higher precision for අ:
THRESHOLD = best_thr
print("Using THRESHOLD =", THRESHOLD)

# Final report with chosen threshold
y_pred_final = (y_prob >= THRESHOLD).astype(int)
print("\nClassification report (val set, tuned threshold):")
print(classification_report(y_true, y_pred_final, target_names=["Not අ", "අ"]))

print("Confusion matrix:")
print(confusion_matrix(y_true, y_pred_final))

Using THRESHOLD = 0.1

Classification report (val set, tuned threshold):
              precision    recall  f1-score   support

       Not අ       0.00      0.00      0.00         5
           අ       0.71      1.00      0.83        12

    accuracy                           0.71        17
   macro avg       0.35      0.50      0.41        17
weighted avg       0.50      0.71      0.58        17

Confusion matrix:
[[ 0  5]
 [ 0 12]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Single-image prediction helper

In [35]:
class_names = {1: "අ", 0: "Not අ"}

def preprocess_single_image(image_path):
    img = Image.open(image_path).convert("L")
    img = img.resize(IMG_SIZE)
    img = np.array(img).astype("float32") / 255.0
    img = np.expand_dims(img, axis=(0, -1))  # (1, H, W, 1)
    return img

def predict_image(image_path, threshold=THRESHOLD):
    img = preprocess_single_image(image_path)
    prob = model.predict(img)[0][0]
    label = 1 if prob >= threshold else 0
    return prob, class_names[label]

In [39]:
test_img = "/content/drive/MyDrive/Syncrones/testing/test_e.png"  # or from not folder
prob, pred = predict_image(test_img)
print("Predicted probability of අ:", prob)
print("Prediction:", pred)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Syncrones/testing/test_f.png'