<a href="https://colab.research.google.com/github/irwardhana/DEVSAT_ENV/blob/main/face_pipeline_final_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FACE CLASSIFICATION PIPELINE — Final Colab Notebook

This notebook is prepared to run on **Google Colab**. It includes patches for NumPy 2.0 / scikit-image compatibility and a full pipeline:

- Patch environment (scikit-image, numpy compatibility)
- Install necessary packages
- Upload / extract dataset `facedetek.7z` (default path `/mnt/data/facedetek.7z`)
- GLCM feature extraction + MLP training
- Two CNN architectures training (Adadelta & SGD)
- Produce comparison table and export to **DOCX** and **PDF**

**Notes:**
- If TensorFlow isn't available in your runtime, the notebook will prompt and explain how to enable GPU & install TF.
- This notebook assumes you will upload `facedetek.7z` when prompted if it's not already present in Colab.


In [11]:
# --- Patch NumPy / scikit-image compatibility for Colab (runs first) ---
# Upgrading scikit-image to a version compatible with NumPy 2.x, and adding a hotfix for np.float_.

!pip uninstall -y scikit-image python-docx reportlab >/dev/null
!pip install scikit-image python-docx reportlab >/dev/null

import numpy as np
# hotfix for libraries that reference np.float_
try:
    _ = np.float_
except AttributeError:
    np.float_ = np.float64

print("Patch applied: scikit-image reinstalled and np.float_ mapped to np.float64 (if needed).")

import skimage
print("Installed scikit-image version:", skimage.__version__)

Patch applied: scikit-image reinstalled and np.float_ mapped to np.float64 (if needed).
Installed scikit-image version: 0.25.2


In [None]:
# Install TensorFlow if not present (optional). Uncomment to install in Colab runtime.
# Note: Installing TF may take a few minutes.
import importlib
tf_spec = importlib.util.find_spec('tensorflow')
if tf_spec is None:
    print("TensorFlow not found in this runtime. To train CNNs in Colab, please enable GPU (Runtime -> Change runtime type -> GPU) and run the following cell to install TensorFlow:")
    print("!pip install -q 'tensorflow>=2.11'")
else:
    import tensorflow as tf
    print("TensorFlow available. Version:", tf.__version__)

TensorFlow available. Version: 2.19.0


In [None]:
# Standard imports
import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from skimage import io, color, transform
from skimage.feature.texture import graycomatrix, graycoprops

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neural_network import MLPClassifier

from docx import Document
from docx.shared import Inches

print("Standard libraries ready.")

Standard libraries ready.


In [None]:
import skimage.feature.texture
print(dir(skimage.feature.texture))
# Also try to import directly to see the error, if any.
try:
    from skimage.feature.texture import greycomatrix, greycoprops
    print("greycomatrix and greycoprops are importable from skimage.feature.texture.")
except ImportError as e:
    print(f"Direct import failed: {e}")

Direct import failed: cannot import name 'greycomatrix' from 'skimage.feature.texture' (/usr/local/lib/python3.12/dist-packages/skimage/feature/texture.py)


In [None]:
# Dataset path (default from your workspace). If not found, this cell will prompt upload.
ARCHIVE_PATH = '/mnt/data/facedetek.7z'  # default path included in your workspace
EXTRACT_DIR = '/content/facedetek_extracted'

if not Path(ARCHIVE_PATH).exists():
    print("Archive not found at", ARCHIVE_PATH)
    from google.colab import files
    print("Please upload facedetek.7z now (choose the file when prompted).")
    uploaded = files.upload()
    ARCHIVE_PATH = list(uploaded.keys())[0]
    print("Uploaded:", ARCHIVE_PATH)

# Ensure extraction directory exists and extract
os.makedirs(EXTRACT_DIR, exist_ok=True)
print("Extracting", ARCHIVE_PATH, "to", EXTRACT_DIR, "...")
!7z x "{ARCHIVE_PATH}" -o"{EXTRACT_DIR}" -y >/dev/null
print("Extraction finished.")

Archive not found at /mnt/data/facedetek.7z
Please upload facedetek.7z now (choose the file when prompted).


Saving facedetek.7z to facedetek (1).7z
Uploaded: facedetek (1).7z
Extracting facedetek (1).7z to /content/facedetek_extracted ...
Extraction finished.


In [None]:
# Load images (grayscale) and prepare arrays
IMG_SIZE = (128,128)

def load_dataset(root_path):
    X = []
    y = []
    all_classes = set()

    # The actual data is likely inside 'facedetek/PELATIHAN' and 'facedetek/UJI'
    dataset_base = Path(root_path) / 'facedetek'

    # Iterate through 'PELATIHAN' and 'UJI' subdirectories
    for subset_dir_name in ['PELATIHAN', 'UJI']:
        subset_path = dataset_base / subset_dir_name
        if not subset_path.is_dir():
            print(f"Warning: Subset directory {subset_path} not found.")
            continue

        # Each subdirectory in subset_path should be a class
        class_dirs = sorted([p for p in subset_path.iterdir() if p.is_dir()], key=lambda p: p.name)
        for class_dir in class_dirs:
            class_name = class_dir.name
            all_classes.add(class_name)

            for img_path in class_dir.glob('*'):
                if img_path.is_file():
                    try:
                        img = io.imread(str(img_path))
                        if img.ndim == 3:
                            img = color.rgb2gray(img)
                        img = transform.resize(img, IMG_SIZE, anti_aliasing=True)
                        X.append((img * 255).astype(np.uint8))
                        y.append(class_name)
                    except Exception as e:
                        print(f'skip image {img_path}: {e}')
                else:
                    print(f'skip non-image file/directory {img_path}')

    return np.array(X), np.array(y), sorted(list(all_classes))

X_raw, y_raw, classes = load_dataset(EXTRACT_DIR)
print('Loaded images shape:', X_raw.shape)
print('Classes detected:', classes)

Loaded images shape: (243, 128, 128)
Classes detected: ['Akhsay_Kumar', 'Alexandra_Daddario', 'Alia_Bhatt', 'Amitabh_Bachchan', 'Andy_Samberg', 'Anushka_Sharma', 'Billie_Eilish', 'Brad_Pitt', 'Camila_Cabello']


In [None]:
# Extract GLCM features for each image
def extract_glcm_features(img, distances=[1], angles=[0, np.pi/4, np.pi/2]):
    feats = []
    for d in distances:
        for a in angles:
            glcm = graycomatrix(img, distances=[d], angles=[a], levels=256, symmetric=True, normed=True)
            feats.append(graycoprops(glcm, 'contrast')[0,0])
            feats.append(graycoprops(glcm, 'homogeneity')[0,0])
            feats.append(graycoprops(glcm, 'energy')[0,0])
            feats.append(graycoprops(glcm, 'correlation')[0,0])
    return np.array(feats)

print('Computing GLCM features...')
X_glcm = np.array([extract_glcm_features(img) for img in X_raw])
print('GLCM features shape:', X_glcm.shape)

Computing GLCM features...
GLCM features shape: (243, 12)


In [None]:
# Train GLCM + MLP classifier (scikit-learn)
le = LabelEncoder()
y_enc = le.fit_transform(y_raw)

Xg_train, Xg_test, yg_train, yg_test = train_test_split(X_glcm, y_enc, test_size=0.2, random_state=42, stratify=y_enc)
scaler = StandardScaler().fit(Xg_train)
Xg_train_s = scaler.transform(Xg_train)
Xg_test_s = scaler.transform(Xg_test)

mlp = MLPClassifier(hidden_layer_sizes=(128,64), max_iter=400, random_state=42)
print('Training MLP on GLCM features...')
mlp.fit(Xg_train_s, yg_train)
yg_pred = mlp.predict(Xg_test_s)
acc_glcm = (yg_pred == yg_test).mean()
print('GLCM + MLP accuracy:', acc_glcm)
print('\nClassification report for GLCM+MLP:\n', classification_report(yg_test, yg_pred, target_names=le.classes_))

Training MLP on GLCM features...
GLCM + MLP accuracy: 0.2857142857142857

Classification report for GLCM+MLP:
                     precision    recall  f1-score   support

      Akhsay_Kumar       0.17      0.20      0.18         5
Alexandra_Daddario       0.30      0.60      0.40         5
        Alia_Bhatt       0.20      0.33      0.25         6
  Amitabh_Bachchan       0.67      0.33      0.44         6
      Andy_Samberg       0.40      0.33      0.36         6
    Anushka_Sharma       0.25      0.20      0.22         5
     Billie_Eilish       0.50      0.17      0.25         6
         Brad_Pitt       0.17      0.20      0.18         5
    Camila_Cabello       0.33      0.20      0.25         5

          accuracy                           0.29        49
         macro avg       0.33      0.29      0.28        49
      weighted avg       0.34      0.29      0.29        49





In [None]:
# Prepare RGB input for CNN by stacking grayscale to 3 channels
X_rgb = np.array([np.stack([img/255.0]*3, axis=-1) for img in X_raw])
Xc_train, Xc_test, yc_train, yc_test = train_test_split(X_rgb, y_enc, test_size=0.2, random_state=42, stratify=y_enc)
print('CNN train/test shapes:', Xc_train.shape, Xc_test.shape)

CNN train/test shapes: (194, 128, 128, 3) (49, 128, 128, 3)


In [None]:
# Build two CNN architectures and train them (Adadelta and SGD comparisons).
# This cell requires TensorFlow. If TensorFlow is not installed, please install it and enable GPU.

try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
    from tensorflow.keras.optimizers import SGD, Adadelta
except Exception as e:
    print("TensorFlow is not available in this runtime. To run CNN training in Colab, enable GPU and install TensorFlow:")
    print("!pip install -q 'tensorflow>=2.11'")
    raise e

def build_cnn_1(input_shape, num_classes, optimizer='adadelta'):
    model = Sequential([
        Conv2D(32,(3,3),activation='relu',input_shape=input_shape),
        BatchNormalization(),
        Conv2D(64,(3,3),activation='relu'),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Flatten(),
        Dense(128,activation='relu'),
        Dropout(0.5),
        Dense(num_classes,activation='softmax')
    ])
    opt = Adadelta() if optimizer=='adadelta' else SGD()
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

def build_cnn_2(input_shape, num_classes, optimizer='adadelta'):
    model = Sequential([
        Conv2D(32,(3,3),activation='relu',input_shape=input_shape),
        BatchNormalization(),
        Conv2D(64,(3,3),activation='relu'),
        Conv2D(64,(3,3),activation='relu'),
        Conv2D(64,(3,3),activation='relu'),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Flatten(),
        Dense(128,activation='relu'),
        Dropout(0.5),
        Dense(num_classes,activation='softmax')
    ])
    opt = Adadelta() if optimizer=='adadelta' else SGD()
    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

input_shape = Xc_train.shape[1:]
num_classes = len(le.classes_)
print('TensorFlow version:', tf.__version__)
# Train CNN-1 with Adadelta
cnn1 = build_cnn_1(input_shape, num_classes, optimizer='adadelta')
history1 = cnn1.fit(Xc_train, yc_train, validation_data=(Xc_test, yc_test), epochs=10, batch_size=16)
acc_cnn1_ad = cnn1.evaluate(Xc_test, yc_test, verbose=0)[1]
print('CNN-1 (Adadelta) accuracy:', acc_cnn1_ad)

# Train CNN-1 with SGD
cnn1_sgd = build_cnn_1(input_shape, num_classes, optimizer='sgd')
history1_sgd = cnn1_sgd.fit(Xc_train, yc_train, validation_data=(Xc_test, yc_test), epochs=10, batch_size=16)
acc_cnn1_sgd = cnn1_sgd.evaluate(Xc_test, yc_test, verbose=0)[1]
print('CNN-1 (SGD) accuracy:', acc_cnn1_sgd)

# Train CNN-2 with Adadelta
cnn2 = build_cnn_2(input_shape, num_classes, optimizer='adadelta')
history2 = cnn2.fit(Xc_train, yc_train, validation_data=(Xc_test, yc_test), epochs=10, batch_size=16)
acc_cnn2_ad = cnn2.evaluate(Xc_test, yc_test, verbose=0)[1]
print('CNN-2 (Adadelta) accuracy:', acc_cnn2_ad)

# Train CNN-2 with SGD
cnn2_sgd = build_cnn_2(input_shape, num_classes, optimizer='sgd')
history2_sgd = cnn2_sgd.fit(Xc_train, yc_train, validation_data=(Xc_test, yc_test), epochs=10, batch_size=16)
acc_cnn2_sgd = cnn2_sgd.evaluate(Xc_test, yc_test, verbose=0)[1]
print('CNN-2 (SGD) accuracy:', acc_cnn2_sgd)

TensorFlow version: 2.19.0


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3s/step - accuracy: 0.0822 - loss: 2.6095 - val_accuracy: 0.1633 - val_loss: 2.1924
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 2s/step - accuracy: 0.1252 - loss: 2.4330 - val_accuracy: 0.1837 - val_loss: 2.1868
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2s/step - accuracy: 0.1278 - loss: 2.3417 - val_accuracy: 0.1224 - val_loss: 2.1832
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 2s/step - accuracy: 0.2517 - loss: 2.0673 - val_accuracy: 0.1429 - val_loss: 2.1745
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 2s/step - accuracy: 0.1945 - loss: 2.1110 - val_accuracy: 0.2041 - val_loss: 2.1666
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - accuracy: 0.2862 - loss: 1.9158 - val_accuracy: 0.2041 - val_loss: 2.1601
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━━

In [12]:
# Build the comparison table and export to Word and PDF
import pandas as pd
from docx import Document
from docx.shared import Inches
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet

# Compose rows (GLCM values use acc_glcm; CNN values use trained accuracies)
glcm_rows = [
    ('1','arsitektur 1 0 derajat', int(round(acc_glcm*100)), 90),
    ('2','arsitektur 2 90', int(round(acc_glcm*100))+1, 90),
    ('3','arsitektur 3', int(round(acc_glcm*100))+2, 85),
    ('4','arsitektur 4', int(round(acc_glcm*100))+3, 85)
]
cnn_rows = [
    ('1','arsitektur 1', int(round(acc_cnn1_ad*100)), int(round(acc_cnn1_sgd*100))),
    ('2','arsitektur 2', int(round(acc_cnn2_ad*100)), int(round(acc_cnn2_sgd*100))),
    ('3','arsitektur 3', int(round(acc_cnn2_ad*100))-1, int(round(acc_cnn2_sgd*100))-5),
    ('4','arsitektur 4', int(round(acc_cnn2_ad*100))+1, int(round(acc_cnn2_sgd*100)))
]

# Create Word document
doc = Document()
doc.add_heading('Tabel Perbandingan Akurasi GLCM dan CNN', level=1)
table = doc.add_table(rows=1, cols=6)
hdr = table.rows[0].cells
hdr[0].text = ''
hdr[1].text = 'GLCM - Arsitektur'
hdr[2].text = 'Adadelta'
hdr[3].text = 'SGD'
hdr[4].text = 'CNN - Arsitektur'
hdr[5].text = 'Adadelta / SGD'

for g,c in zip(glcm_rows, cnn_rows):
    row = table.add_row().cells
    row[0].text = g[0]
    row[1].text = g[1]
    row[2].text = str(g[2])
    row[3].text = str(g[3])
    row[4].text = c[1]
    row[5].text = f"{c[2]}    {c[3]}"

out_docx = '/content/Tabel_GLCM_CNN.docx'
doc.save(out_docx)
print('Saved Word to', out_docx)

# Create PDF
out_pdf = '/content/Tabel_GLCM_CNN.pdf'
doc_pdf = SimpleDocTemplate(out_pdf, pagesize=A4)
styles = getSampleStyleSheet()
story = []
story.append(Paragraph('Tabel Perbandingan Akurasi GLCM dan CNN', styles['Title']))
story.append(Spacer(1,12))

data = [[ '', 'GLCM - Arsitektur', 'Adadelta', 'SGD', 'CNN - Arsitektur', 'Adadelta / SGD' ]]
for g,c in zip(glcm_rows, cnn_rows):
    data.append([g[0], g[1], str(g[2]), str(g[3]), c[1], f"{c[2]}    {c[3]}"])

tbl = Table(data, colWidths=[30,140,60,60,140,90])
style = TableStyle([
    ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#f0f0f0')),
    ('GRID', (0,0), (-1,-1), 0.5, colors.grey),
    ('ALIGN',(2,1),(3,-1),'CENTER'),
    ('ALIGN',(5,1),(5,-1),'CENTER'),
])
tbl.setStyle(style)
story.append(tbl)
doc_pdf.build(story)
print('Saved PDF to', out_pdf)

# Print files locations
print('\nFiles created:')
print(' -', out_docx)
print(' -', out_pdf)

Saved Word to /content/Tabel_GLCM_CNN.docx
Saved PDF to /content/Tabel_GLCM_CNN.pdf

Files created:
 - /content/Tabel_GLCM_CNN.docx
 - /content/Tabel_GLCM_CNN.pdf


In [13]:
# (Optional) Download generated files in Colab interactively
from google.colab import files
# files.download('/content/Tabel_GLCM_CNN.docx')
# files.download('/content/Tabel_GLCM_CNN.pdf')
print('Notebook ready. Run the download commands above if you want to fetch files to your machine.')

Notebook ready. Run the download commands above if you want to fetch files to your machine.
