<a href="https://colab.research.google.com/github/aweeeaja/Pengenalan-Pola/blob/main/Pengenalan_Digit_Tulisan_Tangan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from skimage.feature import hog

def load_images_from_folder(folder, img_size=(64, 64)):
    """Load every labelled, readable image under ``folder`` as a resized grayscale array.

    The directory tree is walked recursively. The label of a file is the
    integer that precedes the first ``_`` in its file name (for example
    ``7_0012.png`` -> ``7``).

    Files are skipped (not loaded) when:
      * the part of the name before the first ``_`` is not an integer, or
      * ``cv2.imread`` cannot decode the file (it returns ``None``).

    Args:
        folder: Root directory to scan.
        img_size: Target size handed to ``cv2.resize`` for every image.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per loaded
        image. Entries follow sorted directory / file-name order so that the
        result does not depend on the filesystem's listing order.
    """
    images = []
    labels = []
    for subdir, dirs, files in os.walk(folder):
        # Sorting in place steers os.walk, making the traversal order deterministic.
        dirs.sort()
        for file in sorted(files):
            # Parse the label first: a file without a numeric prefix used to
            # abort the whole load with ValueError after the image was read.
            try:
                label = int(file.split('_')[0])
            except ValueError:
                continue
            img_path = os.path.join(subdir, file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            images.append(cv2.resize(img, img_size))
            labels.append(label)
    return np.array(images), np.array(labels)

def extract_features_sift(images):
    """Build one SIFT feature vector per image.

    For each image the descriptor array returned by ``detectAndCompute`` is
    flattened into a single vector; when no descriptors are returned a zero
    vector of ``descriptorSize()`` elements is used instead. The per-image
    vectors are then zero-padded to a common length by ``pad_descriptors``.

    Args:
        images: Iterable of images accepted by ``cv2.SIFT.detectAndCompute``.

    Returns:
        The array produced by ``pad_descriptors`` (one row per image).
    """
    detector = cv2.SIFT_create()
    width = detector.descriptorSize()
    per_image = []
    for image in images:
        _, found = detector.detectAndCompute(image, None)
        per_image.append(np.zeros(width) if found is None else found.flatten())
    return pad_descriptors(per_image, width)

def extract_features_orb(images):
    """Build one ORB feature vector per image.

    For each image the descriptor array returned by ``detectAndCompute`` is
    flattened into a single vector; when no descriptors are returned a zero
    vector of ``descriptorSize()`` elements is used instead. The per-image
    vectors are then zero-padded to a common length by ``pad_descriptors``.

    Args:
        images: Iterable of images accepted by ``cv2.ORB.detectAndCompute``.

    Returns:
        The array produced by ``pad_descriptors`` (one row per image).
    """
    orb = cv2.ORB_create()
    # Ask the detector for its descriptor width instead of hard-coding 32,
    # mirroring extract_features_sift.
    descriptor_size = orb.descriptorSize()
    descriptors_list = []
    for img in images:
        _, descriptors = orb.detectAndCompute(img, None)
        if descriptors is not None:
            descriptors_list.append(descriptors.flatten())
        else:
            descriptors_list.append(np.zeros(descriptor_size))
    return pad_descriptors(descriptors_list, descriptor_size)

def extract_features_hog(images):
    """Compute a HOG feature vector for every image.

    Uses 8x8-pixel cells and 2x2-cell blocks. Only the feature vector is
    requested: the previous ``visualize=True`` also rendered a HOG image per
    sample that was immediately discarded.

    Args:
        images: Iterable of 2-D images accepted by ``skimage.feature.hog``.

    Returns:
        NumPy array with one HOG vector per image (an empty array for empty input).
    """
    hog_features = [
        hog(img, pixels_per_cell=(8, 8), cells_per_block=(2, 2))
        for img in images
    ]
    return np.array(hog_features)

def pad_descriptors(descriptors, size):
    """Right-pad 1-D descriptor vectors with zeros to a common length.

    Args:
        descriptors: Sequence of 1-D NumPy arrays, possibly of different lengths.
        size: Width of a single descriptor. Only used to shape the result when
            ``descriptors`` is empty, which previously crashed on ``max([])``.

    Returns:
        2-D NumPy array with one row per input vector, each row as long as the
        longest input. For empty input an array of shape ``(0, size)``.
    """
    if len(descriptors) == 0:
        return np.zeros((0, size))

    max_length = max(desc.shape[0] for desc in descriptors)
    padded_descriptors = []
    for desc in descriptors:
        shortfall = max_length - desc.shape[0]
        if shortfall > 0:
            # Append zeros so every row ends up with max_length elements.
            desc = np.hstack((desc, np.zeros(shortfall)))
        padded_descriptors.append(desc)
    return np.array(padded_descriptors)

def preprocess_and_extract_features(feature_extractor, images):
    """Normalise the images, extract features and standardise the feature matrix.

    Steps:
      1. Each image is min-max rescaled to the 0..255 range and cast to uint8.
      2. ``feature_extractor`` is called once with the list of rescaled images.
      3. A ``StandardScaler`` is fitted on the resulting features and applied
         to those same features; the result is cast to float32.

    NOTE(review): the scaler is fitted on everything passed in, so callers
    that split into train/test afterwards let test-set statistics influence
    the scaling.

    Args:
        feature_extractor: Callable taking a list of uint8 images and returning
            a 2-D feature array.
        images: Iterable of images accepted by ``cv2.normalize``.

    Returns:
        The standardised feature matrix as float32.
    """
    stretched = []
    for img in images:
        rescaled = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)
        stretched.append(rescaled.astype('uint8'))

    features = feature_extractor(stretched)

    scaler = StandardScaler()
    scaler.fit(features)
    return scaler.transform(features).astype(np.float32)

# Load images and labels
data_folder = '/content/drive/MyDrive/Pepo/DataSet_05'  # Change to the path to your dataset
X, y = load_images_from_folder(data_folder)

# Fail fast with a clear message: a missing or unmounted folder yields empty
# arrays, which would otherwise only surface later as an obscure scaler error.
if len(X) == 0:
    raise FileNotFoundError(
        f"No labelled images could be loaded from {data_folder!r}. "
        "Check that the path exists (on Colab, that Google Drive is mounted) "
        "and that file names look like '<digit>_<anything>'."
    )


In [None]:
# Extract HOG features (images are normalised and the features standardised
# inside preprocess_and_extract_features)
X_hog = preprocess_and_extract_features(extract_features_hog, X)

# Split data: 30% of the samples are held out for testing, with a fixed seed
X_train_hog, X_test_hog, y_train_hog, y_test_hog = train_test_split(X_hog, y, test_size=0.3, random_state=42)

# Train and evaluate models for HOG.
# The tree ensembles are randomised; they are seeded so that the reported
# accuracies can be reproduced on a re-run.
models = {
    "SVM": SVC(gamma='auto'),
    "KNN": KNeighborsClassifier(n_neighbors=3),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42)
}

# Fit every classifier on the training split and record its test accuracy
results_hog = {}
for name, model in models.items():
    model.fit(X_train_hog, y_train_hog)
    y_pred = model.predict(X_test_hog)
    accuracy = accuracy_score(y_test_hog, y_pred)
    results_hog[name] = accuracy

print("HOG Results:")
# Distinct loop names so `model` keeps referring to the last fitted estimator
for model_name, model_accuracy in results_hog.items():
    print(f"HOG + {model_name} Accuracy:", model_accuracy)


HOG Results:
HOG + SVM Accuracy: 0.9642541924095323
HOG + KNN Accuracy: 0.9232127096204766
HOG + Random Forest Accuracy: 0.9655781112091791
HOG + Gradient Boosting Accuracy: 0.9523389232127096


In [None]:
# Build the standardised HOG feature matrix from the loaded images
X_hog = preprocess_and_extract_features(extract_features_hog, X)

# Hold out 40% of the samples for evaluation (fixed seed)
(X_train_hog, X_test_hog,
 y_train_hog, y_test_hog) = train_test_split(X_hog, y, test_size=0.4, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_hog = {}
for name, model in models.items():
    y_pred = model.fit(X_train_hog, y_train_hog).predict(X_test_hog)
    results_hog[name] = accuracy_score(y_test_hog, y_pred)

print("HOG Results:")
for model, accuracy in results_hog.items():
    print(f"HOG + {model} Accuracy:", accuracy)


HOG Results:
HOG + SVM Accuracy: 0.9626075446724024


In [None]:
# Build the standardised HOG feature matrix from the loaded images
X_hog = preprocess_and_extract_features(extract_features_hog, X)

# Hold out 20% of the samples for evaluation (fixed seed)
(X_train_hog, X_test_hog,
 y_train_hog, y_test_hog) = train_test_split(X_hog, y, test_size=0.2, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_hog = {}
for name, model in models.items():
    y_pred = model.fit(X_train_hog, y_train_hog).predict(X_test_hog)
    results_hog[name] = accuracy_score(y_test_hog, y_pred)

print("HOG Results:")
for model, accuracy in results_hog.items():
    print(f"HOG + {model} Accuracy:", accuracy)


HOG Results:
HOG + SVM Accuracy: 0.9629384513567174


In [None]:
# Extract HOG features (images are normalised and the features standardised
# inside preprocess_and_extract_features)
X_hog = preprocess_and_extract_features(extract_features_hog, X)

# Split data: 40% of the samples are held out for testing, with a fixed seed
X_train_hog, X_test_hog, y_train_hog, y_test_hog = train_test_split(X_hog, y, test_size=0.4, random_state=42)

# Train and evaluate models for HOG (only Random Forest in this run).
# The forest is randomised; it is seeded so that the reported accuracy can be
# reproduced on a re-run.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Fit every classifier on the training split and record its test accuracy
results_hog = {}
for name, model in models.items():
    model.fit(X_train_hog, y_train_hog)
    y_pred = model.predict(X_test_hog)
    accuracy = accuracy_score(y_test_hog, y_pred)
    results_hog[name] = accuracy

print("HOG Results:")
# Distinct loop names so `model` keeps referring to the last fitted estimator
for model_name, model_accuracy in results_hog.items():
    print(f"HOG + {model_name} Accuracy:", model_accuracy)


HOG Results:
HOG + Random Forest Accuracy: 0.9665784248841827


In [None]:
# Extract HOG features (images are normalised and the features standardised
# inside preprocess_and_extract_features)
X_hog = preprocess_and_extract_features(extract_features_hog, X)

# Split data: 20% of the samples are held out for testing, with a fixed seed
X_train_hog, X_test_hog, y_train_hog, y_test_hog = train_test_split(X_hog, y, test_size=0.2, random_state=42)

# Train and evaluate models for HOG (only Random Forest in this run).
# The forest is randomised; it is seeded so that the reported accuracy can be
# reproduced on a re-run.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Fit every classifier on the training split and record its test accuracy
results_hog = {}
for name, model in models.items():
    model.fit(X_train_hog, y_train_hog)
    y_pred = model.predict(X_test_hog)
    accuracy = accuracy_score(y_test_hog, y_pred)
    results_hog[name] = accuracy

print("HOG Results:")
# Distinct loop names so `model` keeps referring to the last fitted estimator
for model_name, model_accuracy in results_hog.items():
    print(f"HOG + {model_name} Accuracy:", model_accuracy)


HOG Results:
HOG + Random Forest Accuracy: 0.9636002647253474


In [None]:
# Baseline without feature extraction: every image becomes one float32 row of raw pixels
X_flattened = np.array([img.ravel() for img in X], dtype=np.float32)

# Standardise the pixel matrix (the scaler is fitted on all samples, before the split)
scaler_flattened = StandardScaler()
scaler_flattened.fit(X_flattened)
X_flattened = scaler_flattened.transform(X_flattened).astype(np.float32)

# Hold out 30% of the samples for evaluation (fixed seed)
(X_train_flat, X_test_flat,
 y_train_flat, y_test_flat) = train_test_split(X_flattened, y, test_size=0.3, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_flat = {}
for name, model in models.items():
    y_pred = model.fit(X_train_flat, y_train_flat).predict(X_test_flat)
    results_flat[name] = accuracy_score(y_test_flat, y_pred)

print("No Feature Extraction Results:")
for model, accuracy in results_flat.items():
    print(f"No Feature Extraction + {model} Accuracy:", accuracy)


No Feature Extraction Results:
No Feature Extraction + SVM Accuracy: 0.8790820829655781


In [None]:
# Baseline without feature extraction: every image becomes one float32 row of raw pixels
X_flattened = np.array([img.ravel() for img in X], dtype=np.float32)

# Standardise the pixel matrix (the scaler is fitted on all samples, before the split)
scaler_flattened = StandardScaler()
scaler_flattened.fit(X_flattened)
X_flattened = scaler_flattened.transform(X_flattened).astype(np.float32)

# Hold out 40% of the samples for evaluation (fixed seed)
(X_train_flat, X_test_flat,
 y_train_flat, y_test_flat) = train_test_split(X_flattened, y, test_size=0.4, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_flat = {}
for name, model in models.items():
    y_pred = model.fit(X_train_flat, y_train_flat).predict(X_test_flat)
    results_flat[name] = accuracy_score(y_test_flat, y_pred)

print("No Feature Extraction Results:")
for model, accuracy in results_flat.items():
    print(f"No Feature Extraction + {model} Accuracy:", accuracy)


No Feature Extraction Results:
No Feature Extraction + SVM Accuracy: 0.8749172733289212


In [None]:
# Baseline without feature extraction: every image becomes one float32 row of raw pixels
X_flattened = np.array([img.ravel() for img in X], dtype=np.float32)

# Standardise the pixel matrix (the scaler is fitted on all samples, before the split)
scaler_flattened = StandardScaler()
scaler_flattened.fit(X_flattened)
X_flattened = scaler_flattened.transform(X_flattened).astype(np.float32)

# Hold out 20% of the samples for evaluation (fixed seed)
(X_train_flat, X_test_flat,
 y_train_flat, y_test_flat) = train_test_split(X_flattened, y, test_size=0.2, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_flat = {}
for name, model in models.items():
    y_pred = model.fit(X_train_flat, y_train_flat).predict(X_test_flat)
    results_flat[name] = accuracy_score(y_test_flat, y_pred)

print("No Feature Extraction Results:")
for model, accuracy in results_flat.items():
    print(f"No Feature Extraction + {model} Accuracy:", accuracy)


No Feature Extraction Results:
No Feature Extraction + SVM Accuracy: 0.8894771674387822


In [None]:
# Flatten images for no feature extraction scenario: one float32 row of raw
# pixels per image, standardised with a scaler fitted on all samples
X_flattened = [img.flatten() for img in X]
X_flattened = np.array(X_flattened).astype(np.float32)
scaler_flattened = StandardScaler().fit(X_flattened)
X_flattened = scaler_flattened.transform(X_flattened).astype(np.float32)

# Split data: 40% of the samples are held out for testing, with a fixed seed
X_train_flat, X_test_flat, y_train_flat, y_test_flat = train_test_split(X_flattened, y, test_size=0.4, random_state=42)

# Train and evaluate models for flattened images (only Random Forest in this run).
# The forest is randomised; it is seeded so that the reported accuracy can be
# reproduced on a re-run.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Fit every classifier on the training split and record its test accuracy
results_flat = {}
for name, model in models.items():
    model.fit(X_train_flat, y_train_flat)
    y_pred = model.predict(X_test_flat)
    accuracy = accuracy_score(y_test_flat, y_pred)
    results_flat[name] = accuracy

print("No Feature Extraction Results:")
# Distinct loop names so `model` keeps referring to the last fitted estimator
for model_name, model_accuracy in results_flat.items():
    print(f"No Feature Extraction + {model_name} Accuracy:", model_accuracy)


No Feature Extraction Results:
No Feature Extraction + Random Forest Accuracy: 0.8504301786896096


In [None]:
# NOTE(review): this cell repeats an earlier configuration in this notebook
# (flattened pixels + SVM, test_size=0.3, random_state=42).

# Baseline without feature extraction: every image becomes one float32 row of raw pixels
X_flattened = np.array([img.ravel() for img in X], dtype=np.float32)

# Standardise the pixel matrix (the scaler is fitted on all samples, before the split)
scaler_flattened = StandardScaler()
scaler_flattened.fit(X_flattened)
X_flattened = scaler_flattened.transform(X_flattened).astype(np.float32)

# Hold out 30% of the samples for evaluation (fixed seed)
(X_train_flat, X_test_flat,
 y_train_flat, y_test_flat) = train_test_split(X_flattened, y, test_size=0.3, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_flat = {}
for name, model in models.items():
    y_pred = model.fit(X_train_flat, y_train_flat).predict(X_test_flat)
    results_flat[name] = accuracy_score(y_test_flat, y_pred)

print("No Feature Extraction Results:")
for model, accuracy in results_flat.items():
    print(f"No Feature Extraction + {model} Accuracy:", accuracy)


No Feature Extraction Results:
No Feature Extraction + SVM Accuracy: 0.8790820829655781


In [None]:
# NOTE(review): this cell repeats an earlier configuration in this notebook
# (flattened pixels + SVM, test_size=0.2, random_state=42).

# Baseline without feature extraction: every image becomes one float32 row of raw pixels
X_flattened = np.array([img.ravel() for img in X], dtype=np.float32)

# Standardise the pixel matrix (the scaler is fitted on all samples, before the split)
scaler_flattened = StandardScaler()
scaler_flattened.fit(X_flattened)
X_flattened = scaler_flattened.transform(X_flattened).astype(np.float32)

# Hold out 20% of the samples for evaluation (fixed seed)
(X_train_flat, X_test_flat,
 y_train_flat, y_test_flat) = train_test_split(X_flattened, y, test_size=0.2, random_state=42)

# Only the SVM is evaluated in this run
models = {"SVM": SVC(gamma='auto')}

# Fit each classifier and store its accuracy on the held-out split
results_flat = {}
for name, model in models.items():
    y_pred = model.fit(X_train_flat, y_train_flat).predict(X_test_flat)
    results_flat[name] = accuracy_score(y_test_flat, y_pred)

print("No Feature Extraction Results:")
for model, accuracy in results_flat.items():
    print(f"No Feature Extraction + {model} Accuracy:", accuracy)


No Feature Extraction Results:
No Feature Extraction + SVM Accuracy: 0.8894771674387822
