<a href="https://colab.research.google.com/github/byhaqii/Machine-Learning-2025/blob/main/PBL_Train_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
DATASET_PATH = "/content/drive/MyDrive/Fruit-Market"

IMG_SIZE = (128, 128)

# Import Libraries & Define Feature Extraction Functions

In [3]:
import os
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops, hog
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import joblib

# --- FEATURE EXTRACTION FUNCTIONS ---
def extract_hsv_features(image):
    # Extract Color features (Mean and Std Dev)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mean = np.mean(hsv, axis=(0, 1))
    std = np.std(hsv, axis=(0, 1))
    return np.concatenate([mean, std])

def extract_glcm_features(image):
    # Extract Texture features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    glcm = graycomatrix(gray, [1], [0], 256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    return np.array([contrast, energy, homogeneity, correlation])

def extract_hog_features(image):
    # Extract Shape features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features = hog(gray, orientations=9, pixels_per_cell=(16, 16),
                   cells_per_block=(2, 2), block_norm='L2-Hys', visualize=False)
    return features

def extract_all_features(image_path):
    img = cv2.imread(image_path)
    if img is None: return None

    # Resize using the IMG_SIZE you defined in the previous cell
    img = cv2.resize(img, IMG_SIZE)

    # Combine 3 methods (Color, Texture, Shape)
    hsv = extract_hsv_features(img)
    glcm = extract_glcm_features(img)
    hog_feats = extract_hog_features(img)

    return np.hstack([hsv, glcm, hog_feats])

# Data Processing Loop

In [4]:
print(f"Reading data from: {DATASET_PATH}")

data_features = []
data_labels = []

if not os.path.exists(DATASET_PATH):
    print("ERROR: Folder path not found!")
else:
    # Loop through each fruit folder
    for label in os.listdir(DATASET_PATH):
        folder_path = os.path.join(DATASET_PATH, label)

        if os.path.isdir(folder_path):
            print(f"--> Processing class: {label}...")
            for file_name in os.listdir(folder_path):
                # Ensure we only read images
                if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(folder_path, file_name)
                    features = extract_all_features(img_path)

                    if features is not None:
                        data_features.append(features)
                        data_labels.append(label)

    print("\nFeature extraction complete!")
    print(f"Total Images Collected: {len(data_features)}")

Reading data from: /content/drive/MyDrive/Fruit-Market
--> Processing class: Bali Orange...
--> Processing class: Peach...
--> Processing class: Tomato...
--> Processing class: Apple...

Feature extraction complete!
Total Images Collected: 1189


# Split and Normalize Data

In [5]:
# Convert list to numpy array
X = np.array(data_features)
y = np.array(data_labels)

# Split Data (80% Training, 20% Testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize Data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Data is ready for training.")
print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

Data is ready for training.
Training set size: 951
Testing set size: 238


# Train SVM and Evaluate

In [6]:
print("Training SVM model... (Please wait)")

# Create SVM model (Linear kernel is usually best for HOG features)
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

# Test the model
y_pred = model.predict(X_test)

print("\n=== ACCURACY RESULTS ===")
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("\nDetailed Report:")
print(classification_report(y_test, y_pred))

Training SVM model... (Please wait)

=== ACCURACY RESULTS ===
Accuracy: 78.99%

Detailed Report:
              precision    recall  f1-score   support

       Apple       0.66      0.73      0.69        62
 Bali Orange       0.85      0.87      0.86        93
       Peach       0.98      0.87      0.92        52
      Tomato       0.59      0.55      0.57        31

    accuracy                           0.79       238
   macro avg       0.77      0.75      0.76       238
weighted avg       0.80      0.79      0.79       238



# Save the Models

In [7]:
import os
import joblib

# 1. Define your Drive folder path
# This is the same folder where your images are
destination_folder = "/content/drive/MyDrive/Fruit-Market"

# 2. Define the full file names
model_path = os.path.join(destination_folder, 'model_fruit_svm.pkl')
scaler_path = os.path.join(destination_folder, 'scaler_fruit.pkl')

# 3. Save directly to Google Drive
print(f"Saving model to {destination_folder}...")

joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)

print("SUCCESS!")
print(f"Model saved at: {model_path}")
print(f"Scaler saved at: {scaler_path}")
print("Check your Google Drive folder now, the files should be there.")

Saving model to /content/drive/MyDrive/Fruit-Market...
SUCCESS!
Model saved at: /content/drive/MyDrive/Fruit-Market/model_fruit_svm.pkl
Scaler saved at: /content/drive/MyDrive/Fruit-Market/scaler_fruit.pkl
Check your Google Drive folder now, the files should be there.
