<a href="https://colab.research.google.com/github/ashen-pabasara/fabric-defect-detection/blob/main/model_train_svm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Configuration**

In [None]:
import os
import zipfile

import cv2
import joblib
import numpy as np
from google.colab import drive
from skimage.feature import hog
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import LinearSVC
from tqdm import tqdm

In [None]:
# Mount google drive
# `drive` is provided by the imports cell at the top of the notebook, so it is
# not imported a second time here.
drive.mount('/content/drive')

print("Google Drive mounted successfully!")

Mounted at /content/drive
Google Drive mounted successfully!


In [None]:
# Define Paths
# Local (Colab VM) locations: the archive is extracted into unzip_destination,
# and dataset_root is the folder the rest of the notebook reads from.
unzip_destination = '/content/'
dataset_root = '/content/fabric_dataset'

# Archive stored on the mounted Google Drive.
zip_path = '/content/drive/My Drive/EEY9536 Data Science Project/Dataset/fabric_dataset.zip'

In [None]:
# Unzip the dataset
# Extraction is skipped when dataset_root already exists, so re-running the cell
# is cheap. The standard-library zipfile module is used instead of a shell call
# so that a missing or corrupt archive raises an exception rather than being
# followed by a misleading success message.
if not os.path.exists(dataset_root):
    print(f"Starting to unzip {zip_path}...")
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(unzip_destination)

    # The rest of the notebook reads from dataset_root; fail here, with a clear
    # message, if the archive did not actually produce that folder.
    if not os.path.isdir(dataset_root):
        raise FileNotFoundError(
            f"Unzipped {zip_path} into {unzip_destination}, but {dataset_root} "
            "was not created. Check the archive's top-level folder name."
        )
    print(f"Dataset unzipped successfully to {dataset_root}")
else:
    print(f"Dataset already exists at {dataset_root}, skipping unzip.")

Starting to unzip /content/drive/My Drive/EEY9536 Data Science Project/Dataset/fabric_dataset.zip...
Dataset unzipped successfully to /content/fabric_dataset


In [None]:
# Configuration for SVM
# Every image is resized to IMG_SIZE before HOG extraction in process_dataset.
# With the HOG settings used there (9 orientations, 8x8-pixel cells, 2x2-cell
# blocks) a 128x128 input gives 15 * 15 blocks * 36 values = 8100 features per
# image, which matches the feature-matrix shapes printed after loading.
IMG_SIZE = (128, 128) # Downsize images to speed up HOG

def process_dataset(subset_name):
    """
    Reads images from the folder, extracts HOG features, and assigns labels.

    subset_name: 'train' or 'val'. Images are read from
        <dataset_root>/images/<subset_name> and label files from
        <dataset_root>/labels/<subset_name>.

    Returns a tuple (features, labels) of numpy arrays with one entry per
    readable image, in sorted filename order. A label is 1 when the image's
    matching .txt file exists and is non-empty, otherwise 0.

    Images that OpenCV cannot read are skipped and reported after the loop.
    """
    print(f"\n--- Processing {subset_name} data ---")

    image_dir = os.path.join(dataset_root, "images", subset_name)
    label_dir = os.path.join(dataset_root, "labels", subset_name)

    features_list = []
    labels_list = []
    unreadable_files = []

    # Get all image files. os.listdir returns entries in arbitrary order, so the
    # names are sorted to keep the sample order identical between runs. The
    # extension check is case-insensitive so files named '.JPG' are not dropped.
    image_files = sorted(
        f for f in os.listdir(image_dir) if f.lower().endswith('.jpg')
    )

    for filename in tqdm(image_files, desc=f"Loading {subset_name}"):
        # Read and Resize Image
        img_path = os.path.join(image_dir, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) # SVM/HOG uses Grayscale

        # cv2.imread returns None instead of raising when a file cannot be decoded.
        if img is None:
            unreadable_files.append(filename)
            continue
        img_resized = cv2.resize(img, IMG_SIZE)

        # Extract HOG Features
        hog_features = hog(img_resized,
                           orientations=9,
                           pixels_per_cell=(8, 8),
                           cells_per_block=(2, 2),
                           visualize=False)

        # Determine Label
        # Swap only the final extension: str.replace would also rewrite any
        # '.jpg' that happens to appear earlier in the file name.
        stem, _ = os.path.splitext(filename)
        txt_path = os.path.join(label_dir, stem + '.txt')

        # A missing or zero-byte label file means the image has no annotation.
        if os.path.exists(txt_path) and os.path.getsize(txt_path) > 0:
            label = 1
        else:
            label = 0

        features_list.append(hog_features)
        labels_list.append(label)

    if unreadable_files:
        print(f"Warning: skipped {len(unreadable_files)} unreadable image(s) "
              f"in {image_dir}, e.g. {unreadable_files[0]}")

    return np.array(features_list), np.array(labels_list)

# **Model Training - Baseline**

In [None]:
# Load Training and Validation Data
X_train, y_train = process_dataset('train')
X_val, y_val = process_dataset('val')

# Fail early with a clear message rather than deep inside model fitting:
# both splits must contain samples and share the same feature dimension.
assert len(X_train) > 0 and len(X_val) > 0, \
    "No images were loaded for at least one split; check dataset_root."
assert X_train.shape[1:] == X_val.shape[1:], \
    f"Feature size mismatch: train {X_train.shape} vs val {X_val.shape}"

print(f"\nTraining set shape: {X_train.shape}")
print(f"Validation set shape: {X_val.shape}")


--- Processing train data ---


Loading train: 100%|██████████| 7828/7828 [01:53<00:00, 69.15it/s]



--- Processing val data ---


Loading val: 100%|██████████| 3948/3948 [00:56<00:00, 69.40it/s]



Training set shape: (7828, 8100)
Validation set shape: (3948, 8100)


In [None]:
# Train SVM baseline model
# Linear SVM on the HOG feature vectors with default class weights.
# random_state is fixed so repeated runs use the same seed.

print("\n🚀 Training SVM Model (LinearSVC)...")

svm_model = LinearSVC(random_state=42, max_iter=3000)
svm_model.fit(X_train, y_train)
print("Training Complete!")


🚀 Training SVM Model (LinearSVC)...
Training Complete!




In [None]:

# Evaluation
# Score the baseline model on the validation split.

print("\n📊 Evaluating Model...")
y_pred = svm_model.predict(X_val)

# Calculate Accuracy
acc = accuracy_score(y_val, y_pred)
print(f"SVM Baseline Accuracy: {acc:.4f}")

# Print Report
# labels=[0, 1] pins the row order to the target names (0 = Normal, 1 = Defect)
# and keeps the report working even if one class is absent from the
# validation labels and predictions.
print("\nClassification Report:")
print(classification_report(y_val, y_pred, labels=[0, 1], target_names=["Normal", "Defect"]))


📊 Evaluating Model...
SVM Baseline Accuracy: 0.7112

Classification Report:
              precision    recall  f1-score   support

      Normal       0.80      0.82      0.81      2974
      Defect       0.41      0.37      0.39       974

    accuracy                           0.71      3948
   macro avg       0.60      0.60      0.60      3948
weighted avg       0.70      0.71      0.71      3948



In [None]:
# Save the model
# The baseline classifier is written to the project's Dataset folder on the
# mounted Google Drive.
model_filename = 'fabric_svm_baseline.pkl'
drive_save_dir = '/content/drive/My Drive/EEY9536 Data Science Project/Dataset/'
save_path = os.path.join(drive_save_dir, model_filename)

print(f"\nSaving model to Google Drive at: {save_path}...")
joblib.dump(svm_model, save_path)  # serialize the fitted baseline LinearSVC
print("Model saved successfully!")


Saving model to Google Drive at: /content/drive/My Drive/EEY9536 Data Science Project/Dataset/fabric_svm.pkl...
Model saved successfully!


# **Model Training - Balanced**

In [None]:
# Train SVM balanced model
# Same LinearSVC settings as the baseline, but class_weight='balanced' so the
# two classes are weighted inversely to their frequency in y_train.
# verbose=1 lets liblinear print its own progress marker.

print("\n🚀 Training SVM Model (LinearSVC)...")

svm_model_balanced = LinearSVC(random_state=42, max_iter=3000, verbose=1, class_weight='balanced')
svm_model_balanced.fit(X_train, y_train)
print("Training Complete!")


🚀 Training SVM Model (LinearSVC)...
[LibLinear]Training Complete!




In [None]:
# Evaluation
# Score the class-weighted model on the validation split.
# Note: y_pred and acc are overwritten here with the balanced model's results.

print("\n📊 Evaluating Model...")
y_pred = svm_model_balanced.predict(X_val)

# Calculate Accuracy
acc = accuracy_score(y_val, y_pred)
print(f"SVM balance Accuracy: {acc:.4f}")

# Print Report
# labels=[0, 1] pins the row order to the target names (0 = Normal, 1 = Defect)
# and keeps the report working even if one class is absent from the
# validation labels and predictions.
print("\nClassification Report:")
print(classification_report(y_val, y_pred, labels=[0, 1], target_names=["Normal", "Defect"]))


📊 Evaluating Model...
SVM balance Accuracy: 0.6702

Classification Report:
              precision    recall  f1-score   support

      Normal       0.81      0.74      0.77      2974
      Defect       0.36      0.45      0.40       974

    accuracy                           0.67      3948
   macro avg       0.59      0.60      0.59      3948
weighted avg       0.70      0.67      0.68      3948



In [None]:
# Save the model
# Writes the class-weighted classifier to the project's Dataset folder on the
# mounted Google Drive.
drive_save_dir = '/content/drive/My Drive/EEY9536 Data Science Project/Dataset/'
model_filename = 'fabric_svm_balanced.pkl'
save_path = os.path.join(drive_save_dir, model_filename)

print(f"\nSaving model to Google Drive at: {save_path}...")
# Dump svm_model_balanced, not svm_model: the latter is the baseline model and
# would silently be stored under the balanced file name.
joblib.dump(svm_model_balanced, save_path)
print("Model saved successfully!")


Saving model to Google Drive at: /content/drive/My Drive/EEY9536 Data Science Project/Dataset/fabric_svm_balanced.pkl...
Model saved successfully!
