In [None]:
# Import necessary libraries
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Set random seed for reproducibility.
# This seeds NumPy's global RNG only. scikit-learn estimators built without an
# explicit random_state (the PCA inside HOGFeatureExtractor is one) fall back to
# this global state whenever their solver needs randomness, so the seed must be
# set before any fitting happens.
np.random.seed(42)

# Define the HOGFeatureExtractor class
class HOGFeatureExtractor:
    """Turn image files into standardized, PCA-reduced HOG feature vectors.

    Intended flow:

    1. ``prepare_data_with_hog`` reads, resizes and HOG-encodes every image.
    2. ``fit_scaler`` / ``transform_scaler`` standardize with ``StandardScaler``.
    3. ``fit_pca`` / ``transform_pca`` reduce dimensionality with ``PCA``.

    Fit the scaler and the PCA on training features, then reuse the fitted
    transformers on other data through the ``transform_*`` methods.

    Args:
        resize_shape: Target size handed to ``cv2.resize`` as ``dsize``.
            OpenCV interprets it as ``(width, height)``.
        orientations: Forwarded to ``skimage.feature.hog``.
        pixels_per_cell: Forwarded to ``skimage.feature.hog``.
        cells_per_block: Forwarded to ``skimage.feature.hog``.
        block_norm: Forwarded to ``skimage.feature.hog``.
        n_components: Forwarded to ``PCA``.

    Attributes:
        scaler: The ``StandardScaler`` used by the ``*_scaler`` methods.
        pca: The ``PCA`` used by the ``*_pca`` methods.
        valid_indices_: Integer array with the positions (within the last
            ``file_paths`` given to ``prepare_data_with_hog``) of the images
            that produced a feature row, in row order.
        failed_paths_: Paths from that same call that were skipped.
    """

    def __init__(self, resize_shape=(64, 64), orientations=12, pixels_per_cell=(8, 8),
                 cells_per_block=(4, 4), block_norm='L2-Hys', n_components=256):
        self.resize_shape = resize_shape
        self.orientations = orientations
        self.pixels_per_cell = pixels_per_cell
        self.cells_per_block = cells_per_block
        self.block_norm = block_norm
        self.n_components = n_components
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=n_components)
        # Bookkeeping for the most recent prepare_data_with_hog call, so callers
        # can realign labels when some images had to be skipped.
        self.valid_indices_ = np.array([], dtype=int)
        self.failed_paths_ = []

    def preprocess_image(self, image_path):
        """Load one image as grayscale and resize it to ``resize_shape``.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The resized grayscale image.

        Raises:
            ValueError: If OpenCV cannot read the file (``cv2.imread`` signals
                this by returning ``None`` instead of raising).
        """
        # Read image in grayscale
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise ValueError(f"Image {image_path} cannot be loaded.")
        # Resize image; dsize is (width, height) in OpenCV's convention
        img = cv2.resize(img, self.resize_shape)
        return img

    def extract_hog_features(self, image):
        """Compute the HOG descriptor of ``image`` with the configured settings.

        Args:
            image: Image array, as returned by ``preprocess_image``.

        Returns:
            The feature vector returned by ``skimage.feature.hog``.
        """
        features = hog(
            image,
            orientations=self.orientations,
            pixels_per_cell=self.pixels_per_cell,
            cells_per_block=self.cells_per_block,
            block_norm=self.block_norm,
            visualize=False
        )
        return features

    def prepare_data_with_hog(self, file_paths):
        """Extract HOG features for every readable image in ``file_paths``.

        Images that raise during loading or feature extraction are reported
        and skipped, so the result can have fewer rows than ``file_paths`` has
        entries. After the call, ``valid_indices_`` holds the positions of the
        inputs that were kept and ``failed_paths_`` the paths that were not;
        use them to keep labels aligned, e.g. ``y = y[extractor.valid_indices_]``.

        Args:
            file_paths: Iterable of image paths.

        Returns:
            ``np.array`` built from the per-image feature vectors, one row per
            successfully processed image (an empty 1-D array if none succeed).
        """
        features = []
        valid_indices = []
        failed_paths = []
        # enumerate wraps tqdm (not the reverse) so the bar still sees len(file_paths)
        for idx, path in enumerate(tqdm(file_paths, desc="Extracting HOG features")):
            try:
                img = self.preprocess_image(path)
                hog_features = self.extract_hog_features(img)
            except Exception as e:
                # Best effort: one bad file should not abort the whole run,
                # but it must be recorded or labels silently go out of step.
                print(f"Error processing image {path}: {e}")
                failed_paths.append(path)
                continue
            features.append(hog_features)
            valid_indices.append(idx)

        self.valid_indices_ = np.array(valid_indices, dtype=int)
        self.failed_paths_ = failed_paths
        if failed_paths:
            total = len(valid_indices) + len(failed_paths)
            print(
                f"Skipped {len(failed_paths)} of {total} images; "
                "use valid_indices_ / failed_paths_ to realign labels."
            )
        return np.array(features)

    def fit_scaler(self, X):
        """Fit the scaler on ``X`` and return ``X`` standardized."""
        self.scaler.fit(X)
        return self.scaler.transform(X)

    def transform_scaler(self, X):
        """Standardize ``X`` with the already-fitted scaler."""
        return self.scaler.transform(X)

    def fit_pca(self, X):
        """Fit the PCA on ``X`` and return ``X`` projected onto its components."""
        self.pca.fit(X)
        return self.pca.transform(X)

    def transform_pca(self, X):
        """Project ``X`` with the already-fitted PCA."""
        return self.pca.transform(X)

# Load train.csv and test.csv
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# The 'im_name' column holds bare file names; prefix each with its image folder
train_image_paths = train_df['im_name'].apply(lambda x: os.path.join('train_ims', x))
test_image_paths = test_df['im_name'].apply(lambda x: os.path.join('test_ims', x))

# Extract labels for training data
y_train = train_df['label'].values

# Initialize HOGFeatureExtractor
hog_extractor = HOGFeatureExtractor(
    resize_shape=(64, 64),
    orientations=12,
    pixels_per_cell=(8, 8),
    cells_per_block=(4, 4),
    block_norm='L2-Hys',
    n_components=256
)

# Extract HOG features for training data
print("Processing training data...")
X_train_features = hog_extractor.prepare_data_with_hog(train_image_paths)
# The extractor skips images it cannot process, which would leave the feature
# rows out of step with y_train. Stop here with a clear message instead of
# failing later inside SVC.fit, after the scaler and PCA have already been fit.
if len(X_train_features) != len(y_train):
    raise ValueError(
        f"Extracted {len(X_train_features)} training feature rows for "
        f"{len(y_train)} labels; fix the images reported above before training."
    )

# Extract HOG features for test data
print("Processing test data...")
X_test_features = hog_extractor.prepare_data_with_hog(test_image_paths)
# Same hazard for the test set: every row of test_df needs exactly one
# prediction, so catch a shortfall before spending time on SVM training.
if len(X_test_features) != len(test_df):
    raise ValueError(
        f"Extracted {len(X_test_features)} test feature rows for "
        f"{len(test_df)} test images; fix the images reported above before predicting."
    )

# Fit and transform scaler on training data
X_train_scaled = hog_extractor.fit_scaler(X_train_features)
# Transform scaler on test data (reuses the training statistics)
X_test_scaled = hog_extractor.transform_scaler(X_test_features)

# Fit and transform PCA on training data
X_train_pca = hog_extractor.fit_pca(X_train_scaled)
# Transform PCA on test data (reuses the training projection)
X_test_pca = hog_extractor.transform_pca(X_test_scaled)

# Initialize SVM model
svc_model = SVC(kernel='rbf', C=5, random_state=42)
# Train the model on the full training data
print("Training SVM model...")
svc_model.fit(X_train_pca, y_train)

# Make predictions on test data
print("Making predictions on test data...")
test_predictions = svc_model.predict(X_test_pca)

# Prepare submission DataFrame: the test rows plus a predicted 'label' column
submission = test_df.copy()
submission['label'] = test_predictions

# Save submission file
submission.to_csv('submission.csv', index=False)
print("Submission file saved as 'submission.csv'")

Processing training data...


Extracting HOG features: 100%|██████████| 50000/50000 [00:28<00:00, 1727.35it/s]


Processing test data...


Extracting HOG features: 100%|██████████| 10000/10000 [00:05<00:00, 1715.96it/s]


Training SVM model...
Making predictions on test data...
Submission file saved as 'submission.csv'
