In [24]:
import os
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import joblib


In [28]:
import os
import cv2
import numpy as np
import joblib
from skimage.feature import hog
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.decomposition import PCA

def extract_hog_features(image):
    hog_features, hog_image = hog(image, 
                                  orientations=9, 
                                  pixels_per_cell=(8, 8), 
                                  cells_per_block=(2, 2), 
                                  block_norm='L2-Hys', 
                                  visualize=True, 
                                  feature_vector=True)
    return hog_features

def load_and_preprocess_images(dataset_path, categories, image_size, apply_pca=False, n_components=50):
    data = []
    labels = []
    
    for category in categories:
        folder_path = os.path.join(dataset_path, category)
        
        for filename in os.listdir(folder_path):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)
            if image is None:
                continue
            
            print(f"Processing: {filename}")
            resized_image = cv2.resize(image, image_size)
            gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
            
            hog_features = extract_hog_features(gray_image)
            
            data.append(hog_features)
            labels.append(category)
    
    data = np.array(data)
    labels = np.array(labels)
    
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(labels)
    
    if apply_pca:
        pca = PCA(n_components=n_components)
        data = pca.fit_transform(data)
        return data, encoded_labels, label_encoder, pca
    else:
        return data, encoded_labels, label_encoder, None

IMAGE_SIZE = (128, 128)
DATASET_PATH = '../dataset/images'
CATEGORIES = ['cars', 'motorcycles', 'non_vehicles']

data, labels, label_encoder, pca = load_and_preprocess_images(dataset_path=DATASET_PATH, 
                                                              categories=CATEGORIES, 
                                                              image_size=IMAGE_SIZE, 
                                                              apply_pca=True,  # Set to False if you don't want PCA
                                                              n_components=50)  # Adjust the number of components as needed

model = SVC(kernel='linear')
model.fit(data, labels)

joblib.dump(model, 'vehicle_detection_model.pkl')
if pca is not None:
    joblib.dump(pca, 'pca_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

print("Model, PCA, and Label Encoder saved successfully!")


Processing: 1.jpg
Processing: 10.jpg
Processing: 100.jpg
Processing: 1000.jpg
Processing: 1001.jpg
Processing: 1002.jpg
Processing: 1003.jpg
Processing: 1004.jpg
Processing: 1005.jpg
Processing: 1006.jpg
Processing: 1007.jpg
Processing: 1008.jpg
Processing: 1009.jpg
Processing: 101.jpg
Processing: 1010.jpg
Processing: 1011.jpg
Processing: 1012.jpg
Processing: 1013.jpg
Processing: 1014.jpg
Processing: 1015.jpg
Processing: 1016.jpg
Processing: 1017.jpg
Processing: 1018.jpg
Processing: 1019.jpg
Processing: 102.jpg
Processing: 1020.jpg
Processing: 1021.jpg
Processing: 1022.jpg
Processing: 1023.jpg
Processing: 1024.jpg
Processing: 1025.jpg
Processing: 1026.jpg
Processing: 1027.jpg
Processing: 1028.jpg
Processing: 1029.jpg
Processing: 103.jpg
Processing: 1030.jpg
Processing: 1031.jpg
Processing: 1032.jpg
Processing: 1033.jpg
Processing: 1034.jpg
Processing: 1035.jpg
Processing: 1036.jpg
Processing: 1037.jpg
Processing: 1038.jpg
Processing: 1039.jpg
Processing: 104.jpg
Processing: 1040.jpg
Pr

In [30]:
# Function to preprocess and extract features for a single image
def preprocess_and_extract_features(image_path, image_size, apply_pca=False, pca=None):
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not load image at path: {image_path}")
    
    # Resize the image to the required size
    resized_image = cv2.resize(image, image_size)
    # Convert to grayscale for HOG feature extraction
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
    
    # Extract HOG features
    hog_features = extract_hog_features(gray_image)
    
    # If PCA was applied during training, apply it here as well
    if apply_pca and pca is not None:
        hog_features = pca.transform([hog_features])
    
    return hog_features

# Example usage for testing an image
image_path = '../dataset/images//bike-cropped.png'  # Path to your test image
IMAGE_SIZE = (128, 128)

# Load the PCA object (if PCA was used during training)
pca = joblib.load('pca_model.pkl')

# Load the trained SVM model
model = joblib.load('vehicle_detection_model.pkl')

# Load the label encoder
label_encoder = joblib.load('label_encoder.pkl')

# Preprocess the image and extract features
hog_features = preprocess_and_extract_features(image_path, IMAGE_SIZE, apply_pca=True, pca=pca)

# Now use your trained model to make a prediction
prediction = model.predict(hog_features)
predicted_class = label_encoder.inverse_transform(prediction)

print(f"Predicted class: {predicted_class}")


Predicted class: ['motorcycles']


In [None]:
import cv2
import os
import joblib
import numpy as np
from skimage.feature import hog

# Define the HOG feature extraction function
def extract_hog_features(image):
    features, _ = hog(image, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True)
    return features

# Function to extract and resize grid boxes from a frame
def extract_and_resize_grid(frame, grid_size, resized_grid_size=128):
    height, width, _ = frame.shape
    resized_grid_boxes = []
    positions = []
    for y in range(0, height, grid_size):
        for x in range(0, width, grid_size):
            grid_box = frame[y:y+grid_size, x:x+grid_size]
            if grid_box.shape[0] == grid_size and grid_box.shape[1] == grid_size:
                resized_box = cv2.resize(grid_box, (resized_grid_size, resized_grid_size))
                resized_grid_boxes.append(resized_box)
                positions.append((x, y))
    return resized_grid_boxes, positions

# Function to process video and save predictions
def process_video(input_path, output_path, model, label_encoder, pca=None, grid_size=500, resized_grid_size=128, output_dir="./Prediction/01_svc/resized_grid_boxes"):
    cap = cv2.VideoCapture(input_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print("Width:", width)
    print("Height:", height)
    print("Frame count:", frame_count)

    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'DIVX'), 20, (width, height))

    if not cap.isOpened():
        print("Error opening the video file.")
        return

    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)
    frame_idx = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            current_grid_size = grid_size
            resized_grid_boxes, positions = extract_and_resize_grid(frame, current_grid_size, resized_grid_size)
            print(f"Processing Frame: {frame_idx}")

            for i, (box, (x, y)) in enumerate(zip(resized_grid_boxes, positions)):
                grid_folder = os.path.join(output_dir, f"frame_{frame_idx}")
                os.makedirs(grid_folder, exist_ok=True)
                gray_image = cv2.cvtColor(box, cv2.COLOR_BGR2GRAY)
                hog_features = extract_hog_features(gray_image)
                if pca is not None:
                    hog_features = pca.transform([hog_features])
                else:
                    hog_features = np.array(hog_features).reshape(1, -1)
                prediction = model.predict(hog_features)
                predicted_class = label_encoder.inverse_transform(prediction)[0]
                print(f"Prediction for Grid {i}: {predicted_class}")
                if predicted_class == 'cars' or predicted_class == 'motorcycles':
                    box_filename = os.path.join(grid_folder, f"grid_{i}_{predicted_class}.png")
                    cv2.imwrite(box_filename, box)

                    cv2.rectangle(frame, (x, y), (x + current_grid_size, y + current_grid_size), (0, 255, 0), 2)
                    cv2.putText(frame, predicted_class, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            writer.write(frame)
            frame_idx += 1
        else:
            break

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    writer.release()
    cv2.destroyAllWindows()
    print(f"Video processing complete. Output saved to {output_path}")
    print(f"Resized grid boxes saved in {output_dir}")

input_video_path = '../dataset/sample videos/sample.mp4'  
output_video_path = './Prediction/01_svc/grid_video.mp4' 

model = joblib.load('vehicle_detection_model.pkl')
pca = joblib.load('pca_model.pkl') if os.path.exists('pca_model.pkl') else None

label_encoder = joblib.load('label_encoder.pkl')

process_video(input_video_path, output_video_path, model, label_encoder, pca, grid_size=350)


In [2]:
import os
import cv2
import numpy as np

# Function to load images from a directory and return them as a list
def load_images_from_folder(folder_path):
    images = []
    labels = []
    for label_folder in os.listdir(folder_path):
        label_folder_path = os.path.join(folder_path, label_folder)
        for filename in os.listdir(label_folder_path):
            img_path = os.path.join(label_folder_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                images.append(img)
                labels.append(label_folder)  
    return images, labels

# Load dataset
dataset_folder = '../dataset/images'
images, labels = load_images_from_folder(dataset_folder)
print(f"Loaded {len(images)} images with corresponding labels.")


Loaded 8110 images with corresponding labels.


In [4]:
import cv2
import numpy as np
from skimage.feature import hog

# Extract color histogram features
def extract_color_histogram(image, bins=32):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist_features = []
    for i in range(3): 
        hist = np.histogram(hsv_image[:, :, i], bins=bins, range=(0, 256))[0]
        hist_features.append(hist)
    return np.concatenate(hist_features)

# Extract spatial binning features
def extract_spatial_binning(image, size=(32, 32)):
    return cv2.resize(image, size).ravel()

# Extract HOG features
def extract_hog_features(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    if gray_image.shape[0] >= pixels_per_cell[0] * cells_per_block[0] and gray_image.shape[1] >= pixels_per_cell[1] * cells_per_block[1]:
        features, _ = hog(gray_image, orientations=orientations, pixels_per_cell=pixels_per_cell,
                          cells_per_block=cells_per_block, visualize=True)
        return features
    else:
        # Return zeros if the image is too small for HOG extraction
        print(f"Image is too small for HOG feature extraction. Size: {gray_image.shape}")
        return np.zeros(orientations * ((gray_image.shape[0] // pixels_per_cell[0]) // cells_per_block[0]) *
                        ((gray_image.shape[1] // pixels_per_cell[1]) // cells_per_block[1]))

# Extract combined features
def extract_combined_features(image):
    # color_features = extract_color_histogram(image, bins=32)
    spatial_features = extract_spatial_binning(image, size=(32, 32))
    hog_features = extract_hog_features(image) 
    
    # Concatenate all features (ensure all arrays are the same size)
    combined_features = np.concatenate(( spatial_features, hog_features))
    return combined_features

# Assuming `images` is a list of images loaded using cv2
features = [extract_combined_features(img) for img in images]

# Convert the list of features into a numpy array
features_arr = np.array(features)

print(f"Features array shape: {features_arr.shape}")


Image is too small for HOG feature extraction. Size: (14, 18)


KeyboardInterrupt: 

In [10]:
from sklearn.preprocessing import LabelEncoder

# Encode labels into numerical values (not one-hot encoded)
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)  # No need to use one-hot encoding for SVM


In [11]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.2, random_state=42)

# Train a Support Vector Machine (SVM) classifier
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# Test the model and print the classification report
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (6488,) + inhomogeneous part.

In [None]:
# Sliding window function for object localization
def sliding_window(image, step_size, window_size):
    for y in range(0, image.shape[0] - window_size[1], step_size):
        for x in range(0, image.shape[1] - window_size[0], step_size):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])

# Function to detect vehicles in a video frame using sliding window and classifier
def detect_vehicles_in_frame(frame, model, label_encoder, window_size=(128, 128), step_size=32):
    detected_objects = []
    for (x, y, window) in sliding_window(frame, step_size, window_size):
        if window.shape[0] != window_size[1] or window.shape[1] != window_size[0]:
            continue
        
        # Extract features for the current window
        window_features = extract_combined_features(window).reshape(1, -1)
        
        # Predict the class using the trained model
        prediction = model.predict(window_features)
        predicted_class = label_encoder.inverse_transform(prediction)[0]
        
        if predicted_class != 'no_vehicle':
            detected_objects.append((x, y, predicted_class))
            cv2.rectangle(frame, (x, y), (x + window_size[0], y + window_size[1]), (0, 255, 0), 2)
            cv2.putText(frame, predicted_class, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    
    return frame, detected_objects


In [None]:
def process_video(input_path, output_path, model, label_encoder, window_size=(128, 128), step_size=32):
    cap = cv2.VideoCapture(input_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'DIVX'), 20, (width, height))

    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # Detect vehicles in the current frame
            frame, detected_objects = detect_vehicles_in_frame(frame, model, label_encoder, window_size, step_size)
            writer.write(frame)
            frame_idx += 1
        else:
            break

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    writer.release()
    cv2.destroyAllWindows()
    print(f"Video processing complete. Output saved to {output_path}")

# Example usage
input_video_path = './dataset/sample_videos/sample_2.mp4'
output_video_path = './Prediction/vehicle_detection_output.mp4'

# Process the video
process_video(input_video_path, output_video_path, svm_model, label_encoder)
