In [1]:
import os
import cv2
import numpy as np
from skimage.feature import hog

# Function to load images from a directory and return them as a list
def load_images_from_folder(folder_path):
    images = []
    labels = []
    for label_folder in os.listdir(folder_path):
        label_folder_path = os.path.join(folder_path, label_folder) 
        for filename in os.listdir(label_folder_path):
            img_path = os.path.join(label_folder_path, filename)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (128, 128))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            if img is not None:
                images.append(img)
                labels.append(label_folder)  
    return images, labels

# Load dataset
dataset_folder = '../dataset/images'
images, labels = load_images_from_folder(dataset_folder)
print(f"Loaded {len(images)} images with corresponding labels.")


Loaded 7992 images with corresponding labels.


In [6]:
import numpy as np
import pickle
def save_images_and_labels(images, labels, images_file, labels_file):
    np.save(images_file, np.array(images))
    with open(labels_file, 'wb') as f:
        pickle.dump(labels, f)
save_images_and_labels(images, labels, 'images.npy', 'labels.pkl')
print("Images and labels saved successfully.")

Images and labels saved successfully.


In [2]:
import cv2
import numpy as np
from skimage.feature import hog

# Extract SIFT features
def extract_sift_features(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(gray_image, None)
    
    # Return an empty array if no descriptors are found
    if descriptors is None:
        return np.array([])  # Return an empty array
    
    return descriptors.flatten()

# Extract HOG features
def extract_hog_features(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale for HOG
    features, hog_image = hog(img, orientations=orientations, 
                              pixels_per_cell=pixels_per_cell,
                              cells_per_block=cells_per_block, 
                              transform_sqrt=True, 
                              visualize=True, feature_vector=True)
    return features

# Extract combined features (SIFT + HOG)
def extract_combined_features(image):
    sift_features = extract_sift_features(image)
    hog_features = extract_hog_features(image)
    
    # If no SIFT features, only use HOG
    if sift_features.size == 0:
        combined_features = hog_features
    else:
        combined_features = np.concatenate((sift_features, hog_features))
    
    return combined_features

# Pad all feature vectors to the same length
def pad_features(features, target_length):
    padded_features = []
    for feature in features:
        if len(feature) < target_length:
            # Pad with zeros if the feature vector is shorter than target length
            feature = np.pad(feature, (0, target_length - len(feature)), 'constant')
        padded_features.append(feature)
    return np.array(padded_features)

# Load images (replace this with actual image loading code)
# Assume `images` is a list of images
# Example: images = [cv2.imread('image1.jpg'), cv2.imread('image2.jpg')]

# Extract combined features from all images
features = [extract_combined_features(img) for img in images]

# Determine the maximum length of features to pad
max_length = max([len(f) for f in features])

# Pad the feature vectors to have the same length
features_padded = pad_features(features, max_length)

# Convert to a NumPy array
features_arr = np.array(features_padded)

print(f"Features array shape: {features_arr.shape}")


Features array shape: (7992, 98596)


In [3]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)  


In [4]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
X_train, X_test, y_train, y_test = train_test_split(features_arr, encoded_labels, test_size=0.2, random_state=42)
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

        cars       0.87      0.89      0.88       720
 motorcycles       0.79      0.81      0.80       390
non_vehicles       0.89      0.86      0.87       489

    accuracy                           0.86      1599
   macro avg       0.85      0.85      0.85      1599
weighted avg       0.86      0.86      0.86      1599



In [5]:
import pickle
# Save the trained model
with open('svm_model.pkl', 'wb') as model_file:
    pickle.dump(svm_model, model_file)
# Save the label encoder
with open('label_encoder.pkl', 'wb') as label_file:
    pickle.dump(label_encoder, label_file)

In [12]:
import cv2
import numpy as np
from skimage.feature import hog
# Feature extraction functions (same as before)
def extract_sift_features(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(gray_image, None)
    if descriptors is None:
        return np.array([])
    return descriptors.flatten()
def extract_hog_features(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(img, orientations=orientations, 
                      pixels_per_cell=pixels_per_cell,
                      cells_per_block=cells_per_block, 
                      transform_sqrt=True, 
                      visualize=True, feature_vector=True)
    return features
def extract_combined_features(image, target_feature_length):
    sift_features = extract_sift_features(image)
    hog_features = extract_hog_features(image)
    combined_features = np.concatenate((sift_features, hog_features))
    
    # Ensure the feature vector length is fixed
    if len(combined_features) < target_feature_length:
        combined_features = np.pad(combined_features, (0, target_feature_length - len(combined_features)), 'constant')
    elif len(combined_features) > target_feature_length:
        combined_features = combined_features[:target_feature_length]
    
    return combined_features
def sliding_window(image, step_size, window_size):
    # get the window and image sizes
    h, w = window_size
    image_h, image_w = image.shape[:2]

    # loop over the image, taking steps of size `step_size`
    for y in range(0, image_h, step_size):
        for x in range(0, image_w, step_size):
            # define the window
            window = image[y:y + h, x:x + w]
            # if the window is below the minimum window size, ignore it
            if window.shape[:2] != window_size:
                continue
            # yield the current window
            yield (x, y, window)

def detect_vehicles_in_frame(frame, model, label_encoder, target_feature_length, window_size=(128, 128), step_size=32):
    detected_objects = []
    for (x, y, window) in sliding_window(frame, step_size, window_size):
        if window.shape[0] != window_size[1] or window.shape[1] != window_size[0]:
            continue
        window_features = extract_combined_features(window, target_feature_length).reshape(1, -1)
        prediction = model.predict(window_features)
        predicted_class = label_encoder.inverse_transform(prediction)[0]
        print(predicted_class)
        if predicted_class == 'cars':
            detected_objects.append((x, y, predicted_class))
            cv2.rectangle(frame, (x, y), (x + window_size[0], y + window_size[1]), (0, 255, 0), 2)
            cv2.putText(frame, predicted_class, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    return frame, detected_objects



In [None]:
def process_video(input_path, output_path, model, label_encoder, target_feature_length, window_size=(128, 128), step_size=32):
    # Open the video file
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Couldn't open video file {input_path}")
        return
    
    # Get video dimensions
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Define the codec and create the video writer object
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'DIVX'), fps, (width, height))

    frame_idx = 0
    while cap.isOpened():
        print(f"Processing frame: {frame_idx}")
        ret, frame = cap.read()
        if not ret:
            print(f"End of video at frame {frame_idx}")
            break

        # Detect vehicles in the current frame
        frame, detected_objects = detect_vehicles_in_frame(frame, model, label_encoder, target_feature_length, window_size, step_size)
        
        # Write the processed frame to the output video
        writer.write(frame)
        frame_idx += 1

        # Press 'q' to stop early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything once job is finished
    cap.release()
    writer.release()
    cv2.destroyAllWindows()
    print(f"Video processing complete. Output saved to {output_path}")

# Get the number of features the model expects
target_feature_length = svm_model.n_features_in_

# Process the video
process_video('../dataset/sample videos/sample.mp4', 'output_video_path2.mp4', svm_model, label_encoder, target_feature_length)


In [18]:


def pyramid(image, scale=1.5, min_size=(40, 40)):
    yield image

    # Generate pyramid levels until minimum size is reached
    while True:
        # Calculate the new image size based on
        # the scale factor and resize the image
        w = int(image.shape[1] / scale)
        h = int(image.shape[0] / scale)
        image = cv2.resize(image, (w, h))

        # If the new level is too small, stop generating more levels
        if image.shape[0] < min_size[1] or image.shape[1] < min_size[0]:
            break

        yield image
def extract_combined_features(image, target_feature_length):
    sift_features = extract_sift_features(image)
    hog_features = extract_hog_features(image)
    combined_features = np.concatenate((sift_features, hog_features))
    
    # Ensure the feature vector length is fixed
    if len(combined_features) < target_feature_length:
        combined_features = np.pad(combined_features, (0, target_feature_length - len(combined_features)), 'constant')
    elif len(combined_features) > target_feature_length:
        combined_features = combined_features[:target_feature_length]  # Truncate if too long
    
    return combined_features

def sliding_window(image, step_size, window_size):
    # get the window and image sizes
    h, w = window_size
    image_h, image_w = image.shape[:2]

    # loop over the image, taking steps of size `step_size`
    for y in range(0, image_h, step_size):
        for x in range(0, image_w, step_size):
            # define the window
            window = image[y:y + h, x:x + w]
            # if the window is below the minimum window size, ignore it
            if window.shape[:2] != window_size:
                continue
            # yield the current window
            yield (x, y, window)


image = cv2.imread("./main_video_frames/frame8.jpg")

w, h = 400, 400

for resized in pyramid(image):
    for (x, y, window) in sliding_window(resized, step_size=200, window_size=(w, h)):

        # Extract combined features for the window
        # Get the number of features the model expects
        target_feature_length = svm_model.n_features_in_
        features = extract_combined_features(window,target_feature_length).reshape(1, -1)

        # Predict with the pre-trained classifier
        prediction = svm_model.predict(features)
        predicted_class = label_encoder.inverse_transform(prediction)[0]
        print(predicted_class)
        if predicted_class== "cars":
            clone = resized.copy()
            cv2.rectangle(clone, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Resize the image to be smaller for display purposes
            small_clone = cv2.resize(clone, (clone.shape[1] // 2, clone.shape[0] // 2))
            
            # Show the smaller version of the window
            cv2.imshow("Window", small_clone)
            cv2.waitKey(100)

cv2.destroyAllWindows()


non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
cars
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
non_vehicles
cars
cars
cars
cars
non_vehicles
non_vehicles
non_vehicles
non_vehicles
cars
non_vehicles
cars
cars
cars
cars
cars
cars
cars
non_vehicles
non_vehicles
non_vehicles
cars
non_vehicles
cars
cars
cars
cars
motorcycles
cars
cars
non_vehicles
non_vehicles
cars
non_vehicles
cars
cars
non_vehicles
non_vehicles
non_vehicles
non_vehicles
motorcycles
cars
cars
non_vehicles
cars


KeyboardInterrupt: 

: 

In [None]:
import cv2
import os

def process_video(input_path, output_path, model, label_encoder, window_size=(128, 128), step_size=32):
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Couldn't open video file {input_path}")
        return
    
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Ensure the output directory exists
    if not os.path.exists(os.path.dirname(output_path)):
        os.makedirs(os.path.dirname(output_path))
    
    # Try different codecs if 'DIVX' does not work
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'MJPG'), 20, (width, height))

    if not writer.isOpened():
        print("Error: Couldn't initialize video writer.")
        return

    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print(f"End of video at frame {frame_idx}")
            break

        print(f"Processing frame {frame_idx}")
        
        # Ensure frame dimensions match the video writer's dimensions
        if frame.shape[1] != width or frame.shape[0] != height:
            frame = cv2.resize(frame, (width, height))
        
        frame, detected_objects = detect_vehicles_in_frame(frame, model, label_encoder, window_size, step_size)
        
        print(f"Detected objects in frame {frame_idx}: {detected_objects}")
        
        writer.write(frame)
        frame_idx += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    writer.release()
    cv2.destroyAllWindows()
    print(f"Video processing complete. Output saved to {output_path}")

# Example usage
input_video_path = '../dataset/sample videos/sample.mp4'
output_video_path = './Prediction/vehicle_detection_output.mp4'

process_video(input_video_path, output_video_path, svm_model, label_encoder)