<a href="https://colab.research.google.com/github/Younes-khadraoui/place-recognition/blob/master/Place_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Buildings detection, Image stitching and place recognition using sift/VLAD

### The user provides an input image and the model recognizes the place (math faculty, info faculty, ...)

### It also predicts whether the input image contains a building or not

### Step 1
* Build a panoramic dataset from the video dataset: there are 4 faculties, with one video per faculty
* From each video we extract some frames (consecutive frames should be neither too close together nor too far apart)
* From these frames we build a panoramic image (image stitching)


### Step 2
* Here we extract image features from each panoramic image using SIFT (and save them in an .npz file)
* When the user gives an input image, we extract its features using SIFT too
* We then compare them with each set of saved features (.npz)
* Finally, we classify the input as the panorama that shares the most similar features with it

### Step 3
* For building detection we use image segmentation with a pre-trained model from Facebook (detectron2)

In [None]:
import zipfile
import os

# Define the path to the zip file and the extraction directory
zip_file_path = '/content/data.zip'
extract_dir = '/content'

# Create the extraction directory if it doesn't exist (no-op when it does)
os.makedirs(extract_dir, exist_ok=True)

print(zip_file_path)

# Validate the archive before extracting so a missing or partially uploaded
# file produces an actionable message instead of a bare traceback.
# The exception types are the same ones zipfile.ZipFile would raise.
if not os.path.isfile(zip_file_path):
    raise FileNotFoundError(
        f"Archive not found: {zip_file_path}. Upload data.zip to the runtime first."
    )
if not zipfile.is_zipfile(zip_file_path):
    raise zipfile.BadZipFile(
        f"{zip_file_path} is not a valid zip archive; "
        "the upload may be incomplete or corrupted."
    )

# Extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# List the contents of the extraction directory
extracted_files = os.listdir(extract_dir)
print("Extracted files:", extracted_files)


/content/data.zip


BadZipFile: File is not a zip file

In [None]:
import cv2 as cv
import os

# Output folders for the sampled frames and the stitched panoramas.
# "panoramas" has to exist before saving: cv.imwrite does not create missing
# directories, it only returns False.
os.makedirs("frames", exist_ok=True)
os.makedirs("panoramas", exist_ok=True)

video_files = [
    "/content/data/VID_20240407_163907.mp4",
    "/content/data/VID_20240407_163620.mp4",
    "/content/data/VID_20240407_163452.mp4",
    "/content/data/VID_20240407_163112.mp4"
]

# Number of evenly spaced frames sampled from each video.
num_frames = 34

stitcher = cv.Stitcher_create()  # create a stitcher object

for j, video_file in enumerate(video_files):

    video_info = cv.VideoCapture(video_file)

    if not video_info.isOpened():
        print(f"Error opening video {j}: {video_file}")
        video_info.release()
        continue

    total_frames = int(video_info.get(cv.CAP_PROP_FRAME_COUNT))

    print(f"total frames: {total_frames}")

    # Clamp to 1 so a video shorter than num_frames does not yield an interval
    # of 0, which would sample the very same frame on every iteration.
    interval = max(1, total_frames // num_frames)

    print(f"interval: {interval}")

    frames = []

    # Start from an empty per-video frame folder so stale frames from a
    # previous run are not mixed with the new ones.
    frame_dir = f'frames/{j}'
    if not os.path.exists(frame_dir):
        os.mkdir(frame_dir)
    else:
        for file in os.listdir(frame_dir):
            os.remove(os.path.join(frame_dir, file))

    for i in range(num_frames):
        # Seek directly to the i-th sampled position instead of decoding
        # every frame in between.
        video_info.set(cv.CAP_PROP_POS_FRAMES, i * interval)

        ret, frame = video_info.read()

        if not ret:
            break

        frame_name = f'{frame_dir}/{os.path.basename(video_file)}_frame_{i}.jpg'

        cv.imwrite(frame_name, frame)

        frames.append(frame)

    video_info.release()

    # Stitching needs at least two images to build a panorama.
    if len(frames) < 2:
        print(f"Error stitching panorama {j}: only {len(frames)} frame(s) extracted")
        continue

    status, panorama = stitcher.stitch(frames)

    if status == cv.Stitcher_OK:
        panorama_name = f'panoramas/{j}_panorama.jpg'
        # Only report success when the file was actually written.
        if cv.imwrite(panorama_name, panorama):
            print(f"Panorama {j} stitched successfully.")
        else:
            print(f"Error saving panorama {j} to {panorama_name}")
    else:
        print(f"Error stitching panorama {j}: {status}")


In [None]:
import cv2 as cv
import os

# Folder that receives the cropped panoramas.
os.makedirs("cropped", exist_ok=True)

panorama_files = [
    "/content/panoramas/0_panorama.jpg",
    "/content/panoramas/1_panorama.jpg",
    "/content/panoramas/2_panorama.jpg",
    "/content/panoramas/3_panorama.jpg"
]

# Margins (in pixels) trimmed from each edge of the panorama.
# NOTE(review): presumably these remove the irregular borders left by
# stitching -- confirm the values against the actual panoramas.
crop_top, crop_bottom = 115, 160
crop_left, crop_right = 100, 100

for idx, panorama_file in enumerate(panorama_files):

    panorama = cv.imread(panorama_file)

    # cv.imread returns None instead of raising when the file is missing/unreadable.
    if panorama is None:
        print(f"Error reading panorama {idx}.")
        continue

    h, w = panorama.shape[:2]

    # A panorama smaller than the margins would produce an empty slice,
    # which cv.imwrite cannot encode.
    if h <= crop_top + crop_bottom or w <= crop_left + crop_right:
        print(f"Panorama {idx} is too small to crop ({w}x{h}); skipping.")
        continue

    panorama_cropped = panorama[crop_top:h - crop_bottom, crop_left:w - crop_right]

    cropped_name = f'cropped/{idx}_panorama_cropped.jpg'
    if cv.imwrite(cropped_name, panorama_cropped):
        print(f"Cropped panorama {idx} saved successfully in cropped folder.")
    else:
        print(f"Error saving cropped panorama {idx} to {cropped_name}.")


In [None]:
import cv2
import numpy as np

# cv2.imshow / cv2.waitKey need a desktop GUI window, which a Colab runtime
# does not provide; the Colab patch renders the image inline instead.
from google.colab.patches import cv2_imshow

panorama_images = [
    "/content/cropped/0_panorama_cropped.jpg",
    "/content/cropped/1_panorama_cropped.jpg",
    "/content/cropped/2_panorama_cropped.jpg",
    "/content/cropped/3_panorama_cropped.jpg"
]

# Create the SIFT object once; it is reused for every panorama.
sift = cv2.SIFT_create()

for i, image in enumerate(panorama_images):
    panorama = cv2.imread(image)

    # cv2.imread returns None instead of raising when the file cannot be read.
    if panorama is None:
        print(f"Error reading {image}; skipping.")
        continue

    # Detect keypoints and compute descriptors
    keypoints, descriptors = sift.detectAndCompute(panorama, None)

    # No keypoints means no descriptors (None); there is nothing useful to save.
    if descriptors is None:
        print(f"No SIFT features found in {image}; skipping.")
        continue

    # Extract keypoint information (cv2.KeyPoint objects are stored as plain arrays)
    keypoint_coords = np.array([kp.pt for kp in keypoints])
    keypoint_sizes = np.array([kp.size for kp in keypoints])
    keypoint_angles = np.array([kp.angle for kp in keypoints])
    keypoint_responses = np.array([kp.response for kp in keypoints])

    # Save keypoints and descriptors to a .npz file
    np.savez(f'{i}_panorama_features.npz',
            keypoint_coords=keypoint_coords,
            keypoint_sizes=keypoint_sizes,
            keypoint_angles=keypoint_angles,
            keypoint_responses=keypoint_responses,
            descriptors=descriptors)

    # Draw keypoints on the panorama image and show it inline
    panorama_with_keypoints = cv2.drawKeypoints(panorama, keypoints, None)
    cv2_imshow(panorama_with_keypoints)

In [None]:
import cv2
import numpy as np

# cv2.namedWindow / cv2.imshow need a desktop GUI window, which a Colab
# runtime does not provide; the Colab patch renders the image inline instead.
from google.colab.patches import cv2_imshow

input_image_path = "/content/data/IMG_20240407_163146.jpg"

input_image = cv2.imread(input_image_path)

# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside SIFT.
if input_image is None:
    raise FileNotFoundError(f"Could not read input image: {input_image_path}")

# Create SIFT object
sift = cv2.SIFT_create()

# Detect keypoints and compute descriptors for the input image
input_keypoints, input_descriptors = sift.detectAndCompute(input_image, None)

# Descriptors are None when no keypoints were found; matching cannot proceed then.
if input_descriptors is None:
    raise ValueError(f"No SIFT features found in input image: {input_image_path}")

# Draw keypoints on the input image
input_image_with_keypoints = cv2.drawKeypoints(input_image, input_keypoints, None)

# Display the input image with keypoints inline
cv2_imshow(input_image_with_keypoints)


In [None]:
import cv2
import os

# This cell calls np.load; import numpy here so the cell does not depend on
# an earlier cell having imported it.
import numpy as np

# FLANN-based matcher
flann = cv2.FlannBasedMatcher()

# Lowe's ratio-test threshold: keep a match only when its best neighbour is
# clearly closer than the second best.
ratio_threshold = 0.7

best_match_count = 0
best_match_image = None

cropped_npz_files = [
    "0_panorama_features.npz",
    "1_panorama_features.npz",
    "2_panorama_features.npz",
    "3_panorama_features.npz"
]

for npz_file in cropped_npz_files:
    if not os.path.exists(npz_file):
        print(f"Feature file {npz_file} not found; skipping.")
        continue

    # Only the descriptors are needed for matching; the context manager
    # closes the underlying archive once they are read.
    with np.load(npz_file) as panorama_data:
        descriptors = panorama_data['descriptors']

    # k=2 matching needs at least two candidate descriptors.
    if len(descriptors) < 2:
        print(f"Not enough descriptors in {npz_file}; skipping.")
        continue

    matches = flann.knnMatch(input_descriptors, descriptors, k=2)

    # knnMatch may return fewer than two neighbours for a query descriptor,
    # so check the pair length before applying the ratio test.
    good_matches = [
        pair[0] for pair in matches
        if len(pair) == 2 and pair[0].distance < ratio_threshold * pair[1].distance
    ]

    if len(good_matches) > best_match_count:
        best_match_count = len(good_matches)
        best_match_image = os.path.join("/content/cropped", npz_file.replace("_features.npz", "_cropped.jpg"))

print("the best match is " ,best_match_image)

### Panoptic segmentation is a technique that combines 2 segmentation approaches:

* Instance Segmentation: Identifies individual objects and assigns a unique label to each instance (e.g., car1, car2, person1).
* Semantic Segmentation: Classifies each pixel in the image into a semantic category (e.g., building, road, sky).

In [None]:
# Environment setup for detectron2. The absolute /content/... paths below
# assume the clone lands in /content (NOTE(review): i.e. that the working
# directory is /content -- confirm for non-Colab runtimes).
!python -m pip install pyyaml==5.1
# NOTE(review): distutils was removed from the standard library in Python 3.12
# -- confirm the runtime's Python version still provides it.
import sys, os, distutils.core
# Fetch the detectron2 sources, then read its setup.py to learn its dependencies.
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("/content/detectron2/setup.py")
# Install every requirement listed in detectron2's install_requires, each quoted for the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Make the cloned sources importable in this kernel.
sys.path.insert(0, os.path.abspath('/content/detectron2'))

# Install required packages: PyTorch wheels from the CPU wheel index, then
# detectron2 itself in editable mode from the cloned folder.
!pip install torch torchvision torchaudio -f https://download.pytorch.org/whl/cpu/torch_stable.html
!pip install -e detectron2

In [None]:
import torch
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import cv2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

from google.colab.patches import cv2_imshow

def detect_and_classify_buildings(image_path, min_dominant_area=30000):
    """Run panoptic segmentation on an image and report how much of it is building.

    The image is resized to 800x600, segmented on the CPU with the pre-trained
    COCO panoptic FPN R101 3x model, and the areas of all "building" stuff
    segments are summed. The total area and a verdict are printed and the
    segmentation overlay is displayed inline.

    Args:
        image_path: Path of the image file to analyse.
        min_dominant_area: Building area (in pixels of the resized 800x600
            image) above which the image is reported as mostly buildings.

    Returns:
        The verdict string that was printed.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (800, 600))

    config_name = "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_name))
    cfg.MODEL.DEVICE = "cpu"
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)

    predictor = DefaultPredictor(cfg)

    panoptic_seg, segments_info = predictor(image)["panoptic_seg"]

    metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    stuff_classes = metadata.stuff_classes

    # Filter by buildings. "Thing" and "stuff" segments use separate
    # category_id spaces, so only stuff segments may be looked up in
    # stuff_classes; otherwise an unrelated thing whose id happens to equal
    # the "building" index would be counted as a building.
    building_segments = [
        seg for seg in segments_info
        if not seg.get("isthing", False)
        and seg["category_id"] < len(stuff_classes)
        and stuff_classes[seg["category_id"]] == "building"
    ]

    total_building_area = sum(seg["area"] for seg in building_segments)

    print(total_building_area)

    if total_building_area == 0:
        verdict = "The image does not contain buildings."
    elif total_building_area > min_dominant_area:
        verdict = "The image contains most buildings."
    else:
        verdict = "The image contains some buildings."
    print(verdict)

    # The Visualizer expects RGB while OpenCV images are BGR, hence the
    # channel flips on the way in and out.
    v = Visualizer(image[:, :, ::-1], metadata, scale=1.2)
    out = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"), segments_info)
    cv2_imshow(out.get_image()[:, :, ::-1])

    return verdict

# Run the building detector on a sample photo under /content/data.
# The function prints the total building area and its verdict, and displays
# the segmentation overlay inline.
image_file = "/content/data/IMG_20240407_163935.jpg"
classification = detect_and_classify_buildings(image_file)
