## Setup

In [8]:
# Install the development version of Hugging Face transformers straight from the
# GitHub repository (quiet mode). The install is not pinned to a release or commit.
# NOTE(review): consider pinning a version/commit so re-runs are reproducible.
!pip install -q git+https://github.com/huggingface/transformers.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m66.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone


In [9]:
import requests
from PIL import Image
import io
import torch
import numpy as np
from torchvision import transforms
import pickle
from numpy import dot
from numpy.linalg import norm
import time
import os
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
from pathlib import Path
import os

In [10]:
# Connect to Google Drive: mount it at /gdrive so the pickles, the model
# checkpoint and the Mapillary token referenced below can be read.
# This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [11]:
# Locations on the mounted Drive. Later cells combine them with os.path.join.
path = '/gdrive/MyDrive/berlin_bike_CV/'  # project root folder
folder = 'metadata'  # sub-folder holding the "<name>_tiles_edges*.pkl" files
tile_folder = 'labeled_tiles'  # sub-folder holding the per-tile "<quadkey>_labeled.pkl" files
name = 'DSR'  # prefix prepended to the metadata file names

# Print tiles to choose one for tile_quadkey

In [12]:
# Load the tiles/edges pickle and print its keys (the tile quadkeys) so that
# one of them can be chosen as tile_quadkey in the next cell.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_name = '_tiles_edges.pkl'
file_path = os.path.join(path, folder, name + file_name)
with open(file_path, 'rb') as pickle_file:
    tiles_edges = pickle.load(pickle_file)
tiles = list(tiles_edges.keys())
print(tiles)

['12021023233301', '12021023233303', '12021023233321', '12021023233323', '12021023233310', '12021023233312', '12021023233330', '12021023233332', '12021023233311', '12021023233313', '12021023233331', '12021023233333', '12021023322200', '12021023322202', '12021023322220', '12021023322222']


In [13]:
# Quadkey of the tile to label in this run; must be one of the keys of tiles_edges.
tile_quadkey = '12021023233303'

## Function definitions

In [14]:
def load_mapillary_image(image_id):
  """Download the 256px thumbnail of a Mapillary image and return it as a PIL image.

  The access token is read from '/gdrive/MyDrive/mapillary_token.txt' on every call.

  Args:
    image_id: Mapillary image id; it is formatted into the Graph API URL.

  Returns:
    PIL.Image.Image in RGB mode.

  Raises:
    OSError: if the token file cannot be read.
    requests.HTTPError: if the Graph API or the image server answers with an error status.
    requests.Timeout: if either request does not answer within the timeout.
    KeyError: if the Graph API response contains no 'thumb_256_url' field.
  """
  request_timeout = 30  # seconds; without a timeout a stalled request would hang the notebook

  # load mapillary access token; strip surrounding whitespace because a trailing
  # newline in the file would make the Authorization header value invalid
  file_path = '/gdrive/MyDrive/mapillary_token.txt'
  with open(file_path, 'r') as file:
      access_token = file.read().strip()

  # request the URL of image
  header = {'Authorization' : 'OAuth {}'.format(access_token)}
  url = 'https://graph.mapillary.com/{}?fields=thumb_256_url'.format(image_id)
  r = requests.get(url, headers=header, timeout=request_timeout)
  r.raise_for_status()  # fail loudly on e.g. an expired token instead of a confusing KeyError below
  data = r.json()
  if 'thumb_256_url' not in data:
    raise KeyError("Mapillary response for image {} has no 'thumb_256_url' field".format(image_id))
  image_url = data['thumb_256_url']

  # get image from URL (the whole body is needed at once, so no streaming)
  image_response = requests.get(image_url, timeout=request_timeout)
  image_response.raise_for_status()
  image_data = image_response.content

  # convert image data to PIL Image; force RGB so that the 3-channel
  # normalisation applied later also works for grayscale/palette/RGBA images
  image = Image.open(io.BytesIO(image_data)).convert('RGB')

  return image

In [15]:
def bike_lane_detection(image, processor, mask2former, bike_lane_class_id=7, device=None):
  """Check whether a semantic segmentation of the image contains a bike lane.

  Args:
    image: input image; must expose `.size` as (width, height) like a PIL image.
      The raw image is used because the processor does its own preprocessing.
    processor: image processor used for pre-processing and for
      `post_process_semantic_segmentation`.
    mask2former: segmentation model; it is moved to `device` (a no-op once it is there).
    bike_lane_class_id: class id that marks bike-lane pixels in the predicted map.
      Defaults to 7 - presumably the "Bike Lane" id of the Mapillary Vistas
      label map; verify against `mask2former.config.id2label`.
    device: torch device to run on. Defaults to CUDA when available, else CPU.

  Returns:
    0-dim boolean torch tensor, True if any pixel was assigned `bike_lane_class_id`.
  """
  # fall back to the CPU instead of crashing when no GPU is present
  if device is None:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  # prepare image for processing
  img = processor(images=image, return_tensors="pt").to(device)

  # make sure model and inputs live on the same device
  mask2former.to(device)

  # run inference
  with torch.no_grad():
    outputs = mask2former(**img)

  # pass to processor for postprocessing; target size is (height, width)
  predicted_map = processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]

  # check if it contains a bike lane (direct comparison, no need to sort unique values first)
  contains_bike_lane = torch.any(predicted_map == bike_lane_class_id)

  return contains_bike_lane

In [16]:
# define image transformations applied to an image before it is passed to the
# feature extractor / road-surface classifier (see run_image_labelling)
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224), # new size will be 3x224x224
    transforms.ToTensor(),
    # per-channel normalisation; these look like the usual ImageNet statistics
    # - TODO confirm they match what the models were trained with
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
        )])

In [17]:
def get_image_features(img_unsqueezed, model):
  """Extract a flat feature vector from the output of `model.head`.

  Used for images that contain a bike lane; the vector is later compared
  against medoids to assign a bike-lane label.

  Args:
    img_unsqueezed: input tensor with a leading batch dimension
      (presumably 1x3x224x224 as produced by `transform` - confirm with caller).
    model: torch module with a `head` sub-module. It is switched to eval mode.

  Returns:
    1-D numpy array holding the flattened output of `model.head`.
  """
  # select layer for feature extraction
  my_layer = model.head

  # put model in evaluation mode for consistent results
  model.eval()

  # run on whatever device the model lives on instead of assuming a GPU
  first_param = next(model.parameters(), None)
  if first_param is not None:
    img_unsqueezed = img_unsqueezed.to(first_param.device)

  # create hook on my_layer to get features
  features = []
  def hook(module, input, output):
      features.append(output)

  # run forward pass; gradients are disabled only for this call (the model's
  # parameters are left untouched) and the hook is always removed, even when
  # the forward pass raises
  hook_handle = my_layer.register_forward_hook(hook)
  try:
    with torch.no_grad():
      model(img_unsqueezed)
  finally:
    hook_handle.remove()

  # extract features, convert to np array
  img_features = torch.squeeze(torch.flatten(features[0])).cpu().numpy()

  return img_features

In [18]:
def assign_bikelane_label(img_features, medoids=None, medoid_path="/gdrive/My Drive/berlin_bike_CV/final_project_first_images/labelled images/dinoS14_bikelanes_medoids.pickle"):
  """Assign a safety label to a bike-lane image based on cosine similarity to medoids.

  Args:
    img_features: 1-D feature vector of the image.
    medoids: already loaded medoid object; `medoids[1]` must map
      'street_shared' and 'off_street' to feature vectors. Pass it when
      labelling many images to avoid re-reading the pickle on every call.
      If None, it is loaded from `medoid_path`.
    medoid_path: pickle file to load the medoids from when `medoids` is None.
      NOTE: pickle.load can execute arbitrary code; only use trusted files.

  Returns:
    2 (safe bike lane) if the image is at least as similar to the 'off_street'
    medoid as to the 'street_shared' medoid, otherwise 3 (unsafe bike lane).
    0 and 1 are used for cobblestone vs. street, hence 2 and 3 here.

  Raises:
    AssertionError: if the similarities cannot be ordered (e.g. NaN because of
      an all-zero feature vector).
  """
  # define cosine similarity function
  def cosine_similarity(a, b):
    return dot(a, b)/(norm(a)*norm(b))

  # Load medoid features and labels unless the caller supplied them
  if medoids is None:
    with open(medoid_path, "rb") as myfile:
        medoids = pickle.load(myfile)

  # extract medoid features
  street_lane_medoid = medoids[1]['street_shared'] # bike lanes that are shared with cars on the road
  sidewalk_lane_medoid = medoids[1]['off_street'] # bike lanes that are separated and located on the side walk

  # compare medoids to our image using cosine similarity
  cos2street_lane = cosine_similarity(street_lane_medoid, img_features)
  cos2sidewalk_lane = cosine_similarity(sidewalk_lane_medoid, img_features)

  # find label
  if cos2sidewalk_lane >= cos2street_lane:
    lane_label = 2 # safe bike lane
  elif cos2street_lane >  cos2sidewalk_lane:
    lane_label = 3 # unsafe bike lane
  else:
    # Only reachable when a similarity is NaN. Raised explicitly because a bare
    # `assert False` disappears under `python -O`.
    raise AssertionError(
        "Error: Lane comparison values are in wrong format! "
        "(street={}, sidewalk={})".format(cos2street_lane, cos2sidewalk_lane))

  return lane_label

In [19]:
def road_surface_classification(img_unsqueezed, model):
  """Classify a street image as cobblestone vs. asphalt road.

  Args:
    img_unsqueezed: input tensor holding exactly one image (batch size 1),
      because the predicted index is read with `.item()`.
    model: torch classifier returning one score per class, in the order
      ['cobblestones', 'road']. It is switched to eval mode.

  Returns:
    Tuple (predicted_label, my_index): the class name and its integer index
    (0 = 'cobblestones', 1 = 'road').
  """
  # put model in evaluation mode for consistent results (dropout / batch-norm),
  # matching what get_image_features does for its model
  model.eval()

  # run on whatever device the model lives on instead of assuming a GPU
  first_param = next(model.parameters(), None)
  if first_param is not None:
    img_unsqueezed = img_unsqueezed.to(first_param.device)

  # run inference
  with torch.no_grad():
      class_scores = model(img_unsqueezed)

  # Get the predicted class
  _, label_index = torch.max(class_scores, dim=1)
  my_index = label_index.item()
  labels = ['cobblestones', 'road']
  predicted_label = labels[my_index]

  return predicted_label, my_index

In [20]:
def run_image_labelling(image_id, processor, mask2former, dino, resnet):
  """Download one Mapillary image and assign it an integer label.

  Label ids: 0 = cobblestone, 1 = asphalt, 2 = safe bike lane, 3 = unsafe bike lane.

  Args:
    image_id: Mapillary image id.
    processor, mask2former: passed to bike_lane_detection.
    dino: feature extractor used when a bike lane was detected.
    resnet: road-surface classifier used when no bike lane was detected.

  Returns:
    Tuple (image_id, output_label, image).
  """
  image = load_mapillary_image(image_id)

  # The detector receives the raw image because Mask2Former has its own image processing.
  has_bike_lane = bike_lane_detection(image, processor, mask2former)

  # Preprocess for the other models and add the leading singleton 'batch' dimension.
  batch = transform(image).unsqueeze(0)

  if not has_bike_lane:
    print('No bike lane detected. Check if it is a cobblestone street')
    _, output_label = road_surface_classification(batch, resnet)
    return image_id, output_label, image

  print('Image contains bike lane. Moving on to bike-lane labelling.')
  # Compare the image's features against the bike-lane medoids.
  output_label = assign_bikelane_label(get_image_features(batch, dino))
  return image_id, output_label, image

## Load Edges with selected Images

In [21]:
# load edges per tile with selected images
# (indexed below as tiles_images[tile_quadkey][edge])
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_name = '_tiles_edges_with_images.pkl'
file_path = os.path.join(path, folder, name + file_name)
with open(file_path, 'rb') as pickle_file:
  tiles_images = pickle.load(pickle_file)


# build the labelling state for the selected tile
tile_path = os.path.join(path, tile_folder, tile_quadkey + '_labeled.pkl')
# resume from the labelled file of this tile if it already exists ...
if os.path.exists(tile_path):
  with open(tile_path, 'rb') as pickle_file:
    tile_labeled = pickle.load(pickle_file)
else:
  # ... otherwise start from this tile's entry in the tiles/edges file
  file_name = '_tiles_edges.pkl'
  file_path = os.path.join(path, folder, name + file_name)
  with open(file_path, 'rb') as pickle_file:
    tiles_edges = pickle.load(pickle_file)
  tile_labeled = tiles_edges[tile_quadkey]

## Run image processing

In [22]:
# load all models once, outside the labelling loop, for speed

# run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# mask2former
processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-large-mapillary-vistas-semantic")
mask2former = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-large-mapillary-vistas-semantic").to(device)

# DinoV2
dino = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14').to(device)

# ResNet
# The checkpoint is a complete pickled model (not a state_dict), so it needs
# weights_only=False on recent PyTorch versions; only load files you trust.
# map_location lets a checkpoint saved on a GPU be opened on a CPU-only machine.
model_path = '/gdrive/MyDrive/berlin_bike_CV/CobblestoneModel/finetuned_ResNet101.pt'
resnet = torch.load(model_path, map_location=device, weights_only=False).to(device)
# inference only: make sure dropout / batch-norm layers are in evaluation mode
resnet.eval()



Downloading (…)rocessor_config.json:   0%|          | 0.00/536 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/79.5k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/866M [00:00<?, ?B/s]

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 148MB/s]


In [23]:
# Label the selected tile: for every edge that has no label entry yet, run the
# pipeline on the first selected image of that edge, time it, and save the
# updated tile_labeled to tile_path right away so an interrupted run can be resumed.

for edge in tile_labeled:
  # inference only for not yet labeled edges (empty entry)
  if not tile_labeled[edge]:
    for img in tiles_images[tile_quadkey][edge]:
      st = time.time()
      # img is a dict; its first key is used as the Mapillary image id
      image_id = list(img.keys())[0]
      print(f"Infer Image {image_id}...")
      image_id_out, output_label, image = run_image_labelling(image_id, processor, mask2former, dino, resnet)
      # TODO: if the image was useless, take next image
      # store id and label together with the data already attached to this image
      image_dict = {'image_id': image_id, 'label': output_label}
      image_dict.update(img[image_id])
      tile_labeled[edge].append(image_dict)
      # tile_labeled[edge][image_id]['label'] = output_label
      # checkpoint: rewrite the whole tile file after every labelled image
      with open(tile_path, 'wb') as pickle_file:
         pickle.dump(tile_labeled, pickle_file)
      et = time.time()
      # get the execution time
      elapsed_time = et - st
      print('Execution time:', elapsed_time, 'seconds')
      # only the first image of each edge is processed (see TODO above)
      break




# # compare label to actual image
# my_labels = ['cobblestone', 'asphalt', 'safe bike lane', 'unsafe bike lane']
# print('This image shows ' +  my_labels[output_label])

# show image to check assigned label
# image

Infer Image 802553823972168...
Image contains bike lane. Moving on to bike-lane labelling.


KeyboardInterrupt: ignored

In [None]:
# with open(tile_path, 'rb') as pickle_file:
#   tiles_labeled_reconstr = pickle.load(pickle_file)

In [None]:
# print(tiles_labeled_reconstr)