# Public Eye - Sample Version

**Upload a picture of a crowd and let Public Eye count/estimate the number of people in it!**

This website is a slow and minimal sample version of the full Public Eye system.

For more information about Public Eye, please get in touch at ADD_EMAIL_HERE.

In [1]:
%%capture
cd ..

In [2]:
import os
from tqdm.notebook import tqdm

import numpy as np
import torch

from PIL import Image
import matplotlib.pyplot as plt
from matplotlib import cm

import models.ViCCT_models
import models.Swin_ViCCT_models
from timm.models import create_model

from datasets.dataset_utils import img_equal_split, img_equal_unsplit
import torchvision.transforms as standard_transforms

from fastai.vision.widgets import *
from fastbook import *

In [3]:
# Set some parameters
# Several parameters need to be defined to run this notebook.

# First, which model will we use?
# The generic ViCCT version 1 model is specified with 'ViCCT_base'.
# The version 2 ViCCT model, which has Swin as its base, is specified with 'Swin_ViCCT_large_22k'.
# model_name = 'ViCCT_base'
model_name = 'Swin_ViCCT_large_22k'

# The model is trained to perform crowd counting. Here we specify where the weights of the trained model are located.
# Pick the weights file that belongs to the architecture chosen in 'model_name'.
# weights_path = 'models/trained_models/ViCCT_base_generic_1300_epochs.pth'
weights_path = 'models/trained_models/Swin_ViCCT_large_22k_generic_1600_epochs.pth'

# Some images have an extremely high resolution. When each head in an image occupies many pixels (e.g. something
# like 100 x 100 pixels), the model is unable to make good predictions. One way to overcome this issue is to scale
# the image by some factor. A factor of 1. means no scaling is performed.
# NOTE: in this notebook the factor is read from the 'Scale Factor' slider when the button is clicked, which is why
# the assignment below is commented out.
# scale_factor = 1.

# We might want to save the predictions. Set 'save_results' to True if you want to save the prediction. Three figures
# are saved: 1) The input image for the network. 2) The network's prediction. 3) The prediction overlaid on the input.
# NOTE(review): 'save_results' is not read by any of the cells shown in this notebook - confirm whether saving is
# still supported in this sample version.
save_results = True

# Lastly, do we use cuda? If you have cuda, it's advised to use it.
use_cuda = False

In [4]:
# Load model
def load_model(model_name, weights_path, use_cuda):
    """Create the model, initialise it with the given weights and return it in eval mode.

    Args:
        model_name: Architecture name handed to timm's ``create_model``.
        weights_path: Path of the trained weights, forwarded as ``init_path``.
        use_cuda: When true, the model is placed on the GPU.

    Returns:
        The created model, switched to evaluation mode.
    """
    # Swin-based variants receive ``None`` for both rates; all other variants get an explicit 0.
    # Original author's note: for ViCCT, 'drop_path_rate' is really 'drop_connect' rather than
    # drop-path; what it means for the Swin version was not yet clear to the author.
    rate = None if 'Swin' in model_name else 0.

    net = create_model(  # From the timm library; builds the model-specific architecture.
        model_name,
        init_path=weights_path,
        pretrained_cc=True,
        drop_rate=rate,  # Dropout
        drop_path_rate=rate,
        drop_block_rate=None,  # Not used for ViCCT.
    )

    if use_cuda:
        net = net.cuda()  # Place model on GPU

    return net.eval()

# print('Loading the model...')
# Build the model once at notebook level; process_image() reads this global when making predictions.
model = load_model(model_name, weights_path, use_cuda)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [5]:
# Main Functions


# Only for hardcore users. No need to modify these.
mean_std = (
    [0.485, 0.456, 0.406],  # Per-channel mean of ImageNet
    [0.229, 0.224, 0.225],  # Per-channel std.dev. of ImageNet
)
overlap = 32  # We ensure crops have at least this many pixels of overlap.
ignore_buffer = 16  # When reconstructing the whole density map, ignore this many pixels on crop prediction borders.

# Convert the image to a tensor first, then normalise it with the ImageNet statistics above.
train_img_transform = standard_transforms.Compose([
    standard_transforms.ToTensor(),
    standard_transforms.Normalize(mean=mean_std[0], std=mean_std[1]),
])


def rescale_image(img, scale_factor):
    """Return ``img`` resized by ``scale_factor`` in both dimensions.

    Args:
        img: Image exposing ``size`` as ``(width, height)`` and a ``resize((width, height))``
            method (e.g. a PIL image).
        scale_factor: Multiplier applied to the width and the height. A factor of 1. means
            no scaling is performed.

    Returns:
        The resized image, or ``img`` itself when ``scale_factor`` is 1.
    """
    if scale_factor == 1.:
        return img

    img_w, img_h = img.size

    # Rounding can produce 0 for small images combined with small factors, and a zero-sized
    # target makes the resize fail. Keep every side at least one pixel long.
    new_w = max(1, round(img_w * scale_factor))
    new_h = max(1, round(img_h * scale_factor))

    return img.resize((new_w, new_h))


def prepare_loaded_image(img):
    """Normalise an image and split it into crops for the network.

    Args:
        img: Loaded image exposing ``size`` as ``(width, height)``.

    Returns:
        A tuple ``(img_stack, pred_stack, img_h, img_w)``: the stack of crops (moved to the
        GPU when ``use_cuda`` is set), a zero-filled CPU tensor with one single-channel
        224 x 224 slot per crop to hold the predictions, and the image height and width.
    """
    crop_size = 224  # Side length of the square crops and of each prediction slot.

    # Read the dimensions before the transform replaces the image with a tensor.
    width, height = img.size

    # Normalise, then split while ensuring a minimum of 'overlap' pixels of overlap between crops.
    normalised = train_img_transform(img)
    crops = img_equal_split(normalised, crop_size, overlap)
    if use_cuda:
        crops = crops.cuda()  # Place image stack on GPU

    # Placeholder in which the model predictions are stored, one entry per crop.
    n_crops = crops.shape[0]
    predictions = torch.zeros(n_crops, 1, crop_size, crop_size)

    return crops, predictions, height, width


def process_image(img_stack, pred_stack, img_h, img_w):
    """Run the model on every crop and stitch the predictions into one density map.

    Args:
        img_stack: Stack of image crops; iterated along its first dimension.
        pred_stack: Pre-allocated tensor that receives one prediction per crop. It is
            filled in place.
        img_h: Height of the image the crops were taken from.
        img_w: Width of the image the crops were taken from.

    Returns:
        A tuple ``(den, pred_cnt)``: the density map of the whole image with the channel
        dimension squeezed out, and the predicted count as the sum of that map divided
        by 3000.
    """
    if not use_cuda and img_stack.shape[0] > 100:  # If on CPU and more than 100 image crops.
        print('\033[93m'
              'WARNING: you are making a prediction on a very large image. This might take a long time! '
              'You may want to use a lower "Scale Factor" value for faster processing. '
              'You can stop a running process by pressing F5.'
              '\033[0m')

    with torch.no_grad():  # Don't track gradients
        print(f"Processing {len(img_stack)} image parts.")
        for idx, img_crop in enumerate(tqdm(img_stack)):  # For each image crop
            # Call the module itself instead of .forward() so that registered hooks are run.
            pred_stack[idx] = model(img_crop.unsqueeze(0)).cpu()  # Make prediction.
    print('Done!')

    # Unsplit the prediction crops to get the entire density map of the image.
    den = img_equal_unsplit(pred_stack, overlap, ignore_buffer, img_h, img_w, 1)
    den = den.squeeze()  # Remove the channel dimension

    # Compute the predicted count, which is the sum of the entire density map. Note that the model is trained with
    # density maps scaled by a factor of 3000 (see sec 5.2 of the author's thesis for why:
    # https://scripties.uba.uva.nl/search?id=723178).
    pred_cnt = den.sum() / 3000

    return den, pred_cnt


def show_overlay(input_image, den, pred_cnt):
    """Plot the input image with the predicted density highlighted on top of it.

    Args:
        input_image: Image that is converted to an array with ``np.array``. The first three
            channels of the last axis are modified (assumes an H x W x 3 RGB layout whose
            height and width match ``den`` - TODO confirm against callers).
        den: Density map tensor predicted by the network (still scaled by 3000).
        pred_cnt: Predicted count, shown in the figure title.
    """
    img_heat = np.array(input_image)
    den_heat = den.clone().numpy()

    den_heat = den_heat / 3000  # Scale values to original domain
    den_heat[den_heat < 0] = 0  # Remove negative values

    # Normalise between 0 and 1. Skip this when nothing positive was predicted: dividing
    # by a zero maximum would only fill the map with NaN and trigger a runtime warning.
    peak = den_heat.max()
    if peak > 0:
        den_heat = den_heat / peak

    den_heat **= 0.5  # Reduce large values, increase small values
    den_heat *= 255  # Values from 0 to 255 now
    den_heat[den_heat < 50] = 0  # Threshold of 50

    # Pixels that survive the threshold: halve red and green, write the density into blue.
    mask = den_heat > 0
    img_heat[:, :, 0][mask] = img_heat[:, :, 0][mask] / 2
    img_heat[:, :, 1][mask] = img_heat[:, :, 1][mask] / 2
    img_heat[:, :, 2][mask] = den_heat[mask]

    plt.figure(figsize=(1440/200, 810/200), dpi=200)
    plt.imshow(img_heat)
    plt.title(f'Predicted count: {pred_cnt:.3f}')
    plt.show()

In [6]:
# Create widget items.
btn_upload = widgets.FileUpload()  # File picker; the click handler uses the last uploaded file.
# Slider that sets the factor by which the uploaded image is rescaled before prediction (0.05 to 1, default 0.5).
scale_factor_slider = widgets.FloatSlider(value=0.5, min=0.05, max=1, step=0.05, description="Scale Factor")
out_pl = widgets.Output()  # Output area that captures the prints and figures of a run.
lbl_pred = widgets.Label()  # NOTE(review): placed in the layout but never given text in the cells shown.
btn_run = widgets.Button(description='Count People')  # Starts the prediction when clicked.

In [7]:
# Define click interaction.
def on_click_classify(change):
    """Handle a click on the run button: predict on the last uploaded image and plot the results.

    All prints and figures are sent to the ``out_pl`` output widget, which is cleared first.
    Three figures are shown: the (rescaled) input image, the predicted density map and the
    prediction overlaid on the input.

    Args:
        change: Argument supplied by the button's click callback; not used.
    """
    out_pl.clear_output()
    with out_pl:

        # Without an upload there is nothing to index into; tell the user instead of failing.
        if not btn_upload.value:
            print('Please upload an image first.')
            return

        # Use the most recently uploaded file.
        input_image = PILImage.create(io.BytesIO(btn_upload.value[-1].content))

        # Scale image
        scale_factor = scale_factor_slider.value
        print(f"Scale factor used for downscaling image: {scale_factor}")
        image = rescale_image(input_image, scale_factor)

        # Show input image
        plt.figure(figsize=(1440/200, 810/200), dpi=200)
        plt.imshow(image, cmap=cm.jet)
        plt.title('Input image for the network')
        plt.show()

        # Process image
        img_stack, pred_stack, img_h, img_w = prepare_loaded_image(image)
        den, pred_cnt = process_image(img_stack, pred_stack, img_h, img_w)

        # Show model prediction
        plt.figure(figsize=(1440/200, 810/200), dpi=200)
        plt.imshow(den, cmap=cm.jet)
        plt.title(f'Predicted count: {pred_cnt:.3f}')
        plt.show()

        # Show overlay
        show_overlay(image, den, pred_cnt)

btn_run.on_click(on_click_classify)

In [8]:
# Render app.
# Stack the widgets vertically. The heading closes both of its tags so the markup is well-formed.
VBox([widgets.HTML(value="<b><font color='DarkRed'>Choose an image to upload:</font></b>"),
      btn_upload, scale_factor_slider, btn_run, out_pl, lbl_pred])

VBox(children=(HTML(value="<b><font color='DarkRed'>Choose an image to upload:</b>"), FileUpload(value=(), des…