# Set up Segmentation


## Environment Setup

First, download the code and pretrained models if we are on colab.

In [None]:
import glob, os, cv2, shutil, sys
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageFilter 
from skimage import morphology

'=2.0.1'	        download_ADE20K.sh     install.log   requirements.txt
 config		        encoder_epoch_20.pth   LICENSE	     setup.py
 data		        eval_multipro.py       mit_semseg    teaser
 decoder_epoch_20.pth   eval.py		       notebooks     test.py
 demo_test.sh	        image.png	       README.md     train.py


In [None]:
%%bash
# Colab-specific setup: fetch the segmentation code and the pretrained weights.
# When the google.colab package directory cannot be found we are not on Colab,
# so leave the working directory untouched.
if ! stat -t /usr/local/lib/*/dist-packages/google/colab > /dev/null 2>&1; then
    exit
fi
# Redirections are applied left to right: stdout must be pointed at the log
# first and stderr duplicated onto it afterwards, otherwise stderr bypasses
# install.log.
pip install yacs >> install.log 2>&1
git init >> install.log 2>&1
git remote add origin https://github.com/CSAILVision/semantic-segmentation-pytorch.git 2>> install.log
git pull origin master >> install.log 2>&1
# DOWNLOAD_ONLY=1 is passed to the repository's demo script; only its stderr is logged.
DOWNLOAD_ONLY=1 ./demo_test.sh 2>> install.log

In [None]:
# System libs
import os, csv, torch, numpy, scipy.io, PIL.Image, torchvision.transforms
# Our libs
from mit_semseg.models import ModelBuilder, SegmentationModule
from mit_semseg.utils import colorEncode

# Colour table: the 'colors' entry of the .mat file, passed to colorEncode by visualize_result.
colors = scipy.io.loadmat('data/color150.mat')['colors']

# Class-name table: first CSV column is the integer key, and the sixth column
# holds ';'-separated names of which only the first is kept.
with open('data/object150_info.csv') as f:
    rows = csv.reader(f)
    next(rows)  # skip the header row
    names = {int(row[0]): row[5].split(";")[0] for row in rows}

def visualize_result(img, pred, index=None):
    """Display ``img`` next to the colour-coded prediction ``pred``.

    When ``index`` is given, every pixel whose predicted class differs from
    it is set to -1 on a copy of ``pred`` (the caller's array is left alone)
    and the name stored under ``names[index + 1]`` is printed.
    """
    shown = pred
    if index is not None:
        # work on a copy so the caller's prediction map is not modified
        shown = pred.copy()
        shown[shown != index] = -1
        print(f'{names[index+1]}:')

    # colour every label, then put the picture and the label map side by side
    label_rgb = colorEncode(shown, colors).astype(numpy.uint8)
    side_by_side = numpy.concatenate((img, label_rgb), axis=1)
    display(PIL.Image.fromarray(side_by_side))

## Loading the segmentation model

In [None]:
# Network Builders
FC_DIM = 2048       # feature width handed to both the encoder and the decoder builder
NUM_CLASSES = 150   # number of output classes requested from the decoder

# Encoder and decoder are each restored from their own checkpoint file.
net_encoder = ModelBuilder.build_encoder(
    weights='encoder_epoch_20.pth',
    arch='resnet50dilated',
    fc_dim=FC_DIM)
net_decoder = ModelBuilder.build_decoder(
    weights='decoder_epoch_20.pth',
    arch='ppm_deepsup',
    fc_dim=FC_DIM,
    num_class=NUM_CLASSES,
    use_softmax=True)

# Loss object required by the SegmentationModule constructor; label -1 is ignored.
crit = torch.nn.NLLLoss(ignore_index=-1)

segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
segmentation_module.eval()   # inference mode
segmentation_module.cuda()   # move to the GPU; as the last expression this also shows the module

## Load test data

Now we load and normalize a single test image.  Here we use the commonplace convention of normalizing the image to a scale for which the RGB values of a large photo dataset would have zero mean and unit standard deviation.  (These numbers come from the ImageNet dataset.)  With this normalization, the limiting ranges of RGB values are within about (-2.2 to +2.7).

In [None]:
# Load and normalize one image as a singleton tensor batch
# Per-channel RGB mean and standard deviation measured across a large photo dataset.
RGB_MEAN = [0.485, 0.456, 0.406]
RGB_STD = [0.229, 0.224, 0.225]

_as_tensor = torchvision.transforms.ToTensor()
_standardize = torchvision.transforms.Normalize(mean=RGB_MEAN, std=RGB_STD)

# Conversion to a tensor first, then per-channel standardization.
pil_to_tensor = torchvision.transforms.Compose([_as_tensor, _standardize])

In [None]:
def get_houghlines(edges, min_line_length=None, max_line_gap=None):
  """Redraw only the long, slanted straight segments found in an edge map.

  The probabilistic Hough transform is run on ``edges``; every detected
  segment whose horizontal AND vertical extent both exceed 5 pixels is drawn
  onto a blank canvas, which is then dilated with a 10x10 kernel. Segments
  that are almost horizontal or almost vertical are dropped (the original
  comment gives the reason: "we don't want edges in the border").

  Parameters
  ----------
  edges : numpy.ndarray
      2-D edge image; it is cast to uint8 before the transform.
  min_line_length, max_line_gap : number, optional
      Forwarded to ``cv2.HoughLinesP``. When omitted, the module-level names
      ``minLineLength`` / ``maxLineGap`` are used if they exist (this is how
      the function was originally configured); otherwise 200 and 1, the
      values the frame loop assigns to those names.

  Returns
  -------
  numpy.ndarray
      float64 array shaped like ``edges``: 255 on the dilated segments, 0
      elsewhere. All zeros (and "no lines" is printed) when the transform
      finds nothing.
  """
  # Fall back to the legacy globals so existing callers keep their settings,
  # but do not fail when the function is used before they are defined.
  if min_line_length is None:
    min_line_length = globals().get("minLineLength", 200)
  if max_line_gap is None:
    max_line_gap = globals().get("maxLineGap", 1)

  segments = cv2.HoughLinesP(edges.astype("uint8"), 1, np.pi/180, 15,
                             minLineLength=min_line_length, maxLineGap=max_line_gap)
  edge_combined = np.zeros(edges.shape)

  # HoughLinesP returns None rather than an empty array when nothing is found.
  if segments is None:
    print("no lines")
    return edge_combined

  # tolist() yields plain Python ints for the end points handed to cv2.line.
  for x1, y1, x2, y2 in segments.reshape(-1, 4).tolist():
    if abs(x1 - x2) > 5 and abs(y1 - y2) > 5:  # we don't want edges in the border
      cv2.line(edge_combined, (x1, y1), (x2, y2), color=(255, 255, 255))

  dilate_kernel = np.ones((10, 10), np.uint8)
  return cv2.dilate(edge_combined, dilate_kernel, iterations=1)

# Install detectron2

In [None]:
!git clone https://github.com/facebookresearch/detectron2
# install dependencies: 
!pip install pyyaml==5.1 pycocotools>=2.0.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

1.7.0+cu101 True
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [None]:
import torch
assert torch.__version__.startswith("1.7")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

** fvcore version of PathManager will be deprecated soon. **
** Please migrate to the version in iopath repo. **
https://github.com/facebookresearch/iopath 

** fvcore version of PathManager will be deprecated soon. **
** Please migrate to the version in iopath repo. **
https://github.com/facebookresearch/iopath 



# Pre-trained detectron2 model

In [None]:
# One model-zoo entry supplies both the config file and the checkpoint URL.
MASK_RCNN_ZOO_ENTRY = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file(MASK_RCNN_ZOO_ENTRY))
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MASK_RCNN_ZOO_ENTRY)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Check classes information (shown as the cell output)
MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

In [None]:
import glob, numpy

# ---- configuration -------------------------------------------------------
FRAMES_DIR = "VIDEO FRAMES PATH"   # directory holding frame1.jpg, frame2.jpg, etc
OUTPUT_DIR = "detectron_mask"      # the per-frame result images are written here
all_frames = glob.glob(os.path.join(FRAMES_DIR, "*.jpg"))

W = 10           # number of leading frames whose edge maps are stored as history
w_count = 0      # NOTE(review): never read in this cell
edge_rep = None  # (height, width, W) edge history, allocated from the first frame's size

# Hough settings; kept as module-level names because get_houghlines may read them.
minLineLength = 200
maxLineGap = 1

structure_classes = [6, 11]  # road, sidewalk
# person, bicycle, car, bus and id 7.
# NOTE(review): the original comment called id 7 "train"; in the COCO 80-class
# order id 7 is "truck" and "train" is 6 - confirm which one was intended.
important_classes = [0, 1, 2, 5, 7]

dilate_kernel = np.ones((10, 10), np.uint8)
erode_kernel = np.ones((10, 10))
open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))

os.makedirs(OUTPUT_DIR, exist_ok=True)
# Build the predictor once instead of once per frame.
predictor = DefaultPredictor(cfg)

# Frames are numbered from 1, so the last index is len(all_frames) itself.
for count in range(1, len(all_frames) + 1):  # each frame
  f_name = os.path.join(FRAMES_DIR, "frame%d.jpg" % count)

  # ---- segmentation -------------------------------------------------------
  pil_image = PIL.Image.open(f_name).convert('RGB')
  img_data = pil_to_tensor(pil_image)
  singleton_batch = {'img_data': img_data[None].cuda()}
  output_size = img_data.shape[1:]

  with torch.no_grad():
    scores = segmentation_module(singleton_batch, segSize=output_size)

  # Index of the highest-scoring class for each pixel
  _, pred = torch.max(scores, dim=1)
  pred = pred.cpu()[0].numpy()

  # filter out other classes
  pred_clean = pred.copy()
  pred_clean[~np.isin(pred_clean, structure_classes)] = 0

  # filter out small islands (connected regions below 16000 pixels)
  pred_clean2 = morphology.remove_small_objects(pred_clean.astype(bool), min_size=16000).astype(int)*255

  # combine mask with correct class labels
  pred_clean3 = np.minimum(pred_clean, pred_clean2)

  # get structure edges and get only long ones
  # (the *255 wraps around in uint8; the kept labels still map to non-zero grey levels)
  image = Image.fromarray(np.uint8(pred_clean3 * 255), 'L')
  image_edge = np.array(image.filter(ImageFilter.FIND_EDGES))
  image_edge = cv2.dilate(image_edge, dilate_kernel, iterations=1)

  # Same Hough filtering that was previously written out inline here.
  edges = get_houghlines(image_edge)

  # ---- edge history -------------------------------------------------------
  if edge_rep is None:
    edge_rep = np.zeros(edges.shape + (W,))

  if count <= W:
    edge_rep[:, :, count-1] = edges
  else:
    # NOTE(review): edge_rep is not updated after the first W frames, so the
    # history stays fixed to those frames - confirm this is intended.
    # Pixel-wise maximum over the stored history and the current edge map.
    hist_curr = np.maximum(edge_rep.max(axis=2), edges)
    hist_curr = cv2.erode(get_houghlines(hist_curr), erode_kernel)
    hist_curr = cv2.morphologyEx(hist_curr, cv2.MORPH_OPEN, open_kernel, iterations=3)
    edges = cv2.erode(get_houghlines(hist_curr), erode_kernel)

  # ---- detectron2 ---------------------------------------------------------
  im = cv2.imread(f_name)
  instances = predictor(im)["instances"]

  # Default: with no important objects, just show edges
  masks_comb = edges
  if instances.has("pred_classes") and instances.has("pred_masks"):
    keep = np.isin(instances.pred_classes.cpu().numpy(), important_classes)
    if keep.any():
      masks = instances.pred_masks.cpu().numpy()
      # union of the masks of all important objects
      masks_comb = masks[keep].max(axis=0)

  # ---- save ---------------------------------------------------------------
  print("frame %d" % count)
  # A fresh figure per frame, closed after saving, so images do not pile up.
  fig, ax = plt.subplots()
  ax.imshow(masks_comb, cmap="gray")
  ax.axis("off")
  filename = "frame_%d_seg.jpg" % count
  fig.savefig(os.path.join(OUTPUT_DIR, filename), bbox_inches='tight', pad_inches=0)
  plt.close(fig)

print("finished")

## Make Videos

In [None]:
import cv2
import numpy as np
import moviepy.editor as mp
import glob
import os

fps = 20
size = (334, 188)  # (width, height) of the output video

vid_pathOut = "YOUR VIDEO OUTPUT NAME.avi"
frames_dir = 'IMAGES IN SEGMENTED FRAMES PATH'
# File-name pattern used by the segmentation loop when it saves each frame.
frame_pattern = "frame_%d_seg.jpg"

out = cv2.VideoWriter(vid_pathOut, cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
try:
  allframes = glob.glob(os.path.join(frames_dir, '*.jpg'))
  # Frames are numbered from 1, so the last index is len(allframes) itself.
  for i in range(1, len(allframes) + 1):
    filename = os.path.join(frames_dir, frame_pattern % i)
    img = cv2.imread(filename)
    if img is None:
      # imread signals a missing or unreadable file by returning None
      print("skipping unreadable frame %s" % filename)
      continue
    # Bring every frame to the size the writer was opened with.
    if (img.shape[1], img.shape[0]) != size:
      img = cv2.resize(img, size)
    out.write(img)
finally:
  # Always finalize the video file, even if a frame fails.
  out.release()

clip = mp.VideoFileClip(vid_pathOut)
