In [1]:
%%bash
# Colab-specific setup.
# The first command exits immediately when the google.colab package directory
# cannot be stat'ed, so nothing below runs outside Colab.
!(stat -t /usr/local/lib/*/dist-packages/google/colab > /dev/null 2>&1) && exit
# Redirections are applied left to right: stdout must be pointed at the log
# file *before* stderr is duplicated onto it, otherwise stderr still goes to
# the cell output instead of install.log.
pip install yacs >> install.log 2>&1
git init >> install.log 2>&1
git remote add origin https://github.com/jeongrok/semantic-segmentation-pytorch.git 2>> install.log
git pull origin master >> install.log 2>&1
# NOTE(review): presumably fetches the pretrained checkpoints without running
# the demo -- confirm against demo_test.sh.
DOWNLOAD_ONLY=1 ./demo_test.sh 2>> install.log

hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 	git config --global init.defaultBranch <name>
hint: 
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint: 
hint: 	git branch -m <name>
From https://github.com/jeongrok/semantic-segmentation-pytorch
 * branch            master     -> FETCH_HEAD
 * [new branch]      master     -> origin/master


In [2]:
# System libs
import os, csv, torch, numpy as np, scipy.io, PIL.Image, torchvision.transforms, glob, matplotlib.pyplot as plt, cv2
# Our libs
from mit_semseg.models.models import ModelBuilder, SegmentationModule
from mit_semseg.utils import colorEncode

# Colour palette used by colorEncode to paint class indices.
colors = scipy.io.loadmat('data/color150.mat')['colors']

# Map the integer in the first CSV column to the first ';'-separated entry of
# the sixth column; the header row is skipped.
with open('data/object150_info.csv') as info_file:
    info_rows = csv.reader(info_file)
    next(info_rows)
    names = {int(fields[0]): fields[5].split(";")[0] for fields in info_rows}

In [3]:
# Network builders: the encoder and decoder are configured through keyword
# dictionaries and built in that order (encoder first, then decoder).
encoder_options = dict(
    arch='resnet50dilated',
    fc_dim=2048,
    weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth',
)
decoder_options = dict(
    arch='ppm_deepsup',
    fc_dim=2048,
    num_class=150,
    weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth',
    use_softmax=True,
)
net_encoder = ModelBuilder.build_encoder(**encoder_options)
net_decoder = ModelBuilder.build_decoder(**decoder_options)

# Loss handed to the wrapper module; label -1 is ignored by the criterion.
crit = torch.nn.NLLLoss(ignore_index=-1)
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
# Inference only: switch to eval mode, then move the module to the GPU.
segmentation_module.eval()
segmentation_module.cuda()

Loading weights for net_encoder
Loading weights for net_decoder


SegmentationModule(
  (encoder): ResnetDilated(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): SynchronizedBatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): SynchronizedBatchNorm2d(128, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): SynchronizedBatchNorm2d(64, eps=1

In [4]:
# Mount Google Drive at /content/gdrive/ (always remounting), then move the
# working directory into the project folder on the drive.
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)
# From here on, relative paths used by later cells ('bedroom/...',
# './resnetcv/') resolve inside this folder. The earlier cells read
# 'data/...' and 'ckpt/...' relative to the previous working directory, so
# they have to run before this one.
%cd /content/gdrive/My Drive/CV Project

Mounted at /content/gdrive/
/content/gdrive/My Drive/CV Project


In [5]:
# Preprocessing applied to every input image: convert the PIL image to a
# tensor, then normalize each channel.
# These are RGB mean+std values across a large photo dataset.
rgb_mean = [0.485, 0.456, 0.406]
rgb_std = [0.229, 0.224, 0.225]
normalize_rgb = torchvision.transforms.Normalize(mean=rgb_mean, std=rgb_std)
pil_to_tensor = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    normalize_rgb,
])

In [6]:
def readImage(img_name):
    """Segment one image file and return the image plus its colour-coded mask.

    Args:
        img_name: path of the image to open.

    Returns:
        (img_original, model_pred): the RGB image as a numpy array, and the
        uint8 colour encoding of the prediction in which only pixels predicted
        as class index 0 keep their label; every other pixel is relabelled -1
        before encoding.
        NOTE(review): class 0 is presumably "wall" -- confirm against the
        label list.
    """
    rgb_image = PIL.Image.open(img_name).convert('RGB')
    tensor_image = pil_to_tensor(rgb_image)

    # The network takes a dict holding a batch; [None] adds the batch axis.
    batch = {'img_data': tensor_image[None].cuda()}
    with torch.no_grad():
        scores = segmentation_module(batch, segSize=tensor_image.shape[1:])

    # Per-pixel index of the highest-scoring class.
    _, labels = torch.max(scores, dim=1)
    labels = labels.cpu()[0].numpy()
    labels[labels != 0] = -1

    encoded = colorEncode(labels, colors).astype(np.uint8)
    return np.array(rgb_image), encoded

def resizeAndPad(img):
    """Return ``img`` resized and padded to exactly (height + 2, width + 2).

    The image is scaled with cubic interpolation, keeping its aspect ratio,
    so that its longer side reaches the target size; the shorter side is then
    padded with value 255, split between the two borders (floor on the
    top/left, ceil on the bottom/right). selectWall uses the result as the
    floodFill mask.
    """
    src_h, src_w = img.shape[:2]
    target_h, target_w = src_h + 2, src_w + 2
    ratio = src_w / src_h

    top = bottom = left = right = 0
    if ratio > 1:
        # Landscape: match the width, pad above and below.
        out_w = target_w
        out_h = np.round(out_w / ratio).astype(int)
        slack = (target_h - out_h) / 2
        top, bottom = np.floor(slack).astype(int), np.ceil(slack).astype(int)
    elif ratio < 1:
        # Portrait: match the height, pad left and right.
        out_h = target_h
        out_w = np.round(out_h * ratio).astype(int)
        slack = (target_w - out_w) / 2
        left, right = np.floor(slack).astype(int), np.ceil(slack).astype(int)
    else:
        # Square: plain resize, no padding needed.
        out_h, out_w = target_h, target_w

    resized = cv2.resize(img, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
    return cv2.copyMakeBorder(
        resized, top, bottom, left, right,
        borderType=cv2.BORDER_CONSTANT, value=[255] * 3)

def getOutlineImg(img):
    """Return the Canny edge map of ``img`` after a 3x3 Gaussian blur.

    The two Canny thresholds are the blurred image's mean minus / plus one
    standard deviation, clamped to [0, 255] and truncated to int.
    """
    smoothed = cv2.GaussianBlur(img, (3, 3), 0)
    center, spread = np.mean(smoothed), np.std(smoothed)
    low_thresh = int(max(0, (center - spread)))
    high_thresh = int(min(255, (center + spread)))
    return cv2.Canny(smoothed, low_thresh, high_thresh)

def getColoredImage(img, new_color):
    """Return a copy of RGB ``img`` tinted with ``new_color``.

    Every pixel receives the hue and saturation of ``new_color`` (an RGB
    triple) while keeping its own value channel.
    """
    # HSV components of the requested colour, via a 1x1 image conversion.
    target_hsv = cv2.cvtColor(np.uint8([[new_color]]), cv2.COLOR_RGB2HSV)[0][0]

    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_RGB2HSV))
    flat_hue = np.full_like(hue, target_hsv[0])
    flat_sat = np.full_like(sat, target_hsv[1])

    recolored_hsv = cv2.merge([flat_hue, flat_sat, val])
    return cv2.cvtColor(recolored_hsv, cv2.COLOR_HSV2RGB)

def selectWall(outline_img, position):
    """Flood-fill ``outline_img`` from each seed point and return the filled area.

    Args:
        outline_img: single-channel edge image; it is not modified.
        position: iterable of seed points passed to cv2.floodFill.

    Returns:
        The flood-filled copy (fill value 255) with the original outline
        subtracted from it.
    """
    filled = outline_img.copy()
    # floodFill takes a mask two pixels larger than the image in each dimension.
    fill_mask = resizeAndPad(outline_img)
    for seed in position:
        cv2.floodFill(filled, fill_mask, seed, 255)
    return cv2.subtract(filled, outline_img)

def getSamples(mask):
    """Pick random seed points that lie inside the region of gray value 120.

    Args:
        mask: 3-channel colour-encoded prediction (as returned by readImage).

    Returns:
        A list of ``(x, y)`` integer tuples (column, row). A sampled pixel is
        kept only if the four diagonal neighbours at offset ``a`` (1/30 of the
        shorter image side) are inside the image and also have gray value 120.
        Returns an empty list when the mask contains no such pixel.

    Uses the global ``np.random`` state; seed it beforehand for repeatable
    results.
    """
    gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    a = min(h // 30, w // 30)

    is_wall = gray == 120
    population = np.argwhere(is_wall)
    n_wall = len(population)
    if n_wall == 0:
        # log2(0) is -inf; there is nothing to sample from anyway.
        return []

    # Draw 5 * floor(log2(N)) candidates, capped at N because sampling is
    # done without replacement.
    s = min(int(np.log2(n_wall)) * 5, n_wall)
    picks = np.random.choice(n_wall, s, replace=False)

    final_inds = []
    for row, col in population[picks]:
        r_lo, r_hi = row - a, row + a
        c_lo, c_hi = col - a, col + a
        # Explicit bounds check: negative indices would otherwise wrap around.
        if r_lo < 0 or c_lo < 0 or r_hi >= h or c_hi >= w:
            continue
        if (is_wall[r_lo, c_lo] and is_wall[r_lo, c_hi]
                and is_wall[r_hi, c_lo] and is_wall[r_hi, c_hi]):
            # OpenCV points are (x, y) == (column, row); plain ints are used
            # so the tuples are accepted as points regardless of numpy dtype.
            final_inds.append((int(col), int(row)))
    return final_inds

def mergeMasks(mask1, mask2):
    """Combine two masks, giving ``mask1`` priority where it is above 50.

    Pixels where ``mask1`` exceeds the threshold are taken from ``mask1``;
    all remaining pixels are taken from ``mask2``.
    """
    _, keep_first = cv2.threshold(mask1, 50, 255, cv2.THRESH_BINARY)
    keep_second = cv2.bitwise_not(keep_first)
    first_part = cv2.bitwise_and(mask1, mask1, mask=keep_first)
    second_part = cv2.bitwise_and(mask2, mask2, mask=keep_second)
    return cv2.bitwise_or(first_part, second_part)

def mergeImages(img, colored_img, wall):
    """Compose the output image from the recoloured and the original picture.

    ``colored_img`` is masked with ``wall``, ``img`` is masked with the
    bitwise inverse of ``wall``, and the two masked images are XOR-ed together.
    """
    outside_wall = cv2.bitwise_not(wall)
    painted = cv2.bitwise_and(colored_img, colored_img, mask=wall)
    untouched = cv2.bitwise_and(img, img, mask=outside_wall)
    return cv2.bitwise_xor(painted, untouched)

def saveImage(img_name, img):
    """Write RGB ``img`` to ``./resnetcv/`` under the file name of ``img_name``.

    Args:
        img_name: path of the source image; only its final component is used
            as the output file name (for 'bedroom/<file>' this is the same
            name the former ``img_name[8:]`` slice produced).
        img: RGB image array; converted to BGR, the channel order imwrite
            expects.
    """
    out_dir = "./resnetcv/"
    # cv2.imwrite does not create missing directories, so create it up front.
    os.makedirs(out_dir, exist_ok=True)
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(out_dir + os.path.basename(img_name), bgr)

def combine_images(img, edge):
    """Blend the grayscale version of ``img`` (70%) with ``edge`` (30%)."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.addWeighted(gray, 0.7, edge, 0.3, 0)

def getCircles(img, inds):
    """Draw a filled circle of radius 10 on ``img`` at every point in ``inds``.

    ``img`` is modified in place; nothing is returned.
    """
    radius, color, thickness = 10, (255, 0, 0), -1  # thickness -1 == filled
    for center in inds:
        cv2.circle(img, center, radius, color, thickness)

def changeColor(img_name, new_color):
    """Recolour the region selected from the segmentation of ``img_name``.

    Steps: segment the image, build an edge barrier from the edges of the
    prediction and of the photo, flood-fill from seed points sampled inside
    the prediction, tint the selected area with ``new_color`` and save the
    result through saveImage.

    Returns:
        (final_img, selected_wall): the recoloured RGB image and the mask of
        the selected region.
    """
    original, prediction = readImage(img_name)

    # Edges of the prediction take priority over the photo edges.
    photo_edges = getOutlineImg(original)
    prediction_edges = getOutlineImg(prediction)
    barrier = mergeMasks(prediction_edges, photo_edges)

    seeds = getSamples(prediction)
    wall_region = selectWall(barrier, seeds)

    recolored = getColoredImage(original, new_color)
    result = mergeImages(original, recolored, wall_region)
    saveImage(img_name, result)
    return result, wall_region

def stats(pred_mask, true_mask):
    """Compute pixel accuracy, precision and recall of a binary mask.

    Pixels equal to 255 are treated as positive in both masks; every other
    value is negative.

    Args:
        pred_mask: 2-D predicted mask.
        true_mask: 2-D ground-truth mask of the same shape.

    Returns:
        (accuracy, precision, recall). Precision is 0.0 when the prediction
        has no positive pixel and recall is 0.0 when the ground truth has
        none, instead of dividing by zero and yielding NaN.
    """
    total_pixels = pred_mask.shape[0] * pred_mask.shape[1]
    pred_pos = pred_mask == 255
    true_pos = true_mask == 255

    accuracy = np.sum(pred_pos == true_pos) / total_pixels

    hits = np.sum(pred_pos & true_pos)
    n_pred = np.sum(pred_pos)
    n_true = np.sum(true_pos)
    # Guard the denominators so one empty mask cannot poison averaged totals.
    precision = hits / n_pred if n_pred else 0.0
    recall = hits / n_true if n_true else 0.0
    return accuracy, precision, recall

def combine_masks(masks):
    """Return the bitwise OR of all masks in ``masks``.

    With a single mask that mask itself is returned; an empty sequence raises
    IndexError.
    """
    union = masks[0]
    idx = 1
    while idx < len(masks):
        union = cv2.bitwise_or(union, masks[idx])
        idx += 1
    return union



In [7]:
# Evaluate the full recolouring pipeline (changeColor) against the
# ground-truth wall masks stored next to each image in 'bedroom/'.
# NOTE: getSamples draws its seed points from np.random, so the figures vary
# between runs unless the generator is seeded beforehand.
new_color = [111, 209, 201]
num_images = 100  # images are numbered 1..num_images
total_accuracy = 0
total_precision = 0
total_recall = 0
for i in range(1, num_images + 1):
    mask_paths = glob.glob('bedroom/' + str(i) + 'wall*')
    img = glob.glob('bedroom/' + str(i) + '_img*')
    if not mask_paths or not img:
        # Fail with the offending index instead of an opaque IndexError.
        raise FileNotFoundError('missing image or wall mask for sample %d' % i)

    # An image may come with several wall masks; their union is the truth.
    masks = [cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY) for path in mask_paths]
    final_mask = combine_masks(masks)

    final_img, wall = changeColor(img[0], new_color)
    acc, prec, rec = stats(wall, final_mask)
    print(acc)
    total_accuracy += acc
    total_precision += prec
    total_recall += rec
    if i % 20 == 0:
        print('iteration: ', i)

# Average the per-image metrics; F1 is computed from the averaged precision
# and recall.
total_accuracy /= num_images
total_precision /= num_images
total_recall /= num_images
print('Accuracy:', total_accuracy)
print('Precision: ', total_precision)
print('Recall: ', total_recall)
print('F1: ', 2 * (total_precision * total_recall) / (total_precision + total_recall))

0.6464501377410469
0.9735829402515723
0.9214324951171875
0.9776192708333333
0.9144159952799479
0.9258033532664609
0.9373484375
0.796712537435855
0.8419316609700521
0.9695262794723054
0.7767052341597797
0.9175456989247311
0.9544351697273233
0.8641901016235352
0.9427872721354167
0.9445471698113208
0.934844970703125
0.9009703703703704
0.93991875
0.9742634454069621
iteration:  20
0.9464778645833334
0.972707336523126
0.9146432552954292
0.8801103055090785
0.9271354166666667
0.9440736397748593
0.971156965648855
0.9423259259259259
0.8763170798898071
0.9744110107421875
0.95849609375
0.9412180532898314
0.921236914600551
0.7452243395611051
0.9848665659617322
0.9818391927083333
0.9460272988505747
0.9717442196531791
0.9176788330078125
0.9490536971830986
iteration:  40
0.8190364837646484
0.9242563971812802
0.877936914600551
0.9396812678062678
0.9810575938309537
0.9788165983606557
0.8923638222717978
0.9034499123831776
0.643635114034017
0.9650651041666667
0.9582964579264323
0.9022343272171254
0.975774

In [8]:
# Baseline: score the raw network prediction (no edge-based refinement)
# against the same ground-truth wall masks.
num_images = 100  # images are numbered 1..num_images
total_accuracy = 0
total_precision = 0
total_recall = 0
for i in range(1, num_images + 1):
    mask_paths = glob.glob('bedroom/' + str(i) + 'wall*')
    img = glob.glob('bedroom/' + str(i) + '_img*')
    if not mask_paths or not img:
        # Fail with the offending index instead of an opaque IndexError.
        raise FileNotFoundError('missing image or wall mask for sample %d' % i)

    # An image may come with several wall masks; their union is the truth.
    masks = [cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY) for path in mask_paths]
    final_mask = combine_masks(masks)

    _, wall = readImage(img[0])
    # The colour-encoded prediction is turned into a 0/255 mask: pixels whose
    # gray value is 120 are the predicted positives.
    wall = cv2.cvtColor(wall, cv2.COLOR_BGR2GRAY)
    wall = np.where(wall == 120, 255, 0)
    acc, prec, rec = stats(wall, final_mask)
    total_accuracy += acc
    total_precision += prec
    total_recall += rec
    if i % 20 == 0:
        print('iteration: ', i)

# Average the per-image metrics; F1 is computed from the averaged precision
# and recall.
total_accuracy /= num_images
total_precision /= num_images
total_recall /= num_images
print('Accuracy:', total_accuracy)
print('Precision: ', total_precision)
print('Recall: ', total_recall)
print('F1: ', 2 * (total_precision * total_recall) / (total_precision + total_recall))

iteration:  20
iteration:  40
iteration:  60
iteration:  80
iteration:  100
Accuracy: 0.9144454188084155
Precision:  0.8206417537577808
Recall:  0.927590002006083
F1:  0.8708445931207238
