In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive/ so files stored
# on Drive can be read through the local filesystem.
# NOTE(review): the dataset paths in later cells are relative ('drive/MyDrive/...'),
# so they presumably rely on the working directory being /content - confirm.
from google.colab import drive 
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import cv2 as cv
import numpy as np
import os
import random
from google.colab.patches import cv2_imshow

In [None]:
# Build one list holding the path of every training and validation image.
imagePath = []

trainingExamples = 'drive/MyDrive/MPAssignment/Data/BuildingSignageDetection/train'
valExamples = 'drive/MyDrive/MPAssignment/Data/BuildingSignageDetection/val'

# Both folders are scanned the same way: training files first, then validation.
for folder in (trainingExamples, valExamples):
  for filename in os.listdir(folder):
    # Only .jpg / .png files (case-sensitive match) are treated as images.
    if not filename.endswith((".jpg", ".png")):
      continue
    imagePath.append(os.path.join(folder, filename))


In [None]:
# Collect the path of every labelled test image.
testingImagePath = []

# NOTE(review): 'BuildingSinonageDetection' is spelt differently from the
# 'BuildingSignageDetection' folder used for the training data - confirm that
# this is really the folder name on Drive.
directory = 'drive/MyDrive/MPAssignment/Testing Data_withLabels/BuildingSinonageDetection'

for filename in os.listdir(directory):
  # Only .jpg / .png files (case-sensitive match) are treated as images.
  if not filename.endswith((".jpg", ".png")):
    continue
  testingImagePath.append(os.path.join(directory, filename))

# Echo the paths that were found, one per line.
for f in testingImagePath:
  print(f)

In [None]:
#preprocesssing
def preprocessing(image):
    """Mask, dilate, gray-scale and blur a BGR image ahead of edge detection.

    Pixels whose HSV value lies outside the range [0, 0, 163]..[179, 45, 255]
    are blacked out, the remainder is dilated once with a 3x3 rectangular
    kernel, converted to grayscale and smoothed with a 5x5 Gaussian blur.

    Args:
        image: Image that ``cv.cvtColor`` can convert with ``COLOR_BGR2HSV``.

    Returns:
        The blurred single-channel image.
    """
    # HSV window of the pixels that are kept.
    lower_bound = np.array([0, 0, 163])
    upper_bound = np.array([179, 45, 255])

    hsv_image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
    keep_mask = cv.inRange(hsv_image, lower_bound, upper_bound)

    # Everything outside the mask becomes black.
    masked = cv.bitwise_and(image, image, mask=keep_mask)

    # One dilation pass with a 3x3 rectangular structuring element.
    structuring_element = cv.getStructuringElement(cv.MORPH_RECT, (3, 3))
    dilated = cv.dilate(masked, structuring_element, iterations=1)

    gray = cv.cvtColor(dilated, cv.COLOR_BGR2GRAY)

    return cv.GaussianBlur(gray, (5, 5), 0)


In [None]:
def showContours(image, contours, text="Output Contours"):
    """Display ``image`` with the bounding box of every contour drawn in green.

    Args:
        image: Image to annotate. It is copied first, so the caller's image is
            left untouched.
        contours: Iterable of contours accepted by ``cv.boundingRect``.
        text: Label printed above the rendered image.
    """
    output_image = image.copy()

    for contour in contours:
        x, y, w, h = cv.boundingRect(contour)
        cv.rectangle(output_image, (x, y), (x + w, y + h), (0, 255, 0), 1)

    # cv.imshow opens a GUI window and is disabled on Colab; render through the
    # cv2_imshow patch like extractDigits/drawDigits do. cv2_imshow has no
    # window title, so the label is printed instead.
    print(text)
    cv2_imshow(output_image)

In [None]:
def filterByAspectRatio(contours):
    """Keep the contours whose bounding box is 1.3 to 3 times taller than wide.

    Args:
        contours: Iterable of contours accepted by ``cv.boundingRect``.

    Returns:
        list: Contours with 1.3 <= height / width <= 3, in their input order.
    """
    def is_tall_enough(contour):
        # Only the size of the bounding box matters, not its position.
        _, _, box_width, box_height = cv.boundingRect(contour)
        return 1.3 <= box_height / box_width <= 3

    return [contour for contour in contours if is_tall_enough(contour)]
        


In [None]:
def filterByArea(contours, threshold):
    """Keep contours whose bounding-box area is a sensible fraction of ``threshold``.

    Args:
        contours: Iterable of contours accepted by ``cv.boundingRect``.
        threshold: Reference area; the caller in this notebook passes the
            image area.

    Returns:
        list: Contours whose bounding-box area lies strictly between 0.1 % and
        2 % of ``threshold``, in their input order.
    """
    smallest_allowed = threshold * 0.001
    largest_allowed = threshold * 0.02

    survivors = []
    for contour in contours:
        _, _, box_width, box_height = cv.boundingRect(contour)
        if smallest_allowed < box_width * box_height < largest_allowed:
            survivors.append(contour)

    return survivors

In [None]:
def filterContourInsideContour(contours,hierarchy):
    """Drop contours whose centroid (nearly) coincides with another contour's.

    Contours are visited in order. For each one that has not already been
    marked as a duplicate, every other not-yet-kept contour whose centroid is
    less than 2 px away on both axes is marked as a duplicate and dropped.

    Contours with a zero ``m00`` moment get the placeholder centroid (-1, -1),
    so all such contours count as duplicates of one another.

    Args:
        contours: Sequence of contours accepted by ``cv.moments``.
        hierarchy: Unused; accepted so existing callers keep working.

    Returns:
        list: The surviving contours.
    """
    centres = []
    for contour in contours:
        M = cv.moments(contour)
        try:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        except ZeroDivisionError:
            # m00 == 0: the centroid is undefined, use the placeholder.
            # Only this case is handled; any other error is a real problem
            # and is allowed to propagate.
            cX = -1
            cY = -1
        centres.append((cX, cY))

    duplicate = set()
    keep = []          # surviving indices, in output order
    kept = set()       # same indices, for O(1) membership tests
    # Once set, this flag is never cleared (behaviour kept from the original
    # implementation): from then on contours without a close neighbour are
    # only appended by the final sweep below.
    duplicateFound = False

    for i, (cX, cY) in enumerate(centres):
        if i in duplicate:
            continue

        for j, (otherX, otherY) in enumerate(centres):
            close = cX - 2 < otherX < cX + 2 and cY - 2 < otherY < cY + 2
            if i != j and close:
                # The other contour is a duplicate unless it was already kept.
                if j not in kept:
                    duplicate.add(j)
                if i not in kept:
                    keep.append(i)
                    kept.add(i)
                duplicateFound = True

            if not duplicateFound and i not in kept:
                keep.append(i)
                kept.add(i)

    # Final sweep: everything that was never marked as a duplicate survives.
    for i in range(len(centres)):
        if i not in duplicate and i not in kept:
            keep.append(i)
            kept.add(i)

    return [contours[i] for i in keep]


In [None]:
def hasNeighbourHorizontally(contours, mutual=False):
    """Keep the contours that have another contour sitting next to them.

    Contour A "sees" contour B as a neighbour when B's centroid is
    * strictly left or right of A's centroid by less than three times the
      width of A's bounding box, and
    * vertically strictly inside A's bounding box.

    Args:
        contours: Sequence of contours accepted by ``cv.boundingRect`` and
            ``cv.moments``.
        mutual: When True, a neighbour only counts if it sees the contour back.
            The original code intended this check but tested each pair against
            the list it was taken from, which is always true. The default is
            False so existing results do not change.

    Returns:
        list: Contours with at least one neighbour, in their input order.
    """
    boxes = []
    for contour in contours:
        x, y, w, h = cv.boundingRect(contour)
        M = cv.moments(contour)
        try:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        except ZeroDivisionError:
            # m00 == 0: the centroid is undefined, use the placeholder.
            cX = -1
            cY = -1
        boxes.append((cX, cY, y, w, h))

    def sees(a, b):
        """Return True when contour ``a`` regards contour ``b`` as a neighbour."""
        aX, _, aTop, aWidth, aHeight = boxes[a]
        bX, bY = boxes[b][0], boxes[b][1]
        reach = aWidth * 3
        beside = (aX < bX < aX + reach) or (aX - reach < bX < aX)
        return beside and aTop < bY < aTop + aHeight

    count = len(boxes)
    pairs = {(a, b)
             for a in range(count)
             for b in range(count)
             if a != b and sees(a, b)}

    if mutual:
        pairs = {(a, b) for (a, b) in pairs if (b, a) in pairs}

    withNeighbour = {a for a, _ in pairs}
    return [contour for i, contour in enumerate(contours) if i in withNeighbour]


In [None]:
def inSecondContour(second_stat,x1,y1,x2,y2,x3,y3,x4,y4):
    """Tell whether any of four points lies strictly inside a bounding box.

    Args:
        second_stat: ``[index, x, y, w, h]`` describing the box to test against.
        x1, y1, x2, y2, x3, y3, x4, y4: Coordinates of the four points.

    Returns:
        bool: True when at least one point is inside the box; points exactly on
        the box edge do not count.
    """
    left = second_stat[1]
    top = second_stat[2]
    right = left + second_stat[3]
    bottom = top + second_stat[4]

    points = ((x1, y1), (x2, y2), (x3, y3), (x4, y4))
    return any(left < px < right and top < py < bottom for px, py in points)

In [None]:
def removeOverlappingContours(contours):
    """Drop every contour that has a bounding-box corner inside another box.

    A contour is removed when at least one of the four corners of its bounding
    box lies strictly inside the bounding box of a different contour (checked
    with ``inSecondContour``). A box that merely encloses a smaller one is kept,
    because none of its own corners is inside the smaller box.

    Args:
        contours: Sequence of contours accepted by ``cv.boundingRect``.

    Returns:
        list: The non-overlapping contours, in their input order. A single
        contour is returned unchanged; the original implementation dropped it
        because it had nothing to compare against.
    """
    stats = []
    for index, contour in enumerate(contours):
        x, y, w, h = cv.boundingRect(contour)
        stats.append([index, x, y, w, h])

    filtered = []
    for stat in stats:
        index, x, y, w, h = stat

        # Corners in the order: top-left, top-right, bottom-left, bottom-right.
        corners = (x, y, x + w, y, x, y + h, x + w, y + h)

        overlapsOther = any(
            other[0] != index and inSecondContour(other, *corners)
            for other in stats
        )
        if not overlapsOther:
            filtered.append(contours[index])

    return filtered

In [None]:
def filterBorderContours(contours,image_width,image_height):
    """Discard contours whose bounding box touches the edge of the image.

    Args:
        contours: Iterable of contours accepted by ``cv.boundingRect``.
        image_width: Limit compared against the right edge (x + w) of each box.
        image_height: Limit compared against the bottom edge (y + h) of each box.

    Returns:
        list: Contours that stay clear of all four edges, in their input order.
    """
    def touches_edge(contour):
        left, top, box_width, box_height = cv.boundingRect(contour)
        return (
            left <= 0
            or top <= 0
            or left + box_width >= image_width
            or top + box_height >= image_height
        )

    return [contour for contour in contours if not touches_edge(contour)]


In [None]:
def grab_second(index):
    """Sort-key helper: return the item stored at position 1 of ``index``."""
    second_item = index[1]
    return second_item

In [None]:
def filterByMaximumRelativeArea(contours, tolerance=1.5):
    """Drop contours that are much larger than the largest contours on average.

    The bounding-box areas of the (up to) three largest contours are averaged;
    a contour is kept when its area does not exceed ``tolerance`` times that
    average (both values truncated to int).

    Args:
        contours: Iterable of contours accepted by ``cv.boundingRect``.
        tolerance: Multiplier applied to the average area.

    Returns:
        list: The surviving contours, in their input order. An empty input
        yields an empty list; with fewer than three contours the average is
        taken over the ones available instead of raising ``IndexError``.
    """
    areas = []
    for contour in contours:
        _, _, w, h = cv.boundingRect(contour)
        areas.append(w * h)

    if not areas:
        return []

    largest = sorted(areas, reverse=True)[:3]
    average = int(sum(largest) / len(largest))
    maximum_average = int(average * tolerance)

    return [contour
            for contour, area in zip(contours, areas)
            if area <= maximum_average]

In [None]:
def filterByMinimumRelativeArea(contours, tolerance=0.9):
    """Drop contours that are much smaller than the largest contours on average.

    The bounding-box areas of the (up to) three largest contours are averaged;
    a contour is kept when its area is at least ``tolerance`` times that
    average (both values truncated to int).

    Args:
        contours: Iterable of contours accepted by ``cv.boundingRect``.
        tolerance: Multiplier applied to the average area. The default used to
            be the string ``"0.9"``, which made ``int(average * tolerance)``
            raise ``ValueError``; numeric strings are still accepted and
            converted with ``float``.

    Returns:
        list: The surviving contours, in their input order. An empty input
        yields an empty list; with fewer than three contours the average is
        taken over the ones available instead of raising ``IndexError``.
    """
    tolerance = float(tolerance)

    areas = []
    for contour in contours:
        _, _, w, h = cv.boundingRect(contour)
        areas.append(w * h)

    if not areas:
        return []

    largest = sorted(areas, reverse=True)[:3]
    average = int(sum(largest) / len(largest))
    min_average = int(average * tolerance)

    return [contour
            for contour, area in zip(contours, areas)
            if min_average <= area]

In [None]:
def FilterByRelativeHeight(contours,tolerance=1.5):
    """Drop contours that are much taller than the reference contours.

    The reference contours are the (up to) three with the widest bounding
    boxes. Their heights are averaged, and a contour is kept when its height is
    strictly below ``tolerance`` times that average (both truncated to int).

    NOTE(review): the reference boxes are chosen by *width*, as in the original
    code (it sorted ``[index, w, h]`` on position 1). Given the function name
    this may have been meant to be height or area - confirm before changing.

    Args:
        contours: Iterable of contours accepted by ``cv.boundingRect``.
        tolerance: Multiplier applied to the average height.

    Returns:
        list: The surviving contours, in their input order. An empty input
        yields an empty list; with fewer than three contours the average is
        taken over the ones available instead of raising ``IndexError``.
    """
    sizes = []
    for contour in contours:
        _, _, w, h = cv.boundingRect(contour)
        sizes.append((w, h))

    if not sizes:
        return []

    # Stable sort: boxes of equal width keep their input order.
    widest = sorted(sizes, key=lambda size: size[0], reverse=True)[:3]
    average = int(sum(h for _, h in widest) / len(widest))
    max_average = int(average * tolerance)

    return [contour
            for contour, (_, h) in zip(contours, sizes)
            if h < max_average]



In [None]:
def _mostCommonValue(values, spread=0.25):
    """Return the value that has the most other values within +/- ``spread`` of it.

    Ties go to the earliest value. If no value has any company, the first value
    is returned, so a non-empty input always yields a reference value.
    """
    best_value = None
    best_votes = -1
    for i, value in enumerate(values):
        margin = value * spread
        votes = 0
        for j, other in enumerate(values):
            if i != j and value - margin < other < value + margin:
                votes += 1
        if votes > best_votes:
            best_value, best_votes = value, votes
    return best_value


def _indicesNear(values, reference, spread=0.25):
    """Return the indices of the values strictly within +/- ``spread`` of ``reference``."""
    margin = reference * spread
    return {i for i, value in enumerate(values)
            if reference - margin < value < reference + margin}


def final3Selection(contours):
    """Pick at most three contours that share the most common height and row.

    1. Find the most common bounding-box height (values within 25 % count as
       equal) and keep the contours whose height is within 25 % of it.
    2. Do the same for the top coordinate ``y`` of the bounding boxes.
    3. Keep the contours that pass both tests; if more than three remain,
       return the three with the largest bounding-box area, largest first.

    Previously, when no two contours had a similar height (or ``y``), the
    reference stayed an empty list and indexing it raised ``IndexError``. The
    first contour now serves as the reference in that case.

    The tolerance is relative to the value itself, so a reference ``y`` of 0
    matches nothing (the allowed band collapses to an empty interval).

    Args:
        contours: Sequence of contours accepted by ``cv.boundingRect``.

    Returns:
        list: Up to three contours.
    """
    if len(contours) == 0:
        return []

    boxes = [cv.boundingRect(contour) for contour in contours]
    heights = [h for _, _, _, h in boxes]
    tops = [y for _, y, _, _ in boxes]

    sameHeight = _indicesNear(heights, _mostCommonValue(heights))
    sameRow = _indicesNear(tops, _mostCommonValue(tops))

    chosen = [i for i in range(len(boxes)) if i in sameHeight and i in sameRow]

    if len(chosen) > 3:
        # Stable sort: candidates of equal area keep their input order.
        chosen = sorted(chosen,
                        key=lambda i: boxes[i][2] * boxes[i][3],
                        reverse=True)[:3]

    return [contours[i] for i in chosen]

In [None]:
def extractDigits(original_image,contours):
    """Crop every digit and the sign area around them, display them, return them.

    Each contour's bounding box is cropped from ``original_image`` as one
    digit. The sign crop is the box enclosing all digit boxes, with the
    top-left corner scaled by 0.95 and the bottom-right corner scaled by 1.05
    as a buffer. Every crop is shown with ``cv2_imshow``.

    Args:
        original_image: Image indexed as ``image[rows, cols]`` that supports
            ``.copy()``.
        contours: Iterable of contours accepted by ``cv.boundingRect``.

    Returns:
        tuple: ``(digits, sign)`` - the list of digit crops in contour order and
        the crop of the whole sign.
    """
    boxes = [cv.boundingRect(contour) for contour in contours]

    # Copy only the crop instead of copying the whole image once per digit.
    digits = [original_image[y:y+h, x:x+w].copy() for x, y, w, h in boxes]

    if boxes:
        minX = min(x for x, _, _, _ in boxes)
        # The original compared y against maxY here, which could move the top
        # edge of the sign downwards and cut into the digits.
        minY = min(y for _, y, _, _ in boxes)
        maxX = max(x + w for x, _, w, _ in boxes)
        maxY = max(y + h for _, y, _, h in boxes)
    else:
        # No contours: keep the -1 placeholders the original code ended up with.
        minX = minY = maxX = maxY = -1

    #add buffer
    minX = int(minX * 0.95)
    minY = int(minY * 0.95)
    maxX = int(maxX * 1.05)
    maxY = int(maxY * 1.05)
    sign = original_image[minY:maxY, minX:maxX].copy()

    for digit in digits:
        cv2_imshow(digit)

    cv2_imshow(sign)

    return digits,sign
    
        


In [None]:
def drawDigits(original_image,contours):
    """Draw the digit boxes and the enclosing sign box on a copy of the image.

    Every contour's bounding box is drawn in green. The box enclosing all of
    them - top-left corner scaled by 0.95 and bottom-right corner scaled by
    1.05 as a buffer - is drawn in red. The result is shown with
    ``cv2_imshow``.

    Args:
        original_image: Image supporting ``.copy()``; it is not modified.
        contours: Iterable of contours accepted by ``cv.boundingRect``.

    Returns:
        The annotated copy of ``original_image``.
    """
    image = original_image.copy()
    boxes = [cv.boundingRect(contour) for contour in contours]

    for x, y, w, h in boxes:
        cv.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 1)

    if boxes:
        minX = min(x for x, _, _, _ in boxes)
        # The original compared y against maxY here, which could move the top
        # edge of the sign box downwards.
        minY = min(y for _, y, _, _ in boxes)
        maxX = max(x + w for x, _, w, _ in boxes)
        maxY = max(y + h for _, y, _, h in boxes)
    else:
        # No contours: keep the -1 placeholders the original code ended up with.
        minX = minY = maxX = maxY = -1

    #add buffer
    minX = int(minX * 0.95)
    minY = int(minY * 0.95)
    maxX = int(maxX * 1.05)
    maxY = int(maxY * 1.05)

    cv.rectangle(image, (minX, minY), (maxX, maxY), (0, 0, 255), 1)

    cv2_imshow(image)

    return image
    
        


In [None]:
#detectBuildingNumber

def _applyStage(stage, contours, minimum=3):
    """Run one filter stage, falling back to its input if too little survives.

    Returns ``(contours, exhausted)``. ``exhausted`` is True when the stage left
    fewer than ``minimum`` contours; the unfiltered input is then returned and
    the caller should stop filtering.
    """
    candidates = stage(contours)
    if len(candidates) < minimum:
        return contours, True
    return candidates, False


def detectBuildingNumber(image, refine=False):
    """Locate the digits of a building number and draw them on the image.

    Pipeline:
      1. ``preprocessing`` followed by Canny edge detection and contour
         extraction.
      2. Coarse filters (aspect ratio, area, coincident centroids, horizontal
         neighbours), applied once.
      3. Refinement filters, applied repeatedly until exactly three contours
         remain, a filter would leave fewer than three, or a full pass no
         longer changes the number of contours.
      4. ``final3Selection``, then ``extractDigits`` and ``drawDigits`` to
         display the result.

    Whenever a filter would leave fewer than three contours its result is
    discarded and filtering stops with the contours from before that filter.

    Args:
        image: BGR image as a NumPy array.
        refine: When True the original image is sharpened before preprocessing
            and a higher lower Canny threshold (200 instead of 150) is used.

    Returns:
        The annotated image returned by ``drawDigits``.
    """
    original_image = image.copy()

    # NumPy images are indexed (row, column), so shape[0] is the height. The
    # two were swapped before, which made filterBorderContours compare x
    # against the image height and y against the image width.
    image_height, image_width = image.shape[:2]
    image_area = image_width * image_height

    if refine:
        #sharpen image
        kernel = np.array([[0, -1, 0],
                           [-1, 5, -1],
                           [0, -1, 0]])
        # Sharpen the untouched BGR image and preprocess that. Previously the
        # sharpened result was discarded and preprocessing() was run a second
        # time on its own grayscale output, which it cannot convert to HSV.
        image_sharp = cv.filter2D(src=original_image, ddepth=-1, kernel=kernel)
        canny_output = cv.Canny(preprocessing(image_sharp), 200, 255)
    else:
        canny_output = cv.Canny(preprocessing(image), 150, 255)

    #get contours
    contours, hierarchy = cv.findContours(canny_output, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)

    # Coarse filters, each applied once.
    coarseStages = [
        filterByAspectRatio,
        lambda cs: filterByArea(cs, image_area),
        lambda cs: filterContourInsideContour(cs, hierarchy),
        hasNeighbourHorizontally,
    ]

    done = False
    for stage in coarseStages:
        contours, done = _applyStage(stage, contours)
        if done:
            break

    # Refinement filters, repeated until the selection settles. Every stage
    # falls back to its own input; the original code fell back one stage too
    # far on the very first refinement step because the backup was not
    # refreshed after the last coarse filter.
    refinementStages = [
        lambda cs: filterByMaximumRelativeArea(cs, tolerance=1.5),
        hasNeighbourHorizontally,
        removeOverlappingContours,
        hasNeighbourHorizontally,
        lambda cs: filterBorderContours(cs, image_width, image_height),
        lambda cs: filterByMinimumRelativeArea(cs, tolerance=0.6),
        hasNeighbourHorizontally,
        lambda cs: FilterByRelativeHeight(cs, tolerance=1.2),
    ]

    previousNumberOfContours = -1
    while not done:
        for stage in refinementStages:
            contours, done = _applyStage(stage, contours)
            # Exactly three contours is the target: stop refining.
            if done or len(contours) == 3:
                done = True
                break

        # A full pass that did not change the count means nothing more will.
        if len(contours) == previousNumberOfContours:
            done = True
        previousNumberOfContours = len(contours)

    contours = final3Selection(contours)

    # Called for its display side effect; the crops themselves are not used here.
    extractDigits(original_image, contours)

    image = drawDigits(original_image, contours)

    return image

In [None]:
#read all training/validation images and detect the building numbers

for path in imagePath:

    print(path)

    image = cv.imread(path)

    # cv.imread signals an unreadable or missing file by returning None rather
    # than raising, so skip such files instead of failing on image.shape.
    if image is None:
        print("Could not read image, skipping:", path)
        continue

    # NumPy images are indexed (row, column): shape[0] is the height.
    height, width = image.shape[:2]

    ratio = width / height

    # Scale every image to a fixed height of 1000 px, keeping the aspect ratio.
    newHeight = 1000
    newWidth = int(newHeight * ratio)

    # cv.resize expects the target size as (width, height).
    resized_image = cv.resize(image, (newWidth, newHeight), interpolation=cv.INTER_CUBIC)

    detectBuildingNumber(resized_image)

In [None]:
#read all testing images and detect the building numbers

for path in testingImagePath:

    print(path)

    image = cv.imread(path)

    # cv.imread signals an unreadable or missing file by returning None rather
    # than raising, so skip such files instead of failing on image.shape.
    if image is None:
        print("Could not read image, skipping:", path)
        continue

    # NumPy images are indexed (row, column): shape[0] is the height.
    height, width = image.shape[:2]

    ratio = width / height

    # Scale every image to a fixed height of 1000 px, keeping the aspect ratio.
    newHeight = 1000
    newWidth = int(newHeight * ratio)

    # cv.resize expects the target size as (width, height).
    resized_image = cv.resize(image, (newWidth, newHeight), interpolation=cv.INTER_CUBIC)

    image = detectBuildingNumber(resized_image)
