In [1]:
import cv2
import numpy as np
import glob
import itertools
import pickle
import sklearn.ensemble as en
import time
np.seterr(all="ignore")
import os

In [2]:
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of at most ``n`` items.

    ``l`` can be anything that supports ``len`` and slicing (list, range, ...).
    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    # One slice per multiple of n below len(l); slicing clips the last piece.
    yield from (l[lo:lo + n] for lo in range(0, len(l), n))


def cubeRangeMaker(img, cubesize):
    """Build the index ranges needed to cut ``img`` into square cubes.

    img      -- 3-dimensional array (only its shape is read)
    cubesize -- edge length of each cube in pixels

    Returns ``(vekH, vekW)``: two lists of ``range`` objects. ``vekW`` is built
    from ``img.shape[0]`` and ``vekH`` from ``img.shape[1]``.

    The stride and the cube count now follow ``cubesize`` (they used to be
    hard-coded to 15), so the result is unchanged for ``cubesize == 15`` and
    non-overlapping for any other size.
    """
    # NOTE: shape[0] is named "width" here; cubeListr indexes axis 0 with vekW,
    # so the naming is consistent within this file.
    width, height, _ = img.shape

    # One cube fewer than fits (after rounding) is produced along each axis.
    splitIntoW = round(width / cubesize) - 1
    splitIntoH = round(height / cubesize) - 1

    vekW = [range(k * cubesize, (k + 1) * cubesize) for k in range(splitIntoW)]
    vekH = [range(k * cubesize, (k + 1) * cubesize) for k in range(splitIntoH)]

    # return list of height and width range vectors
    return vekH, vekW

def cubeListr(img, vekH, vekW):
    """Cut ``img`` into sub-arrays using the ranges from cubeRangeMaker.

    Every ``vekH`` entry is paired with every ``vekW`` entry (``vekH`` varies
    slowest). ``vekW`` indexes axis 0 of ``img`` and ``vekH`` indexes axis 1.
    """
    return [img[np.ix_(axis0Idx, axis1Idx)]
            for axis1Idx in vekH
            for axis0Idx in vekW]

def cubeifiy(img, cubesize):
    """Convenience wrapper: return the list of cubes cut out of ``img``."""
    # cubeRangeMaker returns (vekH, vekW), which is cubeListr's argument order.
    return cubeListr(img, *cubeRangeMaker(img, cubesize))

def cubeifiyWvek(img, cubesize):
    """Like cubeifiy, but also return the ranges that locate each cube.

    Returns ``(cubes, VekH, VekW)`` so a caller can map a per-cube result
    (for example "this cube is sky") back onto image coordinates.
    """
    ranges = cubeRangeMaker(img, cubesize)
    cubes = cubeListr(img, *ranges)
    return (cubes,) + tuple(ranges)

def cubeifiyPatchList(patchlist, cubesize):
    """Cut every image in ``patchlist`` into cubes and return one flat list.

    patchlist -- sequence of cropped images
    cubesize  -- cube edge length passed through to cubeifiy

    Fix: the loop used to run over ``len(skyPatchs)`` (a notebook global)
    instead of the argument. Any other patch list was therefore truncated, or
    raised IndexError when it was shorter than ``skyPatchs``.
    """
    # list of lists (one per patch) flattened into a single list of cubes
    return list(itertools.chain.from_iterable(
        cubeifiy(patch, cubesize) for patch in patchlist))

def HSVmeanz(img):
    """Return the mean of the H, S and V channels of a BGR image as a 3-tuple."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hueMean, satMean, valMean = (np.mean(hsv[:, :, ch]) for ch in range(3))
    return hueMean, satMean, valMean

def sifty(img):
    """Return the flattened SIFT descriptor for the centre of a 15x15 BGR cube.

    The image is converted to grayscale first, then a single keypoint at
    (7.5, 7.5) with size 2 is described.

    Compatibility fix: SIFT lives in the main ``cv2`` module in newer OpenCV
    builds and only in ``cv2.xfeatures2d`` in older contrib builds. The
    ``KeyPoint`` size keyword was also renamed, so the arguments are passed
    positionally to work with both spellings.
    """
    gryImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    makeSift = getattr(cv2, 'SIFT_create', None)
    if makeSift is None:
        makeSift = cv2.xfeatures2d.SIFT_create
    sift = makeSift()

    _, g = sift.compute(gryImg, [cv2.KeyPoint(7.5, 7.5, 2)])
    return g.ravel()

def HSVnSiftExtract(cubeList):
    """Build the sky/non-sky feature vector for every cube in ``cubeList``.

    Each vector is the three HSV channel means followed by the SIFT descriptor.
    """
    return [np.hstack((np.array(HSVmeanz(cube)), sifty(cube)))
            for cube in cubeList]
    
def multiSplitter(img, sectionVec, sectionNum):
    """Split a 2-D image into horizontal bands and sub-divide each band.

    img        -- 2-D array (only its shape is read)
    sectionVec -- for each band, the number of pieces to aim for per axis
    sectionNum -- number of bands

    Returns ``(x, y, secLen)``: ``x`` and ``y`` are the index chunks of all
    bands appended one after another, and ``secLen[i]`` holds how many y-chunks
    (column 0) and x-chunks (column 1) band ``i`` contributed. Chunk sizes are
    rounded, so a band can yield one more chunk than requested.
    """
    # NOTE: shape[0] is called "width" here; x-chunks are cut from it.
    width, height = img.shape
    heightSections = list(chunks(range(0, height), round(height / sectionNum)))

    x, y = [], []
    secLen = np.zeros([sectionNum, 2])
    for band in range(sectionNum):
        bandRows = heightSections[band]
        pieces = sectionVec[band]

        yTem = list(chunks(bandRows, round(len(bandRows) / pieces)))
        secLen[band, 0] = len(yTem)
        y += yTem

        xTem = list(chunks(range(0, width), round(width / pieces)))
        secLen[band, 1] = len(xTem)
        x += xTem

    return x, y, secLen

def medianExtractor(secLen, darkChannelImg, x, y):
    """Return the vector of dark-channel medians used as the haze feature.

    secLen         -- array with one row per section: (number of y-chunks,
                      number of x-chunks)
    darkChannelImg -- 2-D dark channel image
    x, y           -- chunk lists produced by multiSplitter

    Returns a length-92 float vector; unused trailing entries stay 0.

    Fix: ``np.int`` was removed from NumPy, so the built-in ``int`` is used.
    """
    # NOTE(review): i and j restart at 0 for every section, so y[i] / x[j]
    # always index from the start of the concatenated chunk lists, not from the
    # chunks of the current section. Left unchanged so the feature stays
    # compatible with previously trained models -- confirm whether intended.
    hazeVec = np.zeros(92)
    z = 0
    for ySecLen, xSecLen in secLen:
        for i in range(int(ySecLen)):
            for j in range(int(xSecLen)):
                hazeVec[z] = np.median(darkChannelImg[np.ix_(y[i], x[j])].ravel())
                z = z + 1

    return hazeVec

def darkChannelBuilder(img):
    """Return the per-pixel minimum over the first three colour channels.

    Generalised: the result used to be forced into ``reshape(512, 512)``, which
    failed for any other image size. The output now has the height and width of
    ``img``; values for 512x512 inputs are unchanged.
    """
    return np.min(img[:, :, :3], axis=2)

def HazePipe(img):
    """Return the haze feature vector (dark-channel medians) for ``img``.

    The dark channel is cut into three horizontal bands that are sub-divided
    2, 4 and 8 ways, so the last band is sampled most finely.
    """
    bandSplits = [2, 4, 8]
    dark = darkChannelBuilder(img)
    xChunks, yChunks, chunkCounts = multiSplitter(dark, bandSplits, len(bandSplits))
    return medianExtractor(chunkCounts, dark, xChunks, yChunks)
   
def HazeListExtractor(imgList):
    """Apply HazePipe to every image in ``imgList`` and return the results as a list."""
    return [HazePipe(img) for img in imgList]



def contFeVe(img):
    """Return the 171-dimensional saturation contrast feature of a BGR image.

    The 5th to 100th percentiles (step 5) of the saturation channel are taken,
    zeros are replaced by 1, and each percentile ``i`` is divided by every
    percentile ``j`` with ``j < i - 1``.
    """
    # saturation channel of the HSV representation, flattened
    sat = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:, :, 1].ravel()

    # twenty percentiles: 5, 10, ..., 100
    pct = np.array([np.percentile(sat, q) for q in range(5, 105, 5)], dtype=float)

    # zeros become ones so the divisions below cannot produce infs
    pct[pct == 0] = 1

    satM = np.ones(171)
    pairs = ((hi, lo) for hi in range(len(pct)) for lo in range(hi - 1))
    for slot, (hi, lo) in enumerate(pairs):
        satM[slot] = pct[hi] / pct[lo]
        if satM[slot] == np.inf:
            print('nans about', slot)
    return satM
   
#####Functions to test success
        
def skyTestrWHist(mixedPaths, carryOnLength):
    """Show the first ``carryOnLength`` images with the sky classifier's verdict.

    mixedPaths    -- list of image file names
    carryOnLength -- how many images to classify and display

    Relies on the module-level models ``rf`` (per-cube sky / non-sky) and
    ``rfHist`` (sky histogram classifier).

    Fixes:
    * ``cv2.waitKey`` replaces ``time.sleep``. HighGUI only draws the window
      while waitKey runs, so sleeping left it blank.
    * The histogram is passed to ``predict`` as one 2-D sample, not a 1-D array.
    * Unreadable files are skipped instead of crashing in ``cv2.resize``.
    * The unused ``rf.predict`` call is gone, and the label is always bound.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for path in mixedPaths[:carryOnLength]:
        test = cv2.imread(path)
        if test is None:
            # cv2.imread returns None when the file cannot be read
            print('Could not read image, skipping:', path)
            continue
        test = cv2.resize(test, (512, 512))

        testCubes, vekH, vekW = cubeifiyWvek(test, 15)
        testCubesEtrctd = HSVnSiftExtract(testCubes)

        # keep only cubes whose class probability exceeds 70%
        predza = (rf.predict_proba(testCubesEtrctd) > 0.7) * 1

        # mask with 1 where a cube was classified as sky; it starts at zero, so
        # cubes that are not confidently sky need no explicit reset
        rfMarker = np.zeros(test.shape[0:2], np.uint8)
        counta = 0
        for i in range(len(vekH) - 1):
            for j in range(len(vekW) - 1):
                if predza[counta, 1] == 1:
                    rfMarker[np.ix_(vekW[j], vekH[i])] = 1
                counta = counta + 1

        aa = histoExtraktor(test, rfMarker)
        text = rfHist.predict(aa.reshape(1, -1))[0]
        printText = 'Dull Sky' if text == 1 else 'Sunny Sky'

        cv2.putText(test, printText, (10, 500), font, 2, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('na', test)
        # show for 3 seconds (or until a key is pressed)
        cv2.waitKey(3000)
        cv2.destroyAllWindows()
        
#### Functions for the sky histogram
        
def histoExtraktor(img, rfMarker):
    """Return colour histograms of the masked part of a BGR image.

    ``rfMarker`` is eroded with a 20x20 kernel and multiplied into the image.
    A 50-bin histogram over the value range (1, 255) is then taken for each
    BGR channel and each HSV channel, and the six histograms are concatenated
    into one flat vector.
    """
    erodeKernel = np.ones((20, 20), np.uint8)
    mask = cv2.erode(rfMarker, erodeKernel)[:, :, np.newaxis]

    maskedBgr = img * mask
    maskedHsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) * mask

    # BGR channels first, then HSV channels
    hists = [np.histogram(layer[:, :, ch], 50, range=(1, 255))[0]
             for layer in (maskedBgr, maskedHsv)
             for ch in range(3)]
    return np.array(hists).ravel()

def histoPipeLine(img):
    """Return ``rfHist.predict_proba`` for the sky region of ``img``.

    The image is cut into 15x15 cubes and each cube is classified by the
    module-level model ``rf``. Cubes with sky probability above 0.7 form a mask,
    and the masked colour histogram is scored by the module-level ``rfHist``.

    Fixes: the histogram is passed as a single 2-D sample
    (``reshape(1, -1)``) instead of a 1-D array, and the unused
    ``rf.predict`` call was removed.
    """
    testCubes, vekH, vekW = cubeifiyWvek(img, 15)
    testCubesEtrctd = HSVnSiftExtract(testCubes)
    predza = (rf.predict_proba(testCubesEtrctd) > 0.7) * 1

    # mask starts at zero, so only confident sky cubes need to be written
    rfMarker = np.zeros(img.shape[0:2], np.uint8)
    counta = 0
    for i in range(len(vekH) - 1):
        for j in range(len(vekW) - 1):
            if predza[counta, 1] == 1:
                rfMarker[np.ix_(vekW[j], vekH[i])] = 1
            counta = counta + 1

    skyHist = histoExtraktor(img, rfMarker)
    return rfHist.predict_proba(skyHist.reshape(1, -1))
    

def SkytoHist(patch):
    """Turn a sky patch into its histogram vector, using an all-ones mask."""
    fullMask = np.ones(patch.shape[0:2], np.uint8)
    return histoExtraktor(patch, fullMask)
    
def SkytoHistList(patchList):
    """Convert a list of sky patch arrays into a list of histogram vectors."""
    return [SkytoHist(patch) for patch in patchList]








def HistToRF(patchSun, patchCloud):
    """Stack cloud and sun histogram vectors into ``X`` with matching labels ``y``.

    ``X`` holds the cloud rows first, then the sun rows. Cloud rows are labelled
    1 and sun rows 0.

    Fix: the label vector used to be built as ones(len(patchSun)) followed by
    zeros(len(patchCloud)) while ``X`` was ordered cloud-then-sun. The labels
    only lined up when both lists had the same length. The label counts now
    follow the row order of ``X``; for equal-length inputs the result is
    unchanged.
    """
    X = np.concatenate((patchCloud, patchSun), 0)
    y = np.concatenate((np.ones(len(patchCloud)), np.zeros(len(patchSun))), 0)
    return X, y



## Preparation for putting data into a model


def randomForestPrep(type1, type2 = 0, mixed = False):
    """Return ``(X, y)`` ready for fitting a binary classifier.

    mixed == False: ``X`` is ``type1`` stacked on ``type2`` and ``y`` is 1 for
    every ``type1`` row and 0 for every ``type2`` row.
    Otherwise: ``X`` is ``type1`` as given and ``y`` is the alternating list
    0, 1, 0, 1, ... of the same length.
    """
    # same equality test as before, written as the opposite branch first
    if mixed != False:
        alternating = [position % 2 for position in range(len(type1))]
        return type1, alternating

    labels = np.concatenate((np.ones(len(type1)), np.zeros(len(type2))), 0)
    features = np.concatenate((type1, type2), 0)
    return features, labels

def trainingDataExtractor(fileList, howMany):
    """Read the first ``howMany`` files of ``fileList`` as 512x512 images.

    Fixes:
    * ``cv2.resize`` returns a new array; its result used to be discarded, so
      the images were appended at their original size.
    * A file that cannot be read (``cv2.imread`` returns None) now raises a
      clear IOError instead of failing inside ``cv2.resize``.
    """
    imgList = []
    for i in range(howMany):
        img = cv2.imread(fileList[i])
        if img is None:
            raise IOError('Could not read image: ' + str(fileList[i]))
        imgList.append(cv2.resize(img, (512, 512)))

    return imgList

def trainingDataExtractSampler(fileList, fromWhere, toWhere):
    """Read ``fileList[fromWhere:toWhere]`` as 512x512 images.

    Images whose height + width is 2000 or more are skipped, so the returned
    list can be shorter than the requested range.

    Fixes:
    * ``cv2.resize`` returns a new array; its result used to be discarded, so
      the images were appended at their original size.
    * Unreadable files (``cv2.imread`` returns None) are reported and skipped
      instead of crashing on ``img.shape``.
    """
    imgList = []
    for i in range(fromWhere, toWhere):
        img = cv2.imread(fileList[i])
        if img is None:
            print('Could not read image, skipping:', fileList[i])
            continue
        if img.shape[0] + img.shape[1] < 2000:
            imgList.append(cv2.resize(img, (512, 512)))

    return imgList


        
def fullPipelineTestr(mixedPaths, carryOnLength):
    """Show the first ``carryOnLength`` images annotated by the full pipeline.

    The classifiers are applied as a cascade: night first, then snow (only if
    not night), then sunny/cloudy (only if not snow). Relies on the
    module-level models ``rfNight``, ``rfSnow`` and ``rfMain``.

    Fixes:
    * ``cv2.waitKey`` replaces ``time.sleep``. HighGUI only draws the window
      while waitKey runs, so sleeping left it blank.
    * The feature vector is passed to ``predict`` as one 2-D sample.
    * Labels are always bound (if/else instead of two independent ifs).
    * Unreadable files are skipped instead of crashing in ``cv2.resize``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for path in mixedPaths[:carryOnLength]:
        test = cv2.imread(path)
        if test is None:
            # cv2.imread returns None when the file cannot be read
            print('Could not read image, skipping:', path)
            continue
        test = cv2.resize(test, (512, 512))

        # imgPipeline returns a flat vector; the models expect (1, n_features)
        testVec = np.asarray(imgPipeline(test)).reshape(1, -1)

        printText3 = ' '
        printText4 = ' '
        if rfNight.predict(testVec)[0] == 1:
            printText1 = 'Is Night'
        else:
            printText1 = 'Not Night'
            if rfSnow.predict(testVec)[0] == 1:
                printText3 = 'Snow Found'
            else:
                printText3 = 'Not Snow'
                if rfMain.predict(testVec)[0] == 1:
                    printText4 = 'Sunny/Bright'
                else:
                    printText4 = 'Cloudy/Overcast'

        cv2.putText(test, printText1, (10, 100), font, 2, (0, 255, 0), 3, cv2.LINE_AA)
        cv2.putText(test, printText3, (10, 250), font, 2, (0, 255, 0), 3, cv2.LINE_AA)
        cv2.putText(test, printText4, (2, 400), font, 1, (0, 255, 0), 3, cv2.LINE_AA)
        cv2.imshow('na', test)

        # show for 3 seconds (or until a key is pressed)
        cv2.waitKey(3000)
        cv2.destroyAllWindows()
        


def extrAnaSave(pipelinefunction, ExtractionSize, BatchSize, folderName):
    """Extract features from the global ``mixedPaths`` in batches and save them.

    pipelinefunction -- callable turning a list of images into feature vectors
    ExtractionSize   -- number of image pairs; indices 0 .. 2*ExtractionSize-1
                        of ``mixedPaths`` are processed
    BatchSize        -- number of indices per saved batch
    folderName       -- folder (under the working directory) for the output

    For each batch a pickled label list ``...y`` and a feature array
    ``...X.npy`` are written. Labels are 0 for even and 1 for odd positions of
    ``mixedPaths``, the same alternating scheme as
    ``randomForestPrep(mixed=True)``.

    Fixes:
    * The last index of every batch was never loaded
      (``range(min(i), max(i))``).
    * Labels were assigned by position in the loaded batch, so every image the
      sampler skipped shifted the labels of all following images. Each label
      is now derived from the file's own index.
    * The label file is closed after writing.
    """
    fileListChunkz = list(chunks(range(ExtractionSize * 2), BatchSize))

    # make the output folder if it doesn't already exist
    saveDir = os.path.join(os.getcwd(), folderName)
    os.makedirs(saveDir, exist_ok=True)

    g = 0

    for i in fileListChunkz:
        first, last = min(i), max(i)

        # load one index at a time so each kept image carries its own label
        imgMatrixChunks = []
        y = []
        for idx in i:
            loaded = trainingDataExtractSampler(mixedPaths, idx, idx + 1)
            imgMatrixChunks.extend(loaded)
            y.extend([idx % 2] * len(loaded))

        X = pipelinefunction(imgMatrixChunks)

        # release the raw images as soon as they have been analysed
        del imgMatrixChunks[:]

        # file names carry the index range of the batch
        stem = os.path.join(saveDir, folderName + str(first) + 'to' + str(last))
        with open(stem + 'y', "wb") as labelFile:
            pickle.dump(y, labelFile)
        np.save(stem + 'X.npy', np.asarray(X))

        g = g + 1

        if g == 1000:
            print(str(last) + ' completed.')
            g = 0

        del X, y


def extrAnaSaveDeluxe(pipelinefunction, ExtractionSize, BatchSize, folderName, path):
    """Extract features from a single-class file list in batches and save them.

    pipelinefunction -- callable turning a list of images into feature vectors
    ExtractionSize   -- number of files of ``path`` to process
    BatchSize        -- number of indices per saved batch
    folderName       -- folder (under the working directory) for the output
    path             -- list of image file names (one class, e.g. snow or night)

    For each batch a feature array ``...X.npy`` is written; no labels are saved.

    Fixes:
    * The last index of every batch was never loaded
      (``range(min(i), max(i))``).
    * ``del X[:]`` only worked when the pipeline returned a list; the
      reference is now simply dropped.
    """
    fileListChunkz = list(chunks(range(ExtractionSize), BatchSize))

    # make the output folder if it doesn't already exist
    saveDir = os.path.join(os.getcwd(), folderName)
    os.makedirs(saveDir, exist_ok=True)

    g = 0

    for i in fileListChunkz:
        first, last = min(i), max(i)

        # toWhere is exclusive, hence last + 1
        imgMatrixChunks = trainingDataExtractSampler(path, first, last + 1)
        X = pipelinefunction(imgMatrixChunks)

        # release the raw images as soon as they have been analysed
        del imgMatrixChunks[:]

        # file names carry the index range of the batch
        np.save(os.path.join(saveDir, folderName + str(first) + 'to' + str(last) + 'X.npy'),
                np.asarray(X))

        g = g + 1

        if g == 1000:
            print(str(last) + ' completed.')
            g = 0

        del X

    
    
    
    
    
def imgPipeline(img):
    """Return the full feature vector of ``img``.

    The haze vector, the contrast vector and the flattened sky prediction are
    concatenated in that order. A message is printed for any part that
    contains infs.
    """
    parts = (
        ('hazeVec', HazePipe(img)),
        ('contraVec', contFeVe(img)),
        ('skyPredVec', histoPipeLine(img)),
    )

    # testing for infs
    for label, vec in parts:
        if np.sum(np.isinf(vec)) > 0:
            print(label, 'has', np.sum(np.isinf(vec)), 'infs')

    hazePart, contrastPart, skyPart = (vec for _, vec in parts)
    return np.concatenate((hazePart, contrastPart, skyPart.ravel()), axis=0)

def imgPipeLineList(imgList):
    """Resize every image to 512x512 and return the list of imgPipeline vectors."""
    return [imgPipeline(cv2.resize(img, (512, 512))) for img in imgList]

def testFuncPipeLineList(imgList, func):
    """Resize every image to 512x512, apply ``func`` and return the results as a list."""
    resized = (cv2.resize(img, (512, 512)) for img in imgList)
    return [func(img) for img in resized]

#Misc. Functions


def Rtable(x):
    """Imitation of R's table(): count the distinct values of an integer array.

    Returns a 2-column array whose rows are ``(value, count)`` for every value
    that occurs, in ascending order of value.
    """
    counts = np.bincount(x.ravel())
    present = np.flatnonzero(counts)
    return np.column_stack((present, counts[present]))


def chunks(l, n):
    """Yield successive slices of ``l``, each holding at most ``n`` items."""
    # start offsets 0, n, 2n, ...; the last slice is clipped by slicing itself
    starts = range(0, len(l), n)
    for begin in starts:
        stop = begin + n
        yield l[begin:stop]


def cubeRangeMaker(img, cubesize):
    """Build the index ranges needed to cut ``img`` into square cubes.

    img      -- 3-dimensional array (only its shape is read)
    cubesize -- edge length of each cube in pixels

    Returns ``(vekH, vekW)``: two lists of ``range`` objects. ``vekW`` is built
    from ``img.shape[0]`` and ``vekH`` from ``img.shape[1]``.

    The stride and the cube count now follow ``cubesize`` (they used to be
    hard-coded to 15), so the result is unchanged for ``cubesize == 15`` and
    non-overlapping for any other size.
    """
    # NOTE: shape[0] is named "width" here; cubeListr indexes axis 0 with vekW.
    width, height, _ = img.shape

    # round() can round up, so the last range may reach past the image edge.
    # NOTE(review): cubeListr skips the last range of each list, which looks
    # like the guard for that case -- confirm.
    splitIntoW = round(width / cubesize)
    splitIntoH = round(height / cubesize)

    vekW = [range(k * cubesize, (k + 1) * cubesize) for k in range(splitIntoW)]
    vekH = [range(k * cubesize, (k + 1) * cubesize) for k in range(splitIntoH)]

    return vekH, vekW

def cubeListr(img, vekH, vekW):
    """Cut ``img`` into sub-arrays using the ranges from cubeRangeMaker.

    The last entry of ``vekH`` and of ``vekW`` is not used. The remaining
    ranges are combined with ``vekH`` varying slowest; ``vekW`` indexes axis 0
    of ``img`` and ``vekH`` indexes axis 1.
    """
    return [img[np.ix_(axis0Idx, axis1Idx)]
            for axis1Idx in vekH[:-1]
            for axis0Idx in vekW[:-1]]

def cubeifiy(img, cubesize):
    """Return the list of cubes cut out of ``img`` (ranges are discarded)."""
    axis1Ranges, axis0Ranges = cubeRangeMaker(img, cubesize)
    return cubeListr(img, axis1Ranges, axis0Ranges)

def cubeifiyWvek(img, cubesize):
    """Return ``(cubes, VekH, VekW)``: the cubes of ``img`` plus the ranges locating them."""
    ranges = cubeRangeMaker(img, cubesize)
    cubes = cubeListr(img, *ranges)
    return (cubes,) + tuple(ranges)

def cubeifiyPatchList(patchlist, cubesize):
    """Cut every image in ``patchlist`` into cubes and return one flat list.

    patchlist -- sequence of cropped images
    cubesize  -- cube edge length passed through to cubeifiy

    Fix: the loop used to run over ``len(skyPatchs)`` (a notebook global)
    instead of the argument. Any other patch list was therefore truncated, or
    raised IndexError when it was shorter than ``skyPatchs``.
    """
    # list of lists (one per patch) flattened into a single list of cubes
    return list(itertools.chain.from_iterable(
        cubeifiy(patch, cubesize) for patch in patchlist))

def HSVmeanz(img):
    """Convert a BGR image to HSV and return the (H, S, V) channel means."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    return tuple(np.mean(hsv[:, :, ch]) for ch in (0, 1, 2))

def sifty(img):
    """Return the flattened SIFT descriptor for the centre of a 15x15 BGR cube.

    The image is converted to grayscale first, then a single keypoint at
    (7.5, 7.5) with size 2 is described.

    Compatibility fix: SIFT lives in the main ``cv2`` module in newer OpenCV
    builds and only in ``cv2.xfeatures2d`` in older contrib builds. The
    ``KeyPoint`` size keyword was also renamed, so the arguments are passed
    positionally to work with both spellings.
    """
    gryImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    makeSift = getattr(cv2, 'SIFT_create', None)
    if makeSift is None:
        makeSift = cv2.xfeatures2d.SIFT_create
    sift = makeSift()

    _, g = sift.compute(gryImg, [cv2.KeyPoint(7.5, 7.5, 2)])
    return g.ravel()

def HSVnSiftExtract(cubeList):
    """Return one feature vector per cube: HSV means followed by the SIFT descriptor."""
    def featureVector(cube):
        return np.hstack((np.array(HSVmeanz(cube)), sifty(cube)))

    return [featureVector(cube) for cube in cubeList]
    
def multiSplitter(img, sectionVec, sectionNum):
    """Split a 2-D image into horizontal bands and sub-divide each band.

    Returns ``(x, y, secLen)``: ``x`` and ``y`` are the index chunks of all
    bands appended one after another, and ``secLen[i]`` holds how many y-chunks
    (column 0) and x-chunks (column 1) band ``i`` contributed. Because the
    lists are appended, a loop can walk over every section using ``secLen``.
    """
    # NOTE: shape[0] is called "width" here; x-chunks are cut from it.
    width, height = img.shape
    heightSections = list(chunks(range(0, height), round(height / sectionNum)))

    x, y = [], []
    secLen = np.zeros([sectionNum, 2])
    for band, pieces in zip(range(sectionNum), sectionVec[:sectionNum]):
        bandRows = heightSections[band]

        yTem = list(chunks(bandRows, round(len(bandRows) / pieces)))
        secLen[band, 0] = len(yTem)
        y.extend(yTem)

        xTem = list(chunks(range(0, width), round(width / pieces)))
        secLen[band, 1] = len(xTem)
        x.extend(xTem)

    return x, y, secLen

def medianExtractor(secLen, darkChannelImg, x, y):
    """Return the vector of dark-channel medians used as the haze feature.

    secLen         -- array with one row per section: (number of y-chunks,
                      number of x-chunks)
    darkChannelImg -- 2-D dark channel image
    x, y           -- chunk lists produced by multiSplitter

    Returns a length-92 float vector; unused trailing entries stay 0.

    Fix: ``np.int`` was removed from NumPy, so the built-in ``int`` is used.
    """
    # NOTE(review): i and j restart at 0 for every section, so y[i] / x[j]
    # always index from the start of the concatenated chunk lists, not from the
    # chunks of the current section. Left unchanged so the feature stays
    # compatible with previously trained models -- confirm whether intended.
    hazeVec = np.zeros(92)
    z = 0
    for ySecLen, xSecLen in secLen:
        for i in range(int(ySecLen)):
            for j in range(int(xSecLen)):
                # median of the dark channel inside this chunk
                hazeVec[z] = np.median(darkChannelImg[np.ix_(y[i], x[j])].ravel())
                z = z + 1

    return hazeVec

def darkChannelBuilder(img):
    """Return the per-pixel minimum over the first three colour channels.

    Generalised: the result used to be forced into ``reshape(512, 512)``, which
    failed for any other image size. The output now has the height and width of
    ``img``; values for 512x512 inputs are unchanged.
    """
    return np.min(img[:, :, :3], axis=2)

def HazePipe(img):
    """Return the haze feature vector of ``img``.

    Dark-channel medians are taken over three horizontal bands that are
    sub-divided 2, 4 and 8 ways.
    """
    splitsPerBand = [2, 4, 8]
    dark = darkChannelBuilder(img)
    xChunks, yChunks, chunkCounts = multiSplitter(dark, splitsPerBand, len(splitsPerBand))
    return medianExtractor(chunkCounts, dark, xChunks, yChunks)
   
def HazeListExtractor(imgList):
    """Return the HazePipe feature vector of every image in ``imgList``, as a list."""
    return list(map(HazePipe, imgList))



def contFeVe(img):
    """Return the 171-dimensional contrast vector of a BGR image.

    It is built from ratios between percentiles (5, 10, ..., 100) of the
    saturation channel. Percentile ``i`` is divided by every percentile ``j``
    with ``j < i - 1``, and zero percentiles are replaced by 1 beforehand.
    """
    # saturation channel of the HSV image as a flat vector
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    saturation = hsv[:, :, 1].ravel()

    # percentile vector
    levels = np.zeros([20])
    for slot, q in enumerate(range(5, 105, 5)):
        levels[slot] = np.percentile(saturation, q)

    # convert all 0s to 1s to avoid infs during division
    levels[levels == 0] = 1

    # output vector, filled pair by pair
    satM = np.ones(171)
    filled = 0
    for upper in range(len(levels)):
        for lower in range(upper - 1):
            ratio = levels[upper] / levels[lower]
            satM[filled] = ratio
            if satM[filled] == np.inf:
                print('nans about', filled)
            filled += 1
    return satM
   
#####Functions to test success
        
def skyTestrWHist(mixedPaths, carryOnLength):
    """Presentation helper: show images with the sky classifier's verdict as text.

    mixedPaths    -- list of image file names
    carryOnLength -- number of images to classify and display

    Relies on the module-level models ``rf`` (per-cube sky / non-sky) and
    ``rfHist`` (sky histogram classifier).

    Fixes:
    * ``cv2.waitKey`` replaces ``time.sleep``. HighGUI only draws the window
      while waitKey runs, so sleeping left it blank.
    * The histogram is passed to ``predict`` as one 2-D sample, not a 1-D array.
    * Unreadable files are skipped instead of crashing in ``cv2.resize``.
    * The unused ``rf.predict`` call and the unused structuring element are
      gone, and the label is always bound.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    for path in mixedPaths[:carryOnLength]:
        # Read in the input image; cv2.imread returns None on failure
        test = cv2.imread(path)
        if test is None:
            print('Could not read image, skipping:', path)
            continue

        # Resize so every image yields the same number of cubes
        test = cv2.resize(test, (512, 512))

        # Get ranges and 15x15 cubes, then one feature vector per cube
        testCubes, vekH, vekW = cubeifiyWvek(test, 15)
        testCubesEtrctd = HSVnSiftExtract(testCubes)

        # Only keep predictions with probability over 70%
        predza = (rf.predict_proba(testCubesEtrctd) > 0.7) * 1

        # 2-D mask, same height/width as the image: 1 where a cube is sky.
        # It starts at zero, so non-sky cubes need no explicit reset.
        rfMarker = np.zeros(test.shape[0:2], np.uint8)
        counta = 0
        for i in range(len(vekH) - 1):
            for j in range(len(vekW) - 1):
                if predza[counta, 1] == 1:
                    rfMarker[np.ix_(vekW[j], vekH[i])] = 1
                counta = counta + 1

        # Build the feature vector from just the sky and classify it
        aa = histoExtraktor(test, rfMarker)
        text = rfHist.predict(aa.reshape(1, -1))[0]
        printText = 'D/C Sky' if text == 1 else 'L/S Sky'

        # Put the text on the image and display it
        cv2.putText(test, printText, (10, 500), font, 2, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('na', test)

        # Show for 3 seconds (or until a key is pressed) before the next image
        cv2.waitKey(3000)
        cv2.destroyAllWindows()
        
def skyTestrWShowSky(mixedPaths, carryOnLength):
    """Presentation helper: show images with the detected sky blacked out.

    mixedPaths    -- list of image file names
    carryOnLength -- number of images to process and display

    Relies on the module-level model ``rf`` (per-cube sky / non-sky).

    Fixes:
    * ``cv2.waitKey`` replaces ``time.sleep``. HighGUI only draws the window
      while waitKey runs, so sleeping left it blank.
    * Unreadable files are skipped instead of crashing in ``cv2.resize``.
    * The unused ``rf.predict`` call was removed.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))

    for path in mixedPaths[:carryOnLength]:
        # Read in the input image; cv2.imread returns None on failure
        test = cv2.imread(path)
        if test is None:
            print('Could not read image, skipping:', path)
            continue

        # Resize so every image yields the same number of cubes
        test = cv2.resize(test, (512, 512))

        # Get ranges and 15x15 cubes, then one feature vector per cube
        testCubes, vekH, vekW = cubeifiyWvek(test, 15)
        testCubesEtrctd = HSVnSiftExtract(testCubes)

        # Only keep predictions with probability over 70%
        predza = (rf.predict_proba(testCubesEtrctd) > 0.7) * 1

        # 2-D mask, same height/width as the image: 1 where a cube is sky.
        # It starts at zero, so non-sky cubes need no explicit reset.
        rfMarker = np.zeros(test.shape[0:2], np.uint8)
        counta = 0
        for i in range(len(vekH) - 1):
            for j in range(len(vekW) - 1):
                if predza[counta, 1] == 1:
                    rfMarker[np.ix_(vekW[j], vekH[i])] = 1
                counta = counta + 1

        # Erode then dilate to drop small isolated areas marked as sky, which
        # are usually errors because sky tends to form one connected region
        rfMarker = cv2.erode(rfMarker, kernel, iterations = 1)
        rfMarker = cv2.dilate(rfMarker, kernel, iterations = 2)

        # Flip the mask so the sky is False; multiplying turns the sky black
        rfMarker = rfMarker == 0
        test = test * rfMarker[:, :, np.newaxis]

        cv2.imshow('na', test)

        # Show for 3 seconds (or until a key is pressed) before the next image
        cv2.waitKey(3000)
        cv2.destroyAllWindows()



In [3]:
#Loads all the training data built from flickr images

# NOTE: pickle.load can execute arbitrary code; only open patch files that
# were produced locally / come from a trusted source.
# The files are opened with `with` so the handles are closed after loading.

#open the sky/nonsky training patches
with open("patches.sky", "rb") as patchFile:
    skyPatchs = pickle.load(patchFile)
with open("patches.nonsky", "rb") as patchFile:
    nonskyPatchs = pickle.load(patchFile)

#open the sunny/cloudy training patches
with open("patches.sunnySky", "rb") as patchFile:
    SunnyPatchs = pickle.load(patchFile)
with open("patches.cloudySky", "rb") as patchFile:
    CloudyPatchs = pickle.load(patchFile)

#build file lists of training examples
# NOTE(review): glob.glob returns files in arbitrary order, so the pairing
# below may differ between machines -- consider sorting if reproducibility matters.
CloudPaths= glob.glob('./Data/cloudy/*.jpg')
SunPaths= glob.glob('./Data/sunny/*.jpg')
# interleaved as cloudy, sunny, cloudy, sunny, ... (zip stops at the shorter list)
mixedPaths = list(itertools.chain.from_iterable(zip(CloudPaths, SunPaths)))
nightPaths = glob.glob('./NightImages/*.jpg')
snowPaths =  glob.glob('./snowImages/*.jpg')

#Possible expansions
#indoorPaths =  glob.glob('./indoor_images/indoorfullset/*.jpg')
#caliPaths =  glob.glob('./calipicz/*.jpg')

#Final test set, contains all image types
UltimateMixedPaths = list(itertools.chain.from_iterable(zip(mixedPaths, snowPaths, nightPaths)))



In [7]:
#Cut all sky / non-sky training patches into 15 by 15 cubes
skysqrs = cubeifiyPatchList(skyPatchs, 15)
nonskysqrs = cubeifiyPatchList(nonskyPatchs, 15)


#Turn the cubes into feature vectors (mean HSV levels + SIFT descriptor)
skysqrs = HSVnSiftExtract(skysqrs)
nonskysqrs = HSVnSiftExtract(nonskysqrs)

#Label sky cubes 1 and non-sky cubes 0
X, y = randomForestPrep(skysqrs, nonskysqrs)

#Build and train the sky/non-sky classifier (used as the global `rf`)

rf = en.RandomForestClassifier(n_estimators=1000)

rf.fit(X,y)

#Accuracy on the training data itself (not a held-out set)
rf.score(X,y)

# returns 0.99

#Turn every sunny / cloudy sky patch into a colour histogram vector
#(one vector per whole patch)


patchSun = SkytoHistList(SunnyPatchs)

patchCloud = SkytoHistList(CloudyPatchs)

#Stack the histogram vectors and build the labels; X and y are reused here

X, y = HistToRF(patchSun, patchCloud)


#Build and train the sky histogram classifier (used as the global `rfHist`)
rfHist = en.RandomForestClassifier(n_estimators=1000)

rfHist.fit(X,y)

#Accuracy on the training data itself (not a held-out set)
rfHist.score(X,y)


1
2
3
4
5
6
7
8
9
10


1.0

In [5]:
#load all previous models
# sklearn.externals.joblib was removed from scikit-learn; prefer the standalone
# joblib package and fall back to the bundled copy on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load model files from a trusted source.
rfSnow = joblib.load('./FinalModelFolder/rfSnow.pkl')

#rf is the sky classifer Random Forest

#rf = joblib.load('./FinalModelFolder/rfSky.pkl')

rfMain = joblib.load('./FinalModelFolder/rfMain.pkl')

# replaces any rfHist that was trained earlier in this session
rfHist = joblib.load('./FinalModelFolder/rfHist.pkl')

rfNight = joblib.load('./FinalModelFolder/rfNight.pkl')

In [8]:
#Presentation box
#Each call opens an OpenCV window per image; the models rf, rfHist, rfNight,
#rfSnow and rfMain must already be defined by the cells above.

#Show the first 15 mixed images with the detected sky blacked out
skyTestrWShowSky(mixedPaths,15)

#Show the first 15 mixed images labelled with the sky classifier's verdict
skyTestrWHist(mixedPaths,15)

#Run the full night / snow / sunny-cloudy cascade on 20 images, starting at
#index 5000 of the combined list (assumes the list has more than 5020 entries)
fullPipelineTestr(UltimateMixedPaths[5000:], 20)

