# Potato analysis

We start by loading all the necessary libraries.

In [1]:
import scipy.io
from PIL import Image
from matplotlib import cm
import numpy as np
import cv2
from scipy.stats import pearsonr
from scipy.spatial.distance import cosine,correlation
import pandas as pd
from itertools import combinations
from imutils import paths
from tqdm import tqdm

Now, we define a function that given the path to a mat file returns 9 cropped images and their associated names. We first define a function to sort the contours of a given image. 

In [2]:
def sort_contours(cnts):
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    
    ys = [x[0] for x in boundingBoxes]
    xs = [x[1] for x in boundingBoxes]
    
    sortedIndicesTemp = []
    for i in range(0,9):
        x = np.argmin(xs)
        sortedIndicesTemp.append(x)
        xs[x] = 1000
    
    sortedIndices = []
    tempYs = [ys[sortedIndicesTemp[0]],ys[sortedIndicesTemp[1]],ys[sortedIndicesTemp[2]]]
    for i in range(0,3):
        y = np.argmin(tempYs)
        sortedIndices.append(sortedIndicesTemp[y])
        tempYs[y]=1000
    tempYs = [ys[sortedIndicesTemp[3]],ys[sortedIndicesTemp[4]],ys[sortedIndicesTemp[5]]]
    for i in range(0,3):
        y = np.argmin(tempYs)
        sortedIndices.append(sortedIndicesTemp[3+y])
        tempYs[y]=1000
    tempYs = [ys[sortedIndicesTemp[6]],ys[sortedIndicesTemp[7]],ys[sortedIndicesTemp[8]]]
    for i in range(0,3):
        y = np.argmin(tempYs)
        sortedIndices.append(sortedIndicesTemp[6+y])
        tempYs[y]=1000
    
    cntsFinal = []
    for i in sortedIndices:
        cntsFinal.append(cnts[i])

#     (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
#         key=lambda b:b[1][i], reverse=reverse))
    # return the list of sorted contours and bounding boxes
    return cntsFinal,[cv2.boundingRect(c) for c in cntsFinal]

The following function is in charge of loading the image and cropping it. 

In [3]:
def generateNames(imagePath):
    name = imagePath[imagePath.rfind('V')+1:imagePath.rfind('-')]
    firstVariety = int(name[0:3])
    firstPotato = int(name[3])
    lastVariety = int(name[4:7])
    potatos = []
    for i in range(firstVariety,firstVariety+9):
        if(firstVariety<100):
            potatos.append("Variety 0"+str(firstVariety)+". Potato " + str(firstPotato))
        else:
            potatos.append("Variety "+str(firstVariety)+". Potato " + str(firstPotato))
        if(firstPotato==2):
            firstPotato = 1
            firstVariety += 1
        else:
            firstPotato = 2
    
#     sortedPotatos = [potatos[6],potatos[3],potatos[0],potatos[7],potatos[4],potatos[1],
#                      potatos[8],potatos[5],potatos[2]]
    return potatos

In [4]:
generateNames('data/V10821122-nhs.mat')

['Variety 108. Potato 2',
 'Variety 109. Potato 1',
 'Variety 109. Potato 2',
 'Variety 110. Potato 1',
 'Variety 110. Potato 2',
 'Variety 111. Potato 1',
 'Variety 111. Potato 2',
 'Variety 112. Potato 1',
 'Variety 112. Potato 2']

We define a function to clean the image.

In [5]:
def black(x):
    if(x<0.1):
        return 0.0
    else:
        return x

myfunc_vec = np.vectorize(black)

In [6]:
def cropPotatos(imagePath):
    names = generateNames(imagePath)
    mat = scipy.io.loadmat(imagePath)
    myarray = mat['hsImage'][:,:,75]
    my_array2 = myfunc_vec(myarray)
    cnts,_ = cv2.findContours(np.uint8(my_array2*255).copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cntsValid = []
    for c in cnts:
        if cv2.contourArea(c) > 100:
            cntsValid.append(c)
    cs,b=sort_contours(cntsValid)
    crops = []
    for c in cs:
        (x, y, w, h) = cv2.boundingRect(c)
        roi = mat['hsImage'][y:y + h, x:x + w]
        crops.append(roi)
    return crops,names

In [7]:
crops,names=cropPotatos('data/V10821122-nhs.mat')

The following definition generates the histograms associated with each potato.

In [7]:
def generateHistograms(image,channels=(32,170),bins=5,annulus=3):
    """
    Generate histograms associated to a given image
    
    Parameters
    ---------------
    image    : array of shape (width,height,channels)
               A cropped image with all the channels.
    channels : pair of integers
               The range of channels that will be taken into account.                
    bins     : int
               The number of bins that will be used for the histogram.
    annulus  : int
               The number of annulus that will be extracted for computing the histograms. 
               
    Returns
    ----------
    histograms : narray
                 Array of size (bins*(channels[1]-channels[0]),annulus) that stores the 
                 histograms for each annulus of each channel.
    
    """
    shape = image.shape[0:2]
    center = (int(shape[1]/2),int(shape[0]/2))
    minChannel = channels[0]
    maxChannel = channels[1]
    
    histograms = []
    
    
    axis0 = (int(shape[1]/2/annulus))
    axis1 = (int(shape[0]/2/annulus))
    for i in range(1,annulus+1):
        histogram = []
        mask = np.zeros(shape, dtype="uint8")
        cv2.ellipse(mask, center, (axis0*i,axis1*i),0,0,360,(255,255,255),-1)
        cv2.ellipse(mask, center, (axis0*(i-1),axis1*(i-1)),0,0,360,(255,255,255),-1)
        
        for j in range(minChannel,maxChannel):
            roi = image[:,:,j]
            maxValue = np.max(roi)
            masked = cv2.bitwise_and(roi, roi, mask=mask)
            hist = np.histogram(masked.flatten(),bins=bins,range=(0.01,maxValue))[0]
            if(hist.sum()==0):
                print(i,j)
                print(hist)
            histogram.append(hist/hist.sum())
        
        histograms.append(np.array(histogram).flatten())
    
    return np.array(histograms)

Finally, we generate a dataframe with the results.

In [9]:
def analyseImage(imagePath,channels=(32,170),bins=5,annulus=3,method='correlation'):
    crops,names=cropPotatos(imagePath)
    results = []
    for crop,name in zip(crops,names):
        combs = list(combinations(range(0,annulus),2))
        result = []
        result.append(name)
        hists = generateHistograms(crop)
        for comb in combs:
            if(method=='pearsonr'):
                result.append(pearsonr(hists[comb[0]],hists[comb[1]])[0])
            if(method=='cosine'):
                result.append(1-cosine(hists[comb[0]],hists[comb[1]]))
            if(method=='correlation'):
                result.append(1-correlation(hists[comb[0]],hists[comb[1]]))
        results.append(result)
    
    df = pd.DataFrame(data=np.array(results),columns=['name']+list(combinations(range(0,annulus),2)))
    return df

In [23]:
#crops,names=cropPotatos('data/V10821122-nhs.mat')
pd.DataFrame(np.array([np.append([name],generateHistograms(crop,annulus=1)[0]) for name,crop in zip(names,crops)]))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,681,682,683,684,685,686,687,688,689,690
0,Variety 108. Potato 2,0.0602409638554216,0.0448304847296161,0.072289156626506,0.5553376295881199,0.2673017652003362,0.061030235162374,0.0433930571108622,0.0655095184770436,0.5198768197088466,...,0.8706968933669186,0.1116708648194794,0.0095158130422614,0.0050377833753148,0.0030786453960257,0.8970053176602295,0.0859221942345368,0.008956059333893,0.005317660229499,0.0027987685418415
1,Variety 109. Potato 1,0.0544811320754717,0.0474056603773584,0.0766509433962264,0.4959905660377358,0.3254716981132075,0.0544811320754717,0.0476415094339622,0.0674528301886792,0.4962264150943396,...,0.0579778458637756,0.6938486919632335,0.2071647419278812,0.031110063634221,0.0098986566108885,0.0655196794720716,0.7343860476078247,0.1569644119726608,0.0322884751355173,0.0108413858119255
2,Variety 109. Potato 2,0.0455850075975012,0.0415330069221678,0.0656761776126962,0.5205132534188756,0.326692554448759,0.0479486746581124,0.037987506331251,0.0665203444200574,0.5363835893972649,...,0.5501181235234559,0.4063449206884914,0.0231184610192372,0.0172122848464394,0.0032062099223759,0.5859915611814346,0.370126582278481,0.0217721518987341,0.0177215189873417,0.0043881856540084
3,Variety 110. Potato 1,0.0237529691211401,0.0300151155258043,0.0857266249190239,0.6326927229540056,0.2278125674800259,0.025253615368012,0.0300021584286639,0.0846104036261601,0.6477444420461904,...,0.618656877564241,0.371841934787303,0.0049665299071474,0.0012956164975167,0.0032390412437918,0.6938687392055267,0.2983592400690846,0.0032383419689119,0.0012953367875647,0.0032383419689119
4,Variety 110. Potato 2,0.0356569225827176,0.0443302280758111,0.0575008030838419,0.4513331191776421,0.4111789270799871,0.0379055573401863,0.0401541920976549,0.0594282043045293,0.4182460648891744,...,0.0199164792804368,0.246386122711211,0.6052039832958561,0.1223899775136524,0.0061034371988435,0.0154192097654995,0.19948602634115,0.6045615162222936,0.1680051397365885,0.0125281079344683
5,Variety 111. Potato 1,0.0676149247577819,0.0375180375180375,0.0952380952380952,0.5609152752009895,0.2387136672850958,0.0692070030895983,0.0356333676622039,0.0982492276004119,0.568692070030896,...,0.3524083985179086,0.6352408398517908,0.0078221490325236,0.0037052284890901,0.0008233841086867,0.416838205022643,0.5720461095100865,0.0065870728694936,0.0039110745162618,0.000617538081515
6,Variety 111. Potato 2,0.0342505291514335,0.0386761593226861,0.0634981720223205,0.594766211275736,0.2688089282278237,0.0353982300884955,0.036937283570604,0.0550211619853789,0.5821469796075414,...,0.17602924201616,0.8026163909195845,0.0202000769526741,0.0009619084263178,0.0001923816852635,0.1881492881877645,0.7929973066564063,0.017121969988457,0.0013466717968449,0.0003847633705271
7,Variety 112. Potato 1,0.0717948717948717,0.0311355311355311,0.0609890109890109,0.5492673992673993,0.2868131868131868,0.0723708318065225,0.0300476364968853,0.0676071821179919,0.5771344814950531,...,0.1175932699341624,0.7869422092172641,0.0799195318215069,0.0128017556693489,0.0027432333577176,0.1110501280643981,0.7768020490303695,0.093304061470911,0.0153677277716794,0.0034760336626417
8,Variety 112. Potato 2,0.0275941362460477,0.0465651049152055,0.0928427709111813,0.6096579476861167,0.2233400402414486,0.0267318194883587,0.0431158378844495,0.0617993676343777,0.5659672319632079,...,0.5561943087093992,0.4139120436907157,0.0189709686691578,0.0068985340615119,0.0040241448692152,0.6507617131359587,0.3193446392641563,0.0192584075883874,0.0074734118999712,0.0031618281115263


In [10]:
analyseImage('data/V10821122-nhs.mat')

Unnamed: 0,name,"(0, 1)","(0, 2)","(1, 2)"
0,Variety 108. Potato 2,0.9069979429180997,0.8231309420171073,0.9734151760236608
1,Variety 109. Potato 1,0.8836220413631974,0.7689085537478141,0.9707577470033868
2,Variety 109. Potato 2,0.9264951698337456,0.8148584089484205,0.960026293516706
3,Variety 110. Potato 1,0.9477032562902344,0.7841774288406529,0.9258706783922216
4,Variety 110. Potato 2,0.8685727562813668,0.7201925726998986,0.9505636354259775
5,Variety 111. Potato 1,0.8573474918123738,0.7907602325522342,0.9873085240521076
6,Variety 111. Potato 2,0.892004187141499,0.7415033297408817,0.9549798766796472
7,Variety 112. Potato 1,0.8762254091571589,0.7060523858222708,0.950308905623025
8,Variety 112. Potato 2,0.9406860542339426,0.856124013348529,0.973847017123348


We can now analyse all the files

In [25]:
files = list(paths.list_files('data',validExts='.mat'))

In [12]:
files.remove('data/V14711492-nhs.mat')

In [13]:
len(files)

22

In [None]:
dfs = []
for file in tqdm(files):
    dfs.append(analyseImage(file,bins=20,annulus=2))

result = pd.concat(dfs)

In [27]:
dfs = []
for file in tqdm(files):
    crops,names=cropPotatos(file)
    df=pd.DataFrame(np.array([np.append([name],generateHistograms(crop,annulus=1)[0]) for name,crop in zip(names,crops)]))
    dfs.append(df)

result = pd.concat(dfs)

100%|███████████████████████████████████████████████████████████████████████████████████| 23/23 [00:33<00:00,  1.46s/it]


In [28]:
result.to_csv('histograms.csv',index=None)