In [1]:
import os
import re
import cv2
import copy
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Reading TestInfo for each TestResult
`testInfo.csv` stores all the necessary information produced after the clustering<br/>
has been run on a specific region and frequency.

In [2]:
# Nested lookup built below:
#   imgFreq[region][frequency][algorithm][test folder name] -> testInfo.csv as a DataFrame
imgFreq = {}
# Region codes left commented out by the original author: 'CPAC','EPAC','IO','SHEM','WPAC'
freqList = ['19H','19V','19','22V','37V','37H','37','91H','91V','91','150H','183_1H','183_3H','183_7H']
regList = ['ATL']
funList = ['agglo',"kmeans","fuzzykmeans"]
for reg in regList:
    imgFreq[reg] = {}
    for freq in freqList:
        imgFreq[reg][freq] = {}
        for fun in funList:
            # os.path.join keeps the path valid on any OS; the trailing "" keeps the
            # trailing separator the original string concatenation produced.
            path = os.path.join("..", "..", "AllFrequencies", reg, freq, fun, "")
            imgFreq[reg][freq][fun] = {}
            for test in sorted(os.listdir(path)):
                infoFile = os.path.join(path, test, "testInfo.csv")
                # Skip stray files / folders that do not hold a test result
                # instead of letting pd.read_csv raise on them.
                if not os.path.isfile(infoFile):
                    continue
                imgFreq[reg][freq][fun][test] = pd.read_csv(infoFile)

### Test Result Class
Function to Paste Images into Each Cluster<br />
Function to Remove those Pasted Images<br />
Function to Create Composite Images ( Mean of all images in a unique cluster )<br />
Function to Create Histograms of Cluster Label and Intensity Number<br />

In [33]:
def MakeDir(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well, so callers do not have to
    build the folder hierarchy level by level.

    Returns:
        True if the directory was created by this call, False if it already
        existed (callers use this to skip work that was already done).
    """
    if os.path.isdir(path):
        return False
    # exist_ok guards against the directory appearing between the check and the call.
    os.makedirs(path, exist_ok=True)
    return True

class TestResults():
    def __init__(self,df,path,minSilhoutteVal = -1):
        self.mDf = copy.deepcopy(df[df.SilhouetteVal > minSilhoutteVal])
        self.mMinSilVal = minSilhoutteVal
        self.mTestPath = path
        self.mPath = path + "SilValue_"+ str(self.mMinSilVal) + "\\"
    
    # Coping Images from MyCreatedData to The cluster where it belongs for this testCase
    def Paste_Images(self):
        cluster_labels = self.mDf.ClusterLabel.unique()
        for cL in cluster_labels:
            MakeDir(self.mPath+str(cL))
        
        for i,r in self.mDf.iterrows():
            cL_path = self.mPath+str(r.ClusterLabel)+"\\"
            imgName = r.FileName.split("//")[8]
            
            # Original Image File Path
            actualFile = cL_path+imgName
            
            # Copied at the cluster label where it belongs
            shutil.copy( r.FileName,cL_path )
            
            # Renaming the copied file : This helps us to visually understand which intensity belongs to a particular image
            # Old name : imgName
            # New name : imgIntensity_imgName
            t_no= str(r.T_No).replace(".","-")
            renameFile = cL_path+t_no+"_"+imgName[:len(imgName)-4]+"_"+str(r.SilhouetteVal)+".png"
            
            try:
                os.rename(actualFile,renameFile)
            except WindowsError:
                os.remove(renameFile)
                os.rename(actualFile,renameFile)
            
    def Create_Histogram_Images(self):
        cluster_labels = self.mDf.ClusterLabel.unique()
        t_no_labels = self.mDf.T_No.unique()
        
        # Creating Cluster_Label Folder
        cL_path = path+"Cluster_Label_"+ str(self.mMinSilVal)
        if not MakeDir(cL_path):
            return
        cL_path+="\\"

        # Creating T_No Folder 
        tno_path = path+"T_No_Label_" +str(self.mMinSilVal)
        if not MakeDir(tno_path):
            return
        tno_path+="\\"
        
        # Each graph depicts how each cluster is distributed amoung a particular intensity
        for t_no in t_no_labels:
            x = copy.deepcopy( self.mDf[ self.mDf.T_No == t_no ] )  
            plt.xlim(min(cluster_labels),max(cluster_labels))
            plt.xlabel("Cluster_Label - per each T_No")
            plt.ylabel("No_of_Images")
            plt.hist(x.ClusterLabel)
            plt.savefig(tno_path+str(t_no)+"_"+str(len(x))+".png")
            plt.close()

        # Each graph depicts how the intensity is distributed among a particular cluster
        for cL in cluster_labels:
            x = copy.deepcopy( self.mDf[ self.mDf.ClusterLabel == cL ] )
            plt.xlim(0,9)
            plt.xlabel("T_No - per each cluster") # Text for X-Axis
            plt.ylabel("No_of_Images")
            plt.hist(x.T_No)
            plt.savefig(cL_path+str(cL)+"_"+str(len(x))+".png")
            plt.close()
    
    
    def Composite_Images(self,w=360):
        folderName = "Composite_Images_" + str(self.mMinSilVal)
        if not MakeDir(self.mPath+folderName):
            return 
        
        cluster_labels = self.mDf.ClusterLabel.unique()
        for cL in cluster_labels:
            
            cluster_df = copy.deepcopy( self.mDf[ self.mDf.ClusterLabel == cL ] )
            
            comp_img = np.zeros((w,w,3))
            for i,r in cluster_df.iterrows():
                
                # Read ImageFiles and Not Considering Alpha Value just BGR is considered
                img_cv = 0
                try:
                    img_cv = cv2.imread(r.FileName) 
                except:
                    print("Error in reading Image: ",r.FileName)
                
                try:
                    img_cv = cv2.resize(img_cv, (w,w) )
                except:
                    print("Resize Image Error: ",r.FileName)
                
                comp_img += img_cv
            comp_img /= len(cluster_df)

            cv2.imwrite(self.mPath+folderName+"\\"+str(cL)+"_"+str(len(cluster_df))+".png",comp_img.astype(np.uint8))
    
    def Silhouette_Graph(self,test):
        cluster_labels = self.mDf.ClusterLabel.unique()
        plt.xlabel("Silhouette Value") # Text for X-Axis
        plt.ylabel("No of Images")
        plt.hist(self.mDf.SilhouetteVal)
        plt.savefig(test+".png")
        plt.close()
        
        
        '''
        # Each graph depicts how the silhouette is distributed among a particular cluster
        for cL in cluster_labels:
            x = copy.deepcopy( self.mDf[ self.mDf.ClusterLabel == cL ] )
            plt.xlabel("Silhouette Value") # Text for X-Axis
            plt.ylabel("No of Images")
            plt.hist(x.SilhouetteVal)
            plt.savefig(str(cL)+".png")
            plt.close()
        '''
        
    def Move_Cluster_Folder(self):
        cluster_labels = self.mDf.ClusterLabel.unique()
        newFolder = self.mPath + "SilValue_" + str(self.mMinSilVal)
        for root, dirs, files in os.walk(self.mPath):
            if "Cluster_Label_"+str(self.mMinSilVal) in root or "Composite_Images_"+str(self.mMinSilVal) in root or "T_No_Label_"+ str(self.mMinSilVal) in root:
                try:
                    shutil.move(root, newFolder)        
                except:
                    print("Already Exists: "+root)
                    
    def Remove_Pasted_Images(self):
        for root, dirs, files in os.walk(self.mPath):
            if "Cluster_Label" in root or "Composite_Images" in root or "T_No_Label" in root:
                shutil.rmtree(os.path.join(root))   
                
            for name in files:
                if "SSMIS" in name and ".png" in name:
                    #print(os.path.join(root, name))
                    os.remove(os.path.join(root, name))

### Analyzing the Results

In [34]:
# Plot the silhouette-value histogram of "Test_10" for every clustering
# algorithm in funList, for region ATL at frequency 91.
reg = 'ATL'
freq = '91'
silValues = [-1.0,0.0,0.1]
#fun = 'agglo'
#test = 'Test_14'
for fun in funList:
    for test,df in imgFreq[reg][freq][fun].items():
        
        if test == "Test_10":
            path = "..\\..\\AllFrequencies\\"+reg+"\\"+freq+"\\"+fun+"\\"+test+"\\"
            for val in silValues:
                tR = TestResults(df,path,val)
                # Saved as <fun><test>.png, relative to the current working directory.
                tR.Silhouette_Graph(fun+test)
                # NOTE(review): this break leaves the loop after the first
                # threshold, so only silValues[0] (-1.0) is ever processed.
                break
            # Remaining post-processing steps, currently disabled:
            #tR.Paste_Images()
            #tR.Move_Cluster_Folder()
            #tR.Composite_Images()
            #tR.Create_Histogram_Images()
            #tR.Remove_Pasted_Images()

In [None]:
'''            
for reg in regList:
    for freq in freqList:
        for fun in funList:
            for test,df in imgFreq[reg][freq][fun].items():   
                path = "..\\..\\AllFrequencies\\"+reg+"\\"+freq+"\\"+fun+"\\"+test+"\\"
                tR = TestResults(df,path)
                tR.Composite_Images()
                tR.Create_Histogram_Images()
                print(path)
                #tR.Remove_Pasted_Images()
'''

'''
1470 for 0.0
937 for 0.0 in CL==1

875 for 0.1
793 for 0.1 in CL==1
'''
# Clean up the generated folders / pasted images of ATL, 91, fuzzykmeans, Test_12.
reg = 'ATL'
freq = '91'
fun = 'fuzzykmeans'
test = 'Test_12'
df = imgFreq[reg][freq][fun][test]
path = "..\\..\\AllFrequencies\\"+reg+"\\"+freq+"\\"+fun+"\\"+test+"\\"
# No threshold is passed, so the default minSilhoutteVal (-1) applies and only
# the "SilValue_-1" folder below `path` is walked by Remove_Pasted_Images.
tR = TestResults(df,path)
tR.Remove_Pasted_Images()

#apple = copy.deepcopy( df[df.SilhouetteVal > 0.1]  )
#apple
#tR = TestResults(df,path,)
#tR.Composite_Images()
#tR.Create_Histogram_Images()


### Extra Code

#cv2.imread("..//..//MyCreatedData//5//ATL//29//F16//19//20051123T113200_SSMIS_F16.png")
'''

#RemovePastedImagesFrom(imgFreq,'ATL','19','agglo','Test_0')
for reg in regList:
    for freq in freqList:
        for fun in funList:
            for test,df in imgFreq[reg][freq][fun].items():
                RemoveImages(reg,freq,fun,test)
                break
            break
        break
    break

In [None]:
# NOTE(review): PasteImages is not defined anywhere in the visible notebook, and
# imgFreq[...][fun] is keyed by test folder names (strings from os.listdir), so
# the integer key 0 would raise KeyError - this cell looks like leftover scratch code.
for reg in regList:
    for freq in freqList:
        for fun in funList:
            path = "..//..//AllFrequencies//"+reg+"//"+freq+"//"+fun+"//Test_14//"
            PasteImages( imgFreq[reg][freq][fun][0], path )
            
'''reg = 'ATL'
freq = '19H'
fun = 'kmeans'
'''

In [None]:
# Rows of cluster 2 with a silhouette value above 0.1 (ATL, 91, agglo).
# NOTE(review): the test-level keys of imgFreq are folder-name strings, so the
# integer key 0 would raise KeyError - confirm which test was meant.
temp = imgFreq["ATL"]["91"]["agglo"][0][ imgFreq["ATL"]["91"]["agglo"][0].ClusterLabel == 2 ]
temp[ temp.SilhouetteVal > 0.1 ]

In [None]:
# Displays one test frame; relies on reg/freq/fun left over from earlier cells.
# NOTE(review): integer key 0 does not match the string test-folder keys of imgFreq.
imgFreq[reg][freq][fun][0]

In [None]:
#for reg in regList:
    #for freq in freqList:

# Single-case variant of the paste step for ATL, frequency 19, agglo, Test_14.
# NOTE(review): PasteImages is not defined in the visible notebook and the
# integer key 0 does not match the string test-folder keys of imgFreq.
reg = 'ATL'
freq = '19'
path = "..//..//AllFrequencies//"+reg+"//"+freq+"//"+"agglo//Test_14//"
PasteImages( imgFreq[reg][freq]["agglo"][0], path )


Creating a Zip file of all the specific folders

In [None]:
from zipfile import ZipFile
# Archive every file below the agglo/Test_14 folder of one region and frequency.
reg = 'ATL'
freq = '91'
path = "..//..//AllFrequencies//"+reg+"//"+freq+"//"+"agglo//Test_14//"
# The context manager closes (and finalizes) the archive even if a write fails.
with ZipFile(reg+"_"+freq+"_agglo_test_14.zip", 'w') as zipObj:
    for folderName, subfolders, filenames in os.walk(path):
        for filename in filenames:
            # Complete path of the file inside the walked directory
            filePath = os.path.join(folderName, filename)
            print(filePath)
            zipObj.write(filePath)

In [None]:
from zipfile import ZipFile

# One archive per region, holding the agglo/Test_14 folder of every frequency.
for reg in regList:
    # The context manager closes (and finalizes) the archive even if a write fails.
    with ZipFile(reg+'agglo_test_14.zip', 'w') as zipObj:
        for freq in freqList:
            path = "..//..//AllFrequencies//"+reg+"//"+freq+"//"+"agglo//Test_14//"
            for folderName, subfolders, filenames in os.walk(path):
                for filename in filenames:
                    # Complete path of the file inside the walked directory
                    filePath = os.path.join(folderName, filename)
                    zipObj.write(filePath)

The code below aims at finding the images that are common to multiple frequencies

In [None]:
def Test( df ):
    """Return the rows with a positive silhouette value, plus an ``ImgName`` column.

    ``ImgName`` is the last ``//``-separated component of ``FileName``, i.e. the
    image file name without its folders, whatever the depth of the path.

    Args:
        df: frame with the columns ``SilhouetteVal`` and ``FileName``.

    Returns:
        A new DataFrame; the frame passed in is not modified.
    """
    df = df[df.SilhouetteVal > 0].copy()
    # Take the last component instead of a fixed index so the function does not
    # depend on how many folders precede the file name.
    df['ImgName'] = [name.split("//")[-1] for name in df.FileName]
    return df

def CompareTest( df1, df2 ):
    """Return the image names present in both frames.

    The two frames are inner-joined on their ``ImgName`` column and only that
    column is kept in the result.
    """
    common = pd.merge(df1, df2, on='ImgName')
    return common[['ImgName']].copy()

In [None]:
# For every region, intersect the image names (positive silhouette only, see
# Test) across all frequencies of the agglo results.
# NOTE(review): the test-level keys of imgFreq are folder-name strings, so the
# integer key 0 would raise KeyError - confirm which test was meant.
finalDf = {}
for reg in regList:
    finalDf[reg] = Test( imgFreq[reg][freqList[0]]['agglo'][0] )
    for i in range(1,len(freqList)):
        finalDf[reg] = CompareTest( finalDf[reg], Test(imgFreq[reg][freqList[i]]['agglo'][0]) )

In [None]:
# NOTE(review): finalDf is filled from regList, which only contains 'ATL' in
# this notebook, so this lookup raises KeyError unless regList is extended.
finalDf['WPAC']

In [None]:
# Positive-silhouette rows (with ImgName) of the kmeans result at four frequencies.
# NOTE(review): the integer key 9 does not match the string test-folder keys of imgFreq.
df1 = Test( imgFreq['ATL']['19H']['kmeans'][9] )
df2 = Test( imgFreq['ATL']['91H']['kmeans'][9] )
df3 = Test( imgFreq['ATL']['150H']['kmeans'][9] )
df4 = Test( imgFreq['ATL']['183_1H']['kmeans'][9] )

# Image names present in all four frames.
df_imgName = CompareTest(df1,df2)
df_imgName = CompareTest(df_imgName, df3)
df_imgName = CompareTest(df_imgName, df4)

# Restrict every frame to the common images.
df1 = df1.merge( df_imgName, left_on='ImgName', right_on='ImgName' )
df2 = df2.merge( df_imgName, left_on='ImgName', right_on='ImgName' )
df3 = df3.merge( df_imgName, left_on='ImgName', right_on='ImgName' )
df4 = df4.merge( df_imgName, left_on='ImgName', right_on='ImgName' )

In [None]:
def LabelAndSill(df,silVal, maxLabel):
    """Print how many images of one T_No fall into each cluster label.

    Args:
        df: frame with the columns ``T_No`` and ``ClusterLabel``.
        silVal: value compared against the ``T_No`` column (despite its name).
        maxLabel: labels ``0 .. maxLabel-1`` are counted.

    Prints a separator line followed by one count per label.
    """
    labels = df[ df.T_No == silVal ].ClusterLabel
    print("-------------------------")
    for label in range(maxLabel):
        print(int((labels == label).sum()))

In [None]:
# Copy every image of df4 into <test folder>/<ClusterLabel>/<T_No>/.
reg = 'ATL'
freq = '183_1H'
fun = 'kmeans'
j = 9
path = "..//..//AllFrequencies//"+reg+"//"+freq+"//"+fun+"//Test_"+str(j)+"//"
for i,r in df4.iterrows():
    tNo_path = path+str(r.ClusterLabel)+"//"+str(r.T_No)
    # makedirs also creates the cluster-label folder when it is missing;
    # os.mkdir would fail in that case.
    os.makedirs(tNo_path, exist_ok=True)
    shutil.copy( r.FileName,tNo_path )
    


In [None]:
# Demo: lay out a 7 x 3 grid of subplots, each showing a random 10 x 10 image.
from matplotlib import pyplot as plt
import matplotlib.image as mpimage

w=10
h=10
fig=plt.figure(figsize=(8, 8))
columns = 3
rows = 7
for i in range(1, columns*rows +1):
    # Random integers in [0, 10); no seed is set, so the images differ on every run.
    img = np.random.randint(10, size=(h,w))
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
plt.show()

In [None]:
# Select one test frame and keep only the rows with a positive silhouette value.
# NOTE(review): `test = 9` is an integer, while imgFreq is keyed by test folder
# names (strings), so the lookup below would raise KeyError - confirm the key.
reg = 'ATL'
freq = '19H'
fun = 'kmeans'
test = 9
df = imgFreq[reg][freq][fun][test]
df = df[ df.SilhouetteVal>0.0 ]


'''
imgCol = []
for i,r in df.iterrows():
    imgCol.append( r.FileName.split("//")[9]  )
df['ImgName'] = imgCol
'''



In [None]:
# File name of the fourth image (position 3) with T_No 1.0 in cluster 0;
# uses `df` from an earlier cell.
dr = df[ df.T_No == 1.0 ]
dr[ dr.ClusterLabel == 0 ].FileName.iloc[3]

In [None]:
'''
def Ans(df):
    
    uniqueId = sorted(list(df.ClusterLabel_x.unique()))
    for label in uniqueId:
        tno[label] = {}
        df1 = df.T_No[df.ClusterLabel == label]
        t1 = list(df1)
        i=0
        while i <9.5:
            tno[label][i] = t1.count(i)
            i+=0.5
    
    newDf = pd.DataFrame(tno)
    print(newDf)


df = df[ df.Sil]

'''


    
    '''imgCol = []
    for i,r in df3.iterrows():
        imgCol.append( r.FileName.split("//")[9]  )
    df3['ImgName'] = imgCol   
    
    if (df.T_No_x == df.T_No_y).all():
        if (df.T_No_x == df.T_No).all():
            print("aplee")
    
            
    
    
    
    return df
    '''    
    
    

    '''
    label1 = {}
    for i,r in t1.iterrows():
        if label1.get(r.ClusterLabel) == None:
            label1[r.ClusterLabel] = []
        label1[r.ClusterLabel].append([r.FileName, r.SilhouetteVal, r.T_No])
    
    label2 = {}
    for i,r in t2.iterrows():
        if label2.get(r.ClusterLabel) == None:
            label2[r.ClusterLabel] = []
        label2[r.ClusterLabel].append([r.FileName, r.SilhouetteVal, r.T_No])
    
    for k1,v1 in label1.items():
        for k2,v2 in label2.items():
            
            #v1 and v2 are the lists of rows for a particular label
            count=0
            for l1 in v1:
                for l2 in v2:
                    if l1[0] == l2[0]:
                        count+=1
            print(k1,k2,count)
    '''

In [None]:
# Positive-silhouette rows of one agglo result at 150H (overwrites the earlier df1).
# NOTE(review): the integer key 6 does not match the string test-folder keys of imgFreq.
df1 = imgFreq['ATL']['150H']['agglo'][6]
df1 = df1[df1.SilhouetteVal > 0]

In [None]:
# Count, for cluster 4 of df1, how many images have each T_No value
# 0.0, 0.5, ..., 9.0, then print the raw T_No list.
newdf1 = df1.T_No[ df1.ClusterLabel == 4]
t1 = list(newdf1)
i=0
while i <9.5:
    print(i, t1.count(i))
    i+=0.5
print(t1)   

In [None]:
# Same count for cluster 6 of df2.
# NOTE(review): this loop stops at 8.5 (i < 9.0), whereas the cell for df1
# goes up to 9.0 - confirm whether T_No 9.0 should be included here.
newdf1 = df2.T_No[ df2.ClusterLabel == 6 ]
t1 = list(newdf1)
i=0
while i <9.0:
    print(i, t1.count(i))
    i+=0.5

In [None]:
# Number of rows per cluster label in df1 (labels 0-10).
for i in range(11):
    print( len( df1[ (df1['ClusterLabel']==i) ] ) )

# NOTE(review): 'ClusterLabel_x' only exists on a frame produced by a merge with
# overlapping columns - confirm which `df` this loop expects.
for i in range(7):
    print( len( df[ (df['ClusterLabel_x']==i) ] ) )
    
# NOTE(review): the frame indexed here ([4]) differs from the one used to build
# the boolean mask ([9]), and both keys are integers while imgFreq is keyed by
# test folder names - likely a copy/paste slip.
for i in range(7):
    print( len( imgFreq['ATL']['91H']['fuzzykmeans'][4][ (imgFreq['ATL']['91H']['fuzzykmeans'][9]['ClusterLabel']==i) ] ) )