## KMeans Clustering of Hyperspectral Vegetation 
### Binned spectra

---

In [None]:
# -- here are functions that generate a class that memory maps the raw data 
#    cube.  After executing this cell, the syntax is:
#    fname = "[path to data]/foo.raw"
#    cube = read_hyper(fname)

import os
import numpy as np

def read_header(hdrfile, verbose=True):
    """
    Read a Middleton header file.

    Parameters
    ----------
    hdrfile : str
        Name of header file.
    verbose : bool, optional
        If True, alert the user.

    Returns
    -------
    dict : dict
        A dictionary containing the number of rows ("nrow", taken from the
        ``samples`` record), columns ("ncol", taken from the ``lines``
        record), and wavelengths ("nwav", taken from the ``bands`` record)
        as well as an array of band centers ("waves").

    Raises
    ------
    ValueError
        If the header has no ``samples``, ``lines``, ``bands`` or
        ``Wavelength`` record.
    """

    # -- alert
    if verbose:
        print("reading and parsing {0}...".format(hdrfile))

    # -- open the file and read in the records (closed even on error)
    with open(hdrfile) as fopen:
        recs = fopen.readlines()

    # -- parse for samples, lines, bands, and the start of the wavelengths;
    #    if a keyword occurs more than once, the last occurrence wins
    samples = lines = bands = w0ind = None
    for irec, rec in enumerate(recs):
        if 'samples' in rec:
            samples = int(rec.split("=")[1])
        elif 'lines' in rec:
            lines = int(rec.split("=")[1])
        elif 'bands' in rec:
            bands = int(rec.split("=")[1])
        elif "Wavelength" in rec:
            w0ind = irec+1

    # -- fail with a readable message instead of an unbound-variable error
    found = (("samples", samples), ("lines", lines), ("bands", bands),
             ("Wavelength", w0ind))
    missing = [name for name, val in found if val is None]
    if missing:
        raise ValueError("{0} is missing header record(s): {1}"
                         .format(hdrfile, ", ".join(missing)))

    # -- parse for the wavelengths (first comma-separated field of each of
    #    the `bands` records following the "Wavelength" record)
    waves = np.array([float(rec.split(",")[0]) for rec in
                      recs[w0ind:w0ind+bands]])

    # -- return a dictionary
    return {"nrow":samples, "ncol":lines, "nwav":bands, "waves":waves}


def read_raw(rawfile, shape, hyper=False, verbose=True):
    """
    Memory-map a Middleton raw file as a read-only data cube.

    Parameters
    ----------
    rawfile : str
        The name of the raw file.
    shape : tuple
        The cube dimensions given as (nwav, nrow, ncol).
    hyper : bool, optional
        If True, read the file as 16-bit unsigned data and return an array
        shaped (nwav, nrow, ncol).  If False, read it as 8-bit unsigned data
        and return an array shaped (nrow, ncol, nwav).
    verbose : bool, optional
        Alert the user.

    Returns
    -------
    memmap : memmap
        A numpy memmap view of the data cube.
    """

    # -- alert
    if verbose:
        print("reading {0}...".format(rawfile))

    nwav, nrow, ncol = shape[0], shape[1], shape[2]

    # -- broadband image: the last (nwav) axis is reversed
    if not hyper:
        broadband = np.memmap(rawfile, np.uint8, mode="r")
        return broadband.reshape(nrow, ncol, nwav)[:, :, ::-1]

    # -- hyperspectral image: stored as (ncol, nwav, nrow); reverse the nrow
    #    axis and move the axes into (nwav, nrow, ncol) order
    flat = np.memmap(rawfile, np.uint16, mode="r")
    stored = flat.reshape(ncol, nwav, nrow)
    return stored[:, :, ::-1].transpose(1, 2, 0)


def read_hyper(fpath, fname=None, full=True):
    """
    Read a full hyperspectral scan (raw and header file).

    The header file name is derived from the raw file name by replacing
    "raw" with "hdr" in the file extension (e.g. "foo.raw" -> "foo.hdr").

    Parameters
    ----------
    fpath : str
        Either the full name+path of the raw file or the path of the raw file.
        If the latter, fname must be supplied.
    fname : str, optional
        The name of the raw file (required if fpath is set to a path).
    full : bool, optional
        If True, output a class containing data and supplementary information.
        If False, output only the data.

    Returns
    -------
    output or memmap : class or memmap
        If full is True, an object with attributes filename, data, waves,
        nwav, nrow, and ncol.  If full is False, a memmap array of the data.
    """

    # -- set up the file names
    if fname is not None:
        fpath = os.path.join(fpath, fname)

    # -- build the header name from the extension only, so that a "raw"
    #    substring in a directory or base name is left untouched
    legacy_hdr = fpath.replace("raw", "hdr")
    root, ext = os.path.splitext(fpath)
    if "raw" in ext:
        hdrpath = root + ext.replace("raw", "hdr")
        # -- keep working for layouts that relied on the old whole-path
        #    substitution (e.g. parallel "raw"/"hdr" directories)
        if not os.path.exists(hdrpath) and os.path.exists(legacy_hdr):
            hdrpath = legacy_hdr
    else:
        hdrpath = legacy_hdr

    # -- read the header
    hdr = read_header(hdrpath)
    sh  = (hdr["nwav"], hdr["nrow"], hdr["ncol"])

    # -- if desired, only output data cube
    if not full:
        return read_raw(fpath, sh, hyper=True)

    # -- output full structure
    class output():
        def __init__(self, fpath):
            self.filename = fpath
            self.data     = read_raw(fpath, sh, hyper=True)
            self.waves    = hdr["waves"]
            self.nwav     = sh[0]
            self.nrow     = sh[1]
            self.ncol     = sh[2]

    return output(fpath)

In [None]:
def kmeans_test_dictionary(labels, test, k):
    """
    Tally the cluster labels found at a set of test pixel coordinates.

    Parameters
    ----------
    labels : ndarray
        2-D array of cluster labels, indexed as labels[row, col].
    test : ndarray
        Array of shape (N, 2); column 0 is used as the row index and
        column 1 as the column index into labels.
    k : int
        Number of clusters.  Every label in 0..k-1 that does not occur at
        the test pixels gets an entry of 0.

    Returns
    -------
    test_dict : dict
        Maps each cluster label to the number of test pixels carrying it.
    test_dict_norm : dict
        Maps each cluster label to the percentage of test pixels carrying it.
    """
    import numpy

    unique_test, counts_test = numpy.unique(labels[test[:,0], test[:,1]], return_counts=True)
    # -- divide by a float so the percentage is not truncated by integer
    #    division on interpreters where "/" is integer division
    counts_test_norm = (counts_test/float(test.shape[0]))*100
    test_dict = dict(zip(unique_test, counts_test))
    test_dict_norm = dict(zip(unique_test, counts_test_norm))

    # -- make sure every cluster has an entry, even when no test pixel hit it
    for i in range(0, k):
        test_dict.setdefault(i, 0)
        test_dict_norm.setdefault(i, 0)

    return test_dict, test_dict_norm

In [None]:
def kmeans_test_dataframe(sky_dict,
                          clouds_dict,
                          veg_dict,
                          wtr_dict,
                          blt_dict,
                          windows_dict,
                          rds_dict,
                          cars_dict,
                          mtl_dict
                         ):
    """
    Combine the nine per-class tallies into a single table.

    Each argument is a dictionary keyed by cluster label.  The returned
    DataFrame has one row per cluster label and one column per pixel class
    ('sky', 'clouds', 'vegetation', 'water', 'built', 'windows', 'roads',
    'cars', 'metal', in that order).
    """
    import pandas as pd

    # -- pair every class name with its tally, in column order
    named_tallies = [('sky', sky_dict),
                     ('clouds', clouds_dict),
                     ('vegetation', veg_dict),
                     ('water', wtr_dict),
                     ('built', blt_dict),
                     ('windows', windows_dict),
                     ('roads', rds_dict),
                     ('cars', cars_dict),
                     ('metal', mtl_dict)]
    class_names = [name for name, _ in named_tallies]
    tallies = [tally for _, tally in named_tallies]

    # -- one row per class, then flip so the classes become the columns
    return pd.DataFrame(tallies, index=class_names).T

In [None]:
def plot_confusion_matrix(df_test, norm=True):
    """
    Draw the class-versus-cluster table as an annotated heat map.

    Parameters
    ----------
    df_test : DataFrame
        Table with the columns 'sky', 'clouds', 'vegetation', 'water',
        'built', 'windows', 'roads', 'cars' and 'metal'; each row holds the
        values for one cluster.
    norm : bool, optional
        If True, title the plot as normalized and print cell values with two
        decimals.  If False, print them as whole numbers.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    # -- no "%matplotlib inline" here: a line magic inside a function body is
    #    only valid under IPython; select the backend once at notebook level

    classes = ['sky', 'clouds', 'vegetation', 'water', 'built', 'windows', 'roads',
              'cars', 'metal']

    # -- one matrix row per true class, one column per cluster
    cm = np.array([df_test[name].values for name in classes])

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.figure.colorbar(im, ax=ax)
    if norm:
        title='Normalized Confusion Matrix'
        fmt='.2f'
    else:
        title='Confusion Matrix'
        # -- 'd' only accepts integers; fall back to a rounded float format
        #    when the table is not of integer type
        fmt='d' if np.issubdtype(cm.dtype, np.integer) else '.0f'
    ax.set(xticks=np.arange(cm.shape[1]),
          yticks=np.arange(cm.shape[0]),
          xticklabels=np.arange(0,cm.shape[1]).astype(str),
          yticklabels=classes,
          title=title,
          ylabel='True Label',
          xlabel='Predicted Label')

    # -- white text on dark cells, black text on light cells
    thresh = cm.max()/2
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i,j], fmt),
                   ha="center", va="center",
                   color="white" if cm[i,j] > thresh else "black")
    fig.tight_layout()
    plt.show()

In [None]:
def plot_test_result(df_Test):
    """
    Draw a stacked bar chart of the cluster make-up of each pixel class.

    Parameters
    ----------
    df_Test : DataFrame
        Table with one column per pixel class and one row per cluster.  It
        is transposed before plotting, so each bar is one class and each
        stacked segment is one cluster.
    """
    import matplotlib.pyplot as plt
    # -- no "%matplotlib inline" here: a line magic inside a function body is
    #    only valid under IPython; select the backend once at notebook level

    df_test = df_Test.transpose()
    ax = df_test.plot.bar(rot=0, stacked=True, colormap='tab20b')
    plt.xlabel('Actual Class')
    plt.ylabel('%of Test Pixels')
    plt.title('Error in Kmeans Prediction')
    # -- anchor the legend just outside the upper-right corner of the axes
    plt.legend(bbox_to_anchor=(1,1), loc=2, borderaxespad=1.0, prop={'size':11})
    plt.show()

In [None]:
def point_from_string(text):
    """
    Parse a "row col" text record into a pair of integers.

    Parameters
    ----------
    text : str
        A line whose first two whitespace-separated fields are integers.
        Any run of spaces or tabs separates fields; surrounding whitespace
        and line endings are ignored, as are fields after the second.

    Returns
    -------
    rind, cind : int, int
        The first and second integer of the record.
    """

    # -- split() with no argument collapses repeated whitespace, so doubled
    #    spaces and tabs no longer produce empty fields
    items = text.split()
    rind = int(items[0])
    cind = int(items[1])

    return rind, cind

---
### Goal of clustering:
1. Sky
2. Clouds
3. Water
4. Vegetation
5. Buildings (concrete structures)
6. Windows
7. Roads
8. Cars
9. Metal Structures
---


## KMeans on veg_00108 (South Facing @ ~2pm)

In [None]:
# -- read the veg_00108 scan; read_hyper returns an object holding the
#    memory-mapped data cube (cube.data) and the band centers (cube.waves)
fname = "../../../image_files/veg_00108.raw"
cube = read_hyper(fname)

In [None]:
# -- take the whole cube and convert it to float (astype makes a copy)
cube_sub = cube.data[:, :, :].astype(float)
print(cube_sub.shape)

In [None]:
# create array of [start, stop) band indices for binning

num_of_bins = 20

# -- every bin spans the same whole number of bands ...
n_bands = cube_sub.shape[0]
bands_per_bin = int(n_bands/num_of_bins)
bin_ind = [[b*bands_per_bin, b*bands_per_bin + bands_per_bin]
           for b in range(num_of_bins)]

# -- ... except the last one, which also takes the leftover bands
bin_ind[-1][-1] = n_bands

print(bin_ind)

In [None]:
# -- flatten the two spatial axes: one row per pixel, one column per band
cube_reshaped = cube_sub.transpose(1, 2, 0).reshape(-1, cube_sub.shape[0])
print(cube_reshaped.shape)

In [None]:
# -- standardize every pixel spectrum: subtract its own mean and divide by
#    its own standard deviation (both taken over the band axis)
pixel_mean = cube_reshaped.mean(1, keepdims=True)
pixel_std = cube_reshaped.std(1, keepdims=True)
cube_standard = (cube_reshaped - pixel_mean) / pixel_std

In [None]:
cube_norm = (cube_reshaped - cube_reshaped.min()) / (cube_reshaped.max() - cube_reshaped.min())

import matplotlib.pyplot as plt
%matplotlib inline

red_ind = (np.abs(cube.waves - 650.0)).argmin()
green_ind = (np.abs(cube.waves - 550.0)).argmin()
blue_ind = (np.abs(cube.waves - 450.0)).argmin()

cube_reshaped2 = cube_norm.reshape(cube_sub.shape[1], cube_sub.shape[2], cube_sub.shape[0])
cube_scene = cube_reshaped2[:, :, [red_ind, green_ind, blue_ind]]
fig, ax = plt.subplots(figsize=(10,10))
plt.title('veg_00108 RGB Image')
ax.imshow(cube_scene, aspect=0.5)
plt.show()

In [None]:
# -- pull out the three display bands and divide each channel by its mean
#    over the image
rgb = cube_reshaped2[..., [red_ind, green_ind, blue_ind]].copy()
channel_mean = rgb.mean((0, 1), keepdims=True)
rgb = rgb / channel_mean

fig, ax = plt.subplots(figsize=(10,10))
ax.set_title('veg_00108 corrected RGB Image')
ax.imshow(rgb, aspect=0.5)
plt.show()

In [None]:
# -- average the standardized spectra over the bands of every bin; the
#    result has one row per pixel and one column per bin
cube_binned = np.column_stack([cube_standard[:, start:stop].mean(1)
                               for start, stop in bin_ind])

print(cube_binned.shape)

In [None]:
# -- fit a 9-cluster KMeans model to the binned spectra and report how long
#    the fit took as HH:MM:SS
import time
start_time = time.time()

#from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans 

#km = MiniBatchKMeans(n_clusters=9, random_state=2, batch_size=20)
# -- random_state is fixed so repeated runs start from the same seed
km = KMeans(n_clusters=9, random_state=2)
kmeans = km.fit(cube_binned)

elapsed_time = time.time() - start_time
print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

f, ((ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9)) = plt.subplots(3, 3, figsize=(12, 12))
plt.suptitle('Standardized Spectra of Kmeans Cluster Centers (veg_00108)')
ax1.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[0,:], color=[0.0,0.33,0.62])
ax2.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[1,:], color=[0.0,0.33,0.62])
ax3.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[2,:], color=[0.0,0.33,0.62])
ax4.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[3,:], color=[0.0,0.33,0.62])
ax5.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[4,:], color=[0.0,0.33,0.62])
ax6.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[5,:], color=[0.0,0.33,0.62])
ax7.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[6,:], color=[0.0,0.33,0.62])
ax8.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[7,:], color=[0.0,0.33,0.62])
ax9.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeans.cluster_centers_[8,:], color=[0.0,0.33,0.62])
plt.show()
#f.savefig("./output/plots/13_kmeans_cluster_centers_spectra_standardized_4_clusters.png")

In [None]:
# -- assign every pixel to a cluster and fold the flat label vector back
#    into the (row, column) layout of the image
labels = kmeans.predict(cube_binned)
labels_reshape = labels.reshape(cube_sub.shape[1], cube_sub.shape[2])

In [None]:
# -- interactive bokeh view of the cluster label image
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
# NOTE(review): Paired9 is imported but the palette is passed by name below
from bokeh.palettes import Paired9
from bokeh.models import (ColorBar, LinearColorMapper)
from bokeh.models import HoverTool

# -- reverse the row order before display
#    (presumably because the bokeh image origin is bottom-left -- confirm)
labels_reverse = np.flip(labels_reshape, axis=0)
color_mapper = LinearColorMapper(palette="Paired9", low=labels_reverse.min(), 
                                 high=labels_reverse.max())

# NOTE(review): dw/dh are hard-coded to 1600 -- confirm this is the intended
# display extent for this image
source_data = dict(image=[labels_reverse],
                  x=[0],
                  y=[0],
                  dw=[1600],
                  dh=[1600])

# NOTE(review): "@x"/"@y" refer to the x/y columns of the data source, which
# hold [0]; if cursor coordinates were intended, "$x"/"$y" may be what is
# needed -- confirm against the HoverTool documentation
hover = HoverTool()
hover.tooltips = [
    ("Label", "@image"),
    ("x", "@x"),
    ("y", "@y"),
]

output_notebook()
imgplt = figure(plot_width=800, plot_height=400, x_range=(0,1600), y_range=(0,1600),
               tools=['pan','tap','box_zoom','wheel_zoom','save','reset'],
               title="Clustered Standardized Spectra (veg_00108)")
imgplt.image(source=source_data, image='image', x='x', y='y', dw='dw', dh='dh',
            color_mapper = color_mapper)
imgplt.add_layout(ColorBar(color_mapper = color_mapper), 'left')
imgplt.tools.append(hover)
show(imgplt)

### Read the manually classified test pixels (veg_00108)

In [None]:
# read manually selected coordinates files
# (each line is parsed with point_from_string into a (row, col) pair; the
#  files are opened with "with" so they are closed even if parsing fails)

#sky coordinates
with open("../../manual_classified_pixels/1_sky_coordinates_108.txt", "r") as sky_file:
    sky_coords = np.array([point_from_string(line) for line in sky_file])
print("sky:        ", sky_coords.shape)

#clouds coordinates
with open("../../manual_classified_pixels/2_clouds_coordinates_108.txt", "r") as clouds_file:
    clouds_coords = np.array([point_from_string(line) for line in clouds_file])
print("clouds:     ", clouds_coords.shape)

#vegetation coordinates
with open("../../manual_classified_pixels/3_vegetation_coordinates_108.txt", "r") as veg_file:
    veg_coords = np.array([point_from_string(line) for line in veg_file])
print("vegetation: ", veg_coords.shape)

#water coordinates
with open("../../manual_classified_pixels/4_water_coordinates_108.txt", "r") as wtr_file:
    wtr_coords = np.array([point_from_string(line) for line in wtr_file])
print("water:      ", wtr_coords.shape)

#buildings coordinates
with open("../../manual_classified_pixels/5_buildings_coordinates_108.txt", "r") as blt_file:
    blt_coords = np.array([point_from_string(line) for line in blt_file])
print("buildings:  ", blt_coords.shape)

#windows coordinates
with open("../../manual_classified_pixels/6_windows_coordinates_108.txt", "r") as windows_file:
    windows_coords = np.array([point_from_string(line) for line in windows_file])
print("windows:    ", windows_coords.shape)

#roads coordinates
with open("../../manual_classified_pixels/7_roads_coordinates_108.txt", "r") as rds_file:
    rds_coords = np.array([point_from_string(line) for line in rds_file])
print("road:       ", rds_coords.shape)

#cars coordinates
with open("../../manual_classified_pixels/8_cars_coordinates_108.txt", "r") as cars_file:
    cars_coords = np.array([point_from_string(line) for line in cars_file])
print("cars:       ", cars_coords.shape)

#metal coordinates
with open("../../manual_classified_pixels/9_metal_coordinates_108.txt", "r") as mtl_file:
    mtl_coords = np.array([point_from_string(line) for line in mtl_file])
print("metal:      ", mtl_coords.shape)

In [None]:
# -- for every manually labelled class, tally its test pixels per cluster:
#    raw counts (*_dict) and percentages (*_dict_norm)
sky_dict, sky_dict_norm = kmeans_test_dictionary(labels_reshape, sky_coords, 9)
clouds_dict, cloud_dict_norm = kmeans_test_dictionary(labels_reshape, clouds_coords, 9)
veg_dict, veg_dict_norm = kmeans_test_dictionary(labels_reshape, veg_coords, 9)
wtr_dict, wtr_dict_norm = kmeans_test_dictionary(labels_reshape, wtr_coords, 9)
blt_dict, blt_dict_norm = kmeans_test_dictionary(labels_reshape, blt_coords, 9)
windows_dict, windows_dict_norm = kmeans_test_dictionary(labels_reshape, windows_coords, 9)
rds_dict, rds_dict_norm = kmeans_test_dictionary(labels_reshape, rds_coords, 9)
cars_dict, cars_dict_norm = kmeans_test_dictionary(labels_reshape, cars_coords, 9)
mtl_dict, mtl_dict_norm = kmeans_test_dictionary(labels_reshape, mtl_coords, 9)

# -- table of raw counts
df_test = kmeans_test_dataframe(sky_dict, clouds_dict, veg_dict, wtr_dict,
                                blt_dict, windows_dict, rds_dict, cars_dict, mtl_dict)
print(df_test.transpose())

# -- table of percentages: built from the *_norm dictionaries (it was
#    previously built from the raw counts, duplicating df_test)
df_test_norm = kmeans_test_dataframe(sky_dict_norm, cloud_dict_norm, veg_dict_norm,
                                     wtr_dict_norm, blt_dict_norm, windows_dict_norm,
                                     rds_dict_norm, cars_dict_norm, mtl_dict_norm)
print("")
print(df_test_norm.transpose())

In [None]:
# -- heat maps of the count table and the normalized table, followed by a
#    stacked bar chart of the normalized table
plot_confusion_matrix(df_test, norm=False)
plot_confusion_matrix(df_test_norm, norm=True)
plot_test_result(df_test_norm)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as ListedColorMap
import matplotlib.patches as mpatches
from matplotlib.ticker import NullFormatter
%matplotlib inline

veg_by_row = np.zeros(cube_sub.shape[1])
for row in range(0, cube_sub.shape[1]):
    veg_by_row[row] = np.count_nonzero(labels_reshape[row,:] == 6)# + np.count_nonzero(labels_reshape[row,:] == 8) + np.count_nonzero(labels_reshape[row,:] == 9)

t=1
cmap = {0:[0.0,0.63,0.87,t/4], 1:[0.0,0.63,0.87,t], 2:[0.74,0.74,0.74,t],  3:[1.0,0.1,0.1,t/4],
        4:[0.0,0.63,0.87,t*3/4], 5:[0.0,0.63,0.87,t/2], 6:[0.93,0.91,0.77,t], 7:[1.0,0.1,0.1,t/2],
        8:[1.0,0.1,0.1,t]}
labels = {0:'0', 1:'1', 2:'2', 3:'3', 4:'4', 5:'5', 6:'6', 7:'7', 8:'8', 9:'9', 10:'10', 11:'11'}
arrayShow = np.array([[cmap[i] for i in j] for j in labels_reshape])
patches = [mpatches.Patch(color=cmap[i], label=labels[i]) for i in cmap]
#fig, ax = plt.subplots(figsize = (20,10))

fig = plt.figure(1, figsize=(30,10))
axImage = plt.axes([0.1,0.1,0.65,0.95])
axHist = plt.axes([0.75,0.1,0.2,0.95])
axHist.yaxis.set_major_formatter(NullFormatter())
axImage.tick_params(labelsize=20)
axHist.tick_params(labelsize=20)
axImage.imshow(arrayShow, aspect=0.5)
lgd = axImage.legend(handles=patches, bbox_to_anchor=(0.5,1), loc=9, borderaxespad=-2.0, prop={'size':25}, ncol=12)
axHist.plot(veg_by_row, np.arange(0,cube_sub.shape[1]), color=[0.0,0.33,0.62])
axHist.fill_between(veg_by_row, np.arange(0,cube_sub.shape[1]), cube_sub.shape[1], facecolor=[0.0,0.33,0.62])
axHist.set_ylim(cube_sub.shape[1], 0)
axHist.set(title='Vegetation Pixels by Row')
axHist.title.set_fontsize(25)
plt.show()
#fig.savefig("./output/plots/19_kmeans_clustering_of_veg_00108.png", bbox_extra_artists=(lgd,), bbox_inches='tight')

---
## KMeans on veg_00000 (South Facing @ ~6pm)

In [None]:
# -- read the veg_00000 scan; read_hyper returns an object holding the
#    memory-mapped data cube (cube0.data) and the band centers (cube0.waves)
fname0 = "../../../image_files/veg_00000.raw"
cube0 = read_hyper(fname0)

In [None]:
# -- take the whole cube and convert it to float (astype makes a copy)
cube_sub0 = cube0.data[:, :, :].astype(float)
print(cube_sub0.shape)

In [None]:
# -- flatten the two spatial axes: one row per pixel, one column per band
cube_reshaped0 = cube_sub0.transpose(1, 2, 0).reshape(-1, cube_sub0.shape[0])
print(cube_reshaped0.shape)

In [None]:
# -- standardize every pixel spectrum: subtract its own mean and divide by
#    its own standard deviation (both taken over the band axis)
pixel_mean0 = cube_reshaped0.mean(1, keepdims=True)
pixel_std0 = cube_reshaped0.std(1, keepdims=True)
cube_standard0 = (cube_reshaped0 - pixel_mean0) / pixel_std0

In [None]:
# -- average the standardized spectra over the bands of every bin
#    (bin_ind and num_of_bins are the ones built for the veg_00108 cube;
#     assumes both scans have the same bands -- TODO confirm)
cube_binned0 = np.column_stack([cube_standard0[:, start:stop].mean(1)
                                for start, stop in bin_ind])

print(cube_binned0.shape)

In [None]:
cube_norm0 = (cube_reshaped0 - cube_reshaped0.min()) / (cube_reshaped0.max() - cube_reshaped0.min())

import matplotlib.pyplot as plt
%matplotlib inline

red_ind0 = (np.abs(cube0.waves - 650.0)).argmin()
green_ind0 = (np.abs(cube0.waves - 550.0)).argmin()
blue_ind0 = (np.abs(cube0.waves - 450.0)).argmin()

cube_reshaped02 = cube_norm0.reshape(cube_sub0.shape[1], cube_sub0.shape[2], cube_sub0.shape[0])
cube_scene0 = cube_reshaped02[:, :, [red_ind0, green_ind0, blue_ind0]]
fig, ax = plt.subplots(figsize=(10,10))
plt.title('veg_00000 RGB Image')
ax.imshow(cube_scene0, aspect=0.5)
plt.show()

In [None]:
# -- pull out the three display bands and divide each channel by its mean
#    over the image
rgb0 = cube_reshaped02[..., [red_ind0, green_ind0, blue_ind0]].copy()
channel_mean0 = rgb0.mean((0, 1), keepdims=True)
rgb0 = rgb0 / channel_mean0

fig, ax = plt.subplots(figsize=(10,10))
ax.set_title('veg_00000 corrected RGB Image')
ax.imshow(rgb0, aspect=0.5)
plt.show()

In [None]:
# -- fit a 9-cluster KMeans model to the binned veg_00000 spectra and report
#    how long the fit took as HH:MM:SS
import time
start_time = time.time()

#from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans

#km0 = MiniBatchKMeans(n_clusters=9, random_state=2, batch_size=20)
# -- random_state is fixed so repeated runs start from the same seed
km0 = KMeans(n_clusters=9, random_state=2)
kmeans0 = km0.fit(cube_binned0)

elapsed_time = time.time() - start_time
print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

f, ((ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9)) = plt.subplots(3, 3, figsize=(12, 12))
plt.suptitle('Standardized Spectra of Kmeans Cluster Centers (veg_00000)')
ax1.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[0,:], color=[0.0,0.33,0.62])
ax2.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[1,:], color=[0.0,0.33,0.62])
ax3.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[2,:], color=[0.0,0.33,0.62])
ax4.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[3,:], color=[0.0,0.33,0.62])
ax5.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[4,:], color=[0.0,0.33,0.62])
ax6.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[5,:], color=[0.0,0.33,0.62])
ax7.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[6,:], color=[0.0,0.33,0.62])
ax8.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[7,:], color=[0.0,0.33,0.62])
ax9.plot(cube0.waves[np.array(bin_ind).mean(1).astype(int)], kmeans0.cluster_centers_[8,:], color=[0.0,0.33,0.62])
plt.show()
#f.savefig("./output/plots/13_kmeans_cluster_centers_spectra_standardized_4_clusters.png")

In [None]:
# -- assign every pixel to a cluster and fold the flat label vector back
#    into the (row, column) layout of the image
labels0 = kmeans0.predict(cube_binned0)
labels_reshape0 = labels0.reshape(cube_sub0.shape[1], cube_sub0.shape[2])

In [None]:
# -- interactive bokeh view of the veg_00000 cluster label image
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
# NOTE(review): Paired9 is imported but the palette is passed by name below
from bokeh.palettes import Paired9
from bokeh.models import (ColorBar, LinearColorMapper)
from bokeh.models import HoverTool

# -- reverse the row order before display
#    (presumably because the bokeh image origin is bottom-left -- confirm)
labels_reverse0 = np.flip(labels_reshape0, axis=0)
color_mapper = LinearColorMapper(palette="Paired9", low=labels_reverse0.min(), 
                                 high=labels_reverse0.max())

# NOTE(review): dw/dh are hard-coded to 1600 -- confirm this is the intended
# display extent for this image
source_data = dict(image=[labels_reverse0],
                  x=[0],
                  y=[0],
                  dw=[1600],
                  dh=[1600])

# NOTE(review): "@x"/"@y" refer to the x/y columns of the data source, which
# hold [0]; if cursor coordinates were intended, "$x"/"$y" may be what is
# needed -- confirm against the HoverTool documentation
hover = HoverTool()
hover.tooltips = [
    ("Label", "@image"),
    ("x", "@x"),
    ("y", "@y"),
]

output_notebook()
imgplt = figure(plot_width=800, plot_height=400, x_range=(0,1600), y_range=(0,1600),
               tools=['pan','tap','box_zoom','wheel_zoom','save','reset'],
               title="Clustered Standardized Spectra (veg_00000)")
imgplt.image(source=source_data, image='image', x='x', y='y', dw='dw', dh='dh',
            color_mapper = color_mapper)
imgplt.add_layout(ColorBar(color_mapper = color_mapper), 'left')
imgplt.tools.append(hover)
show(imgplt)

### Read the manually classified test pixels (veg_00000)

In [None]:
# read manually selected coordinates files
# (each line is parsed with point_from_string into a (row, col) pair; the
#  files are opened with "with" so they are closed even if parsing fails)

#sky coordinates
with open("../../manual_classified_pixels/1_sky_coordinates_000.txt", "r") as sky_file0:
    sky_coords0 = np.array([point_from_string(line) for line in sky_file0])
print("sky:        ", sky_coords0.shape)

#clouds coordinates
with open("../../manual_classified_pixels/2_clouds_coordinates_000.txt", "r") as clouds_file0:
    clouds_coords0 = np.array([point_from_string(line) for line in clouds_file0])
print("clouds:     ", clouds_coords0.shape)

#vegetation coordinates
with open("../../manual_classified_pixels/3_vegetation_coordinates_000.txt", "r") as veg_file0:
    veg_coords0 = np.array([point_from_string(line) for line in veg_file0])
print("vegetation: ", veg_coords0.shape)

#water coordinates
with open("../../manual_classified_pixels/4_water_coordinates_000.txt", "r") as wtr_file0:
    wtr_coords0 = np.array([point_from_string(line) for line in wtr_file0])
print("water:      ", wtr_coords0.shape)

#buildings coordinates
with open("../../manual_classified_pixels/5_buildings_coordinates_000.txt", "r") as blt_file0:
    blt_coords0 = np.array([point_from_string(line) for line in blt_file0])
print("buildings:  ", blt_coords0.shape)

#windows coordinates
with open("../../manual_classified_pixels/6_windows_coordinates_000.txt", "r") as windows_file0:
    windows_coords0 = np.array([point_from_string(line) for line in windows_file0])
print("windows:    ", windows_coords0.shape)

#roads coordinates
with open("../../manual_classified_pixels/7_roads_coordinates_000.txt", "r") as rds_file0:
    rds_coords0 = np.array([point_from_string(line) for line in rds_file0])
print("road:       ", rds_coords0.shape)

#cars coordinates
with open("../../manual_classified_pixels/8_cars_coordinates_000.txt", "r") as cars_file0:
    cars_coords0 = np.array([point_from_string(line) for line in cars_file0])
print("cars:       ", cars_coords0.shape)

#metal coordinates
with open("../../manual_classified_pixels/9_metal_coordinates_000.txt", "r") as mtl_file0:
    mtl_coords0 = np.array([point_from_string(line) for line in mtl_file0])
print("metal:      ", mtl_coords0.shape)

In [None]:
# -- replace the water coordinates read from file with the single pixel
#    (0, 0), so the water tally below is computed from that pixel alone
# NOTE(review): presumably a placeholder because this scene has no usable
# water sample -- confirm; it counts pixel (0, 0) as a water test pixel
wtr_coords0=np.array([[0,0]])

In [None]:
# -- for every manually labelled class, tally its test pixels per cluster:
#    raw counts (*_dict0) and percentages (*_dict_norm0)
sky_dict0, sky_dict_norm0 = kmeans_test_dictionary(labels_reshape0, sky_coords0, 9)
clouds_dict0, cloud_dict_norm0 = kmeans_test_dictionary(labels_reshape0, clouds_coords0, 9)
veg_dict0, veg_dict_norm0 = kmeans_test_dictionary(labels_reshape0, veg_coords0, 9)
wtr_dict0, wtr_dict_norm0 = kmeans_test_dictionary(labels_reshape0, wtr_coords0, 9)
blt_dict0, blt_dict_norm0 = kmeans_test_dictionary(labels_reshape0, blt_coords0, 9)
windows_dict0, windows_dict_norm0 = kmeans_test_dictionary(labels_reshape0, windows_coords0, 9)
rds_dict0, rds_dict_norm0 = kmeans_test_dictionary(labels_reshape0, rds_coords0, 9)
cars_dict0, cars_dict_norm0 = kmeans_test_dictionary(labels_reshape0, cars_coords0, 9)
mtl_dict0, mtl_dict_norm0 = kmeans_test_dictionary(labels_reshape0, mtl_coords0, 9)

# -- table of raw counts
df_test0 = kmeans_test_dataframe(sky_dict0, clouds_dict0, veg_dict0, wtr_dict0,
                                blt_dict0, windows_dict0, rds_dict0, cars_dict0, mtl_dict0)
print(df_test0.transpose())

# -- table of percentages: built from the *_norm0 dictionaries (it was
#    previously built from the raw counts, duplicating df_test0)
df_test_norm0 = kmeans_test_dataframe(sky_dict_norm0, cloud_dict_norm0, veg_dict_norm0,
                                      wtr_dict_norm0, blt_dict_norm0, windows_dict_norm0,
                                      rds_dict_norm0, cars_dict_norm0, mtl_dict_norm0)
print("")
print(df_test_norm0.transpose())

In [None]:
# -- heat maps of the count table and the normalized table, followed by a
#    stacked bar chart of the normalized table
plot_confusion_matrix(df_test0, norm=False)
plot_confusion_matrix(df_test_norm0, norm=True)
plot_test_result(df_test_norm0)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as ListedColorMap
import matplotlib.patches as mpatches
from matplotlib.ticker import NullFormatter
%matplotlib inline

veg_by_row0 = np.zeros(cube_sub0.shape[1])
for row in range(0, cube_sub0.shape[1]):
    veg_by_row0[row] = np.count_nonzero(labels_reshape0[row,:] == 6)# + np.count_nonzero(labels_reshape[row,:] == 8) + np.count_nonzero(labels_reshape[row,:] == 9)

t=1
cmap = {0:[0.0,0.63,0.87,t/4], 1:[0.0,0.63,0.87,t], 2:[0.74,0.74,0.74,t],  3:[1.0,0.1,0.1,t/4],
        4:[0.0,0.63,0.87,t*3/4], 5:[0.0,0.63,0.87,t/2], 6:[0.93,0.91,0.77,t], 7:[1.0,0.1,0.1,t/2],
        8:[1.0,0.1,0.1,t]}
labels = {0:'0', 1:'1', 2:'2', 3:'3', 4:'4', 5:'5', 6:'6', 7:'7', 8:'8', 9:'9', 10:'10', 11:'11'}
arrayShow = np.array([[cmap[i] for i in j] for j in labels_reshape0])
patches = [mpatches.Patch(color=cmap[i], label=labels[i]) for i in cmap]
#fig, ax = plt.subplots(figsize = (20,10))

fig = plt.figure(1, figsize=(30,10))
axImage = plt.axes([0.1,0.1,0.65,0.95])
axHist = plt.axes([0.75,0.1,0.2,0.95])
axHist.yaxis.set_major_formatter(NullFormatter())
axImage.tick_params(labelsize=20)
axHist.tick_params(labelsize=20)
axImage.imshow(arrayShow, aspect=0.5)
lgd = axImage.legend(handles=patches, bbox_to_anchor=(0.5,1), loc=9, borderaxespad=-2.0, prop={'size':25}, ncol=12)
axHist.plot(veg_by_row0, np.arange(0,cube_sub.shape[1]), color=[0.0,0.33,0.62])
axHist.fill_between(veg_by_row0, np.arange(0,cube_sub.shape[1]), cube_sub.shape[1], facecolor=[0.0,0.33,0.62])
axHist.set_ylim(cube_sub.shape[1], 0)
axHist.set(title='Vegetation Pixels by Row')
axHist.title.set_fontsize(25)
plt.show()
#fig.savefig("./output/plots/19_kmeans_clustering_of_veg_00108.png", bbox_extra_artists=(lgd,), bbox_inches='tight')

---
## KMeans on North Facing Image

In [None]:
# -- read the north facing scan; read_hyper returns an object holding the
#    memory-mapped data cube (.data) and the band centers (.waves)
fname_north = "../../../image_files/scan1_slow_roof_VNIR.raw"
cube_north = read_hyper(fname_north)

In [None]:
# -- take the whole cube and convert it to float (astype makes a copy)
cube_sub_north = cube_north.data[:, :, :].astype(float)
print(cube_sub_north.shape)

In [None]:
# -- flatten the two spatial axes: one row per pixel, one column per band
cube_reshaped_north = cube_sub_north.transpose(1, 2, 0).reshape(-1, cube_sub_north.shape[0])
print(cube_reshaped_north.shape)

In [None]:
cube_norm_north = (cube_reshaped_north - cube_reshaped_north.min()) / (cube_reshaped_north.max() - cube_reshaped_north.min())

import matplotlib.pyplot as plt
%matplotlib inline

red_ind_n = (np.abs(cube_north.waves - 650.0)).argmin()
green_ind_n = (np.abs(cube_north.waves - 550.0)).argmin()
blue_ind_n = (np.abs(cube_north.waves - 450.0)).argmin()

cube_reshaped_north2 = cube_norm_north.reshape(cube_sub_north.shape[1], cube_sub_north.shape[2], cube_sub_north.shape[0])
cube_scene_north = cube_reshaped_north2[:, :, [red_ind_n, green_ind_n, blue_ind_n]]
fig, ax = plt.subplots(figsize=(10,10))
plt.title('North Facing RGB Image')
ax.imshow(cube_scene_north, aspect=0.4)
plt.show()

In [None]:
# -- pull out the three display bands and divide each channel by its mean
#    over the image
rgbn = cube_reshaped_north2[..., [red_ind_n, green_ind_n, blue_ind_n]].copy()
channel_mean_n = rgbn.mean((0, 1), keepdims=True)
rgbn = rgbn / channel_mean_n

# -- clip to [0, 1] and take the square root before display
rgbn_display = rgbn.clip(0, 1)**0.5

fig, ax = plt.subplots(figsize=(10,10))
ax.set_title('North Facing corrected RGB Image')
ax.imshow(rgbn_display, aspect=0.4)
plt.show()

In [None]:
# -- standardize every pixel spectrum: subtract its own mean and divide by
#    its own standard deviation (both taken over the band axis)
north_mean = cube_reshaped_north.mean(1, keepdims=True)
north_std = cube_reshaped_north.std(1, keepdims=True)
cube_standard_north = (cube_reshaped_north - north_mean) / north_std

# -- from here on cube_reshaped_north refers to the standardized spectra, so
#    re-running an earlier cell that reads it will see standardized data
cube_reshaped_north = cube_standard_north

In [None]:
# -- number of bands and wavelength range of the two scans, with a blank
#    line between them
for position, scan in enumerate((cube, cube_north)):
    if position > 0:
        print()
    print(len(scan.waves))
    print(min(scan.waves), max(scan.waves))

In [None]:
# interpolating and extrapolating the north facing scene onto the band
# centers of the veg_00108 cube

from scipy.interpolate import interp1d

# -- linear interpolation along the band axis; wavelengths outside the north
#    facing range are extrapolated
interp_hsi = interp1d(cube_north.waves, cube_reshaped_north, kind="linear",
                      axis=1, fill_value="extrapolate")
northri = interp_hsi(cube.waves)

In [None]:
# -- compare the original and the resampled spectrum of a single pixel
check_pixel = 1000
north_spectrum = cube_reshaped_north[check_pixel, :]

fig, ax = plt.subplots(figsize=(12,5))
ax.plot(cube_north.waves, north_spectrum, ".", color="r")
ax.plot(cube.waves, northri[check_pixel, :], 'o', ms=3, color="b")
ax.plot(cube_north.waves, north_spectrum, ".", ms=3, color="y")

In [None]:
# -- shapes before and after resampling onto the veg_00108 band centers
print(cube_reshaped_north.shape)
print(northri.shape)

In [None]:
# -- average the resampled spectra over the bands of every bin; the result
#    has one row per pixel and one column per bin
cube_binned_north = np.column_stack([northri[:, start:stop].mean(1)
                                     for start, stop in bin_ind])

print(cube_binned_north.shape)

In [None]:
# -- fit a 9-cluster KMeans model to the binned north facing spectra and
#    report how long the fit took as HH:MM:SS
import time
start_time = time.time()

#from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans

#kmn = MiniBatchKMeans(n_clusters=9, random_state=2, batch_size=20)
# -- random_state is fixed so repeated runs start from the same seed
kmn = KMeans(n_clusters=9, random_state=2)
# -- the closing parenthesis of this call was missing (syntax error)
kmeansn = kmn.fit(cube_binned_north)

elapsed_time = time.time() - start_time
print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

f, ((ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9)) = plt.subplots(3, 3, figsize=(12, 12))
plt.suptitle('Standardized Spectra of Kmeans Cluster Centers (north facing)')
ax1.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[0,:], color=[0.0,0.33,0.62])
ax2.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[1,:], color=[0.0,0.33,0.62])
ax3.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[2,:], color=[0.0,0.33,0.62])
ax4.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[3,:], color=[0.0,0.33,0.62])
ax5.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[4,:], color=[0.0,0.33,0.62])
ax6.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[5,:], color=[0.0,0.33,0.62])
ax7.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[6,:], color=[0.0,0.33,0.62])
ax8.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[7,:], color=[0.0,0.33,0.62])
ax9.plot(cube.waves[np.array(bin_ind).mean(1).astype(int)], kmeansn.cluster_centers_[8,:], color=[0.0,0.33,0.62])
plt.show()
#f.savefig("./output/plots/13_kmeans_cluster_centers_spectra_standardized_4_clusters.png")

In [None]:
# -- predict a cluster label for every row of the binned data, then fold the
#    flat label vector into a 2-D map whose shape is taken from axes 1 and 2
#    of cube_sub_north
labels_north = kmeansn.predict(cube_binned_north)
labels_reshape_north = np.reshape(labels_north, cube_sub_north.shape[1:3])

In [None]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.palettes import Paired9
from bokeh.models import ColorBar, HoverTool, LinearColorMapper

# -- send bokeh output to the notebook
output_notebook()

# -- reverse the row order of the label map (presumably so the image is not
#    drawn upside down -- confirm against the rendered plot)
labels_reverse_north = labels_reshape_north[::-1]

# -- map the label range linearly onto the 9-color "Paired9" palette
color_mapper = LinearColorMapper(
    palette="Paired9",
    low=labels_reverse_north.min(),
    high=labels_reverse_north.max(),
)

# -- a single image anchored at (0, 0) and stretched over 1600 x 1600 data
#    units, matching the x/y ranges of the figure below
source_data = {
    "image": [labels_reverse_north],
    "x": [0],
    "y": [0],
    "dw": [1600],
    "dh": [1600],
}

# -- hover tool showing the label and the x/y columns of the data source
hover = HoverTool(tooltips=[
    ("Label", "@image"),
    ("x", "@x"),
    ("y", "@y"),
])

# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.x -- confirm the installed version still accepts these keywords
imgplt = figure(plot_width=800, plot_height=400, x_range=(0, 1600), y_range=(0, 1600),
                tools=['pan', 'tap', 'box_zoom', 'wheel_zoom', 'save', 'reset'],
                title="Clustered Standardized Spectra (north facing)")
imgplt.image(source=source_data, image='image', x='x', y='y', dw='dw', dh='dh',
             color_mapper=color_mapper)
imgplt.add_layout(ColorBar(color_mapper=color_mapper), 'left')
imgplt.tools.append(hover)
show(imgplt)

### Read test sample (north facing)

In [None]:
# read manually selected coordinates files

def _read_coords(coords_path):
    """
    Read one file of manually selected pixel coordinates.

    Parameters
    ----------
    coords_path : str
        Name of the text file; every line is parsed with point_from_string.

    Returns
    -------
    coords : ndarray
        Array built from the parsed lines, in file order.
    """

    # -- the context manager closes the file even if reading or parsing fails
    with open(coords_path, "r") as coords_file:
        return np.array([point_from_string(line) for line in coords_file])


#sky coordinates
sky_coordsn = _read_coords("../manual_classified_pixels/1_sky_coordinates_north.txt")
print("sky:        ", sky_coordsn.shape)

#clouds coordinates
clouds_coordsn = _read_coords("../manual_classified_pixels/2_clouds_coordinates_north.txt")
print("clouds:     ", clouds_coordsn.shape)

#vegetation coordinates
veg_coordsn = _read_coords("../manual_classified_pixels/3_vegetation_coordinates_north.txt")
print("vegetation: ", veg_coordsn.shape)

#water coordinates
wtr_coordsn = _read_coords("../manual_classified_pixels/4_water_coordinates_north.txt")
print("water:      ", wtr_coordsn.shape)

#buildings coordinates
blt_coordsn = _read_coords("../manual_classified_pixels/5_buildings_coordinates_north.txt")
print("buildings:  ", blt_coordsn.shape)

#windows coordinates
windows_coordsn = _read_coords("../manual_classified_pixels/6_windows_coordinates_north.txt")
print("windows:    ", windows_coordsn.shape)

#roads coordinates
rds_coordsn = _read_coords("../manual_classified_pixels/7_roads_coordinates_north.txt")
print("road:       ", rds_coordsn.shape)

#cars coordinates
cars_coordsn = _read_coords("../manual_classified_pixels/8_cars_coordinates_north.txt")
print("cars:       ", cars_coordsn.shape)

#metal coordinates
mtl_coordsn = _read_coords("../manual_classified_pixels/9_metal_coordinates_north.txt")
print("metal:      ", mtl_coordsn.shape)

In [None]:
# -- for each manually classified class, kmeans_test_dictionary returns a pair
#    of dictionaries built from the label map and that class's coordinates;
#    from the names, presumably raw and normalized per-cluster counts --
#    confirm against its definition.  9 matches n_clusters of the KMeans fit.
sky_dictn, sky_dict_normn = kmeans_test_dictionary(labels_reshape_north, sky_coordsn, 9)
clouds_dictn, cloud_dict_normn = kmeans_test_dictionary(labels_reshape_north, clouds_coordsn, 9)
veg_dictn, veg_dict_normn = kmeans_test_dictionary(labels_reshape_north, veg_coordsn, 9)
wtr_dictn, wtr_dict_normn = kmeans_test_dictionary(labels_reshape_north, wtr_coordsn, 9)
blt_dictn, blt_dict_normn = kmeans_test_dictionary(labels_reshape_north, blt_coordsn, 9)
windows_dictn, windows_dict_normn = kmeans_test_dictionary(labels_reshape_north, windows_coordsn, 9)
rds_dictn, rds_dict_normn = kmeans_test_dictionary(labels_reshape_north, rds_coordsn, 9)
cars_dictn, cars_dict_normn = kmeans_test_dictionary(labels_reshape_north, cars_coordsn, 9)
mtl_dictn, mtl_dict_normn = kmeans_test_dictionary(labels_reshape_north, mtl_coordsn, 9)

# -- table built from the first dictionary of every class
df_testn = kmeans_test_dataframe(sky_dictn, clouds_dictn, veg_dictn, wtr_dictn,
                                blt_dictn, windows_dictn, rds_dictn, cars_dictn, mtl_dictn)
print(df_testn.transpose())

# -- table built from the second (*_dict_normn) dictionary of every class;
#    the clouds entry is named cloud_dict_normn (no "s") in the unpacking above
df_test_normn = kmeans_test_dataframe(sky_dict_normn, cloud_dict_normn, veg_dict_normn,
                                     wtr_dict_normn, blt_dict_normn, windows_dict_normn,
                                     rds_dict_normn, cars_dict_normn, mtl_dict_normn)
print("")
print(df_test_normn.transpose())

In [None]:
# -- visualize the north-facing test tables: the first table with norm=False,
#    the second with norm=True, then plot_test_result on the second table
#    (what each helper draws is defined elsewhere in the notebook)
plot_confusion_matrix(df_testn, norm=False)
plot_confusion_matrix(df_test_normn, norm=True)
plot_test_result(df_test_normn)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as ListedColorMap
import matplotlib.patches as mpatches
from matplotlib.ticker import NullFormatter
%matplotlib inline

veg_by_row_north = np.zeros(cube_sub_north.shape[1])
for row in range(0, cube_sub_north.shape[1]):
    veg_by_row_north[row] = np.count_nonzero(labels_reshape_north[row,:] == 7)# + np.count_nonzero(labels_reshape[row,:] == 8) + np.count_nonzero(labels_reshape[row,:] == 9)

t=1
cmap = {0:[0.0,0.63,0.87,t/4], 1:[0.0,0.63,0.87,t], 2:[0.74,0.74,0.74,t],  3:[1.0,0.1,0.1,t/4],
        4:[0.0,0.63,0.87,t*3/4], 5:[0.0,0.63,0.87,t/2], 6:[0.93,0.91,0.77,t], 7:[1.0,0.1,0.1,t/2],
        8:[1.0,0.1,0.1,t]}
labels = {0:'0', 1:'1', 2:'2', 3:'3', 4:'4', 5:'5', 6:'6', 7:'7', 8:'8', 9:'9', 10:'10', 11:'11'}
arrayShow = np.array([[cmap[i] for i in j] for j in labels_reshape_north])
patches = [mpatches.Patch(color=cmap[i], label=labels[i]) for i in cmap]
#fig, ax = plt.subplots(figsize = (20,10))

fig = plt.figure(1, figsize=(30,10))
axImage = plt.axes([0.1,0.1,0.65,0.95])
axHist = plt.axes([0.75,0.1,0.2,0.95])
axHist.yaxis.set_major_formatter(NullFormatter())
axImage.tick_params(labelsize=20)
axHist.tick_params(labelsize=20)
axImage.imshow(arrayShow, aspect=0.5)
lgd = axImage.legend(handles=patches, bbox_to_anchor=(0.5,1), loc=9, borderaxespad=-2.0, prop={'size':25}, ncol=12)
axHist.plot(veg_by_row_north, np.arange(0,cube_sub_north.shape[1]), color=[0.0,0.33,0.62])
axHist.fill_between(veg_by_row_north, np.arange(0,cube_sub_north.shape[1]), cube_sub_north.shape[1], facecolor=[0.0,0.33,0.62])
axHist.set_ylim(cube_sub_north.shape[1], 0)
axHist.set(title='Vegetation Pixels by Row')
axHist.title.set_fontsize(25)
plt.show()
#fig.savefig("./output/plots/19_kmeans_clustering_of_veg_00108.png", bbox_extra_artists=(lgd,), bbox_inches='tight')