In [20]:
import itertools
import os
import h5py
import hdf5storage

import numpy as np
import scipy.io as sio
from sklearn.neighbors import KNeighborsClassifier
import lightgbm as lgb
from sklearn.svm import SVC

In [30]:
# Data locations.
#   DATA_PATH    -- directory holding the train/test split .mat files.
#   DATASET_PATH -- directory holding the PaviaU image and ground-truth .mat files.
# The defaults are the original cluster paths. To run on another machine, set the
# HYPERSPEC_DATA_PATH / HYPERSPEC_DATASET_PATH environment variables
# (e.g. to '/Users/artsyinc/Documents/MATH630/research/data/hyper')
# instead of commenting lines in and out of this cell.
DATA_PATH = os.environ.get('HYPERSPEC_DATA_PATH', '/scratch0/ilya/locDoc/data/hyperspec')
DATASET_PATH = os.environ.get('HYPERSPEC_DATASET_PATH', '/scratch0/ilya/locDoc/data/hyperspec/datasets')

# Train/test split files with "coarse" masks (presumably block-wise splits at
# the patch size given in the file name -- confirm against the generator).
coarsetraintestfilenames = [
    'PaviaU_gt_traintest_coarse_16px16p.mat',
    'PaviaU_gt_traintest_coarse_32px32p.mat',
    'PaviaU_gt_traintest_coarse_64px64p.mat',
    'PaviaU_gt_traintest_coarse_128px128p.mat',
]

# Train/test split files of the s03 / s60 / s200 family (presumably uniformly
# sampled with that many training samples -- confirm against the generator).
unifttfiles = [
    'PaviaU_gt_traintest_s03_1_3f6384.mat',
    'PaviaU_gt_traintest_s60_1_dd069a.mat',
    'PaviaU_gt_traintest_s200_1_591636.mat',
]

In [168]:
# Which train/test split to evaluate.
traintestfilename = unifttfiles[2]

# Ground-truth label image, indexed labels[row, col]. Label 0 is treated as
# "unlabeled" throughout this cell (excluded from n_classes and from the splits).
labels = sio.loadmat(os.path.join(DATASET_PATH, 'PaviaU_gt.mat'))['paviaU_gt']
height, width = labels.shape

# Enumerate pixels as (x, y) pairs with x (column) as the outer loop, so pixel
# (x, y) sits at flat index x*height + y. Transposing before flattening puts
# the labels in that same order, and the split masks are indexed the same way.
all_pixels = np.array(list(itertools.product(range(width), range(height))))
flat_labels = labels.transpose().reshape(height*width)
# Distinct label values minus one (the unlabeled value 0).
n_classes = len(np.unique(flat_labels)) - 1

split_path = os.path.join(DATA_PATH, traintestfilename)
try:
    mat_contents = sio.loadmat(split_path)
except NotImplementedError:
    # scipy.io.loadmat cannot read MATLAB v7.3 (HDF5-based) files and raises
    # NotImplementedError for them; fall back to hdf5storage in that case only,
    # so that unrelated failures (missing file, Ctrl-C, ...) are not masked.
    mat_contents = hdf5storage.loadmat(split_path)
train_mask = mat_contents['train_mask'].astype(int).squeeze()
test_mask = mat_contents['test_mask'].astype(int).squeeze()

# A pixel belongs to a split when it is labeled AND its mask entry is non-zero.
# Boolean indexing replaces the Python-2-only `filter(lambda (x, y): ...)`
# form while selecting exactly the same pixels in the same order.
train_selected = (flat_labels * train_mask) != 0
test_selected = (flat_labels * test_mask) != 0

trainX = all_pixels[train_selected]
trainY = flat_labels[train_mask == 1]
testX = all_pixels[test_selected]
testY = flat_labels[test_mask == 1]

# X is selected by "labeled and masked", Y by "mask == 1"; they only line up
# row-for-row when the masks are 0/1 and never cover unlabeled pixels.
assert len(trainX) == len(trainY), "train_mask must be 0/1 and cover labeled pixels only"
assert len(testX) == len(testY), "test_mask must be 0/1 and cover labeled pixels only"


In [123]:
# 1-nearest-neighbour classifier; it is fitted on the (x, y) pixel coordinates
# in trainX, i.e. this baseline uses spatial location only.
classifier = KNeighborsClassifier(n_neighbors=1)

In [124]:
# Fit the location-only classifier on the training pixels and label the test
# pixels in one chained call (fit() returns the estimator itself).
y_pred = classifier.fit(trainX, trainY).predict(testX)

In [125]:
# Overall accuracy: fraction of test pixels whose predicted label matches the
# ground truth. np.mean keeps the reduction vectorized instead of iterating the
# boolean array element by element with the built-in sum().
np.mean(y_pred == testY)

0.7418161931300294

### Classify raw spectral features

In [169]:
# Load the hyperspectral cube (indexed data[row, col, band]) and scale it by
# its largest absolute value so every entry lies in [-1, 1].
mat_contents = sio.loadmat(os.path.join(DATASET_PATH, 'PaviaU.mat'))
data = mat_contents['paviaU'].astype(np.float32)
data /= np.max(np.abs(data))
nbands = data.shape[2]

# trainX / testX hold (x, y) = (column, row) pairs, so index the cube with
# [y, x]. Integer-array indexing gathers every pixel's spectrum in one step and
# yields exactly one row per entry of trainX / testX (in the same order), which
# is what the later concatenation with the coordinates requires.
trainX_spec = data[trainX[:, 1], trainX[:, 0], :]
testX_spec = data[testX[:, 1], testX[:, 0], :]

#### SVM

In [170]:
# Linear-kernel SVM trained on the per-pixel spectra only.
clf = SVC(kernel='linear').fit(trainX_spec, trainY)
clf  # show the fitted estimator's parameters as the cell output

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [171]:
# Predict labels for the test pixels from their spectra alone.
y_pred_svm = clf.predict(testX_spec)

In [172]:
# Overall accuracy of the spectra-only SVM on the test pixels. np.mean keeps
# the reduction vectorized instead of iterating with the built-in sum().
np.mean(y_pred_svm == testY)

0.6522842639593909

In [130]:
# Sanity check: (number of training pixels, number of spectral bands).
trainX_spec.shape

(4377, 103)

#### SVM on location + spectral features

In [173]:
# Build the combined feature matrices: the (x, y) pixel coordinates come first,
# followed by the spectrum of that pixel, stacked column-wise.
trainX_combo = np.hstack((trainX, trainX_spec))
testX_combo = np.hstack((testX, testX_spec))

In [174]:
# Linear-kernel SVM trained on pixel coordinates + spectra.
# NOTE(review): the coordinates are raw pixel indices while the spectra were
# scaled by the cube's maximum, so the two feature groups are on very
# different scales -- consider standardizing before fitting.
clf = SVC(kernel='linear').fit(trainX_combo, trainY)
clf  # show the fitted estimator's parameters as the cell output

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [175]:
# Predict labels for the test pixels from coordinates + spectra.
y_pred_svm = clf.predict(testX_combo)

In [176]:
# Overall accuracy of the location + spectra SVM on the test pixels. np.mean
# keeps the reduction vectorized instead of iterating with the built-in sum().
np.mean(y_pred_svm == testY)

0.8955974228816869

#### LGB

In [None]:
# Gradient-boosted trees (LightGBM, scikit-learn wrapper) trained on the
# per-pixel spectra only, with a multiclass log-loss objective.
lgbclassifier = lgb.LGBMClassifier(
    objective='multiclass',
    num_class=n_classes,
    metric='multi_logloss',
)
lgbclassifier.fit(trainX_spec, trainY)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        importance_type='split', learning_rate=0.1, max_depth=-1,
        metric='multi_logloss', min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=100,
        n_jobs=-1, num_class=9, num_leaves=31, objective='multiclass',
        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
        subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [None]:
# Predict labels for the test pixels from their spectra with the LightGBM model.
y_pred_lgb = lgbclassifier.predict(testX_spec)

In [None]:
# Overall accuracy of the LightGBM model on the test pixels. np.mean keeps the
# reduction vectorized instead of iterating with the built-in sum().
np.mean(y_pred_lgb == testY)