## Importing Dependencies

In [1]:
import scipy.io
import numpy as np
from random import shuffle
import random
import spectral
import scipy.ndimage
from skimage.transform import rotate
import os
# import patch_size
patch_size = 21 
%matplotlib inline

## Loading Indian Pines Data (Numpy Array)

In [2]:
DATA_PATH = os.path.join(os.getcwd(),"IndianPines")
NEW_DATA_PATH = os.path.join(os.getcwd(),"new_indian_pines_data")
input_mat = scipy.io.loadmat(os.path.join(DATA_PATH, 'Indian_pines_corrected.mat'))['indian_pines_corrected']
target_mat = scipy.io.loadmat(os.path.join(DATA_PATH, 'Indian_pines_gt.mat'))['indian_pines_gt']

print("Input Matrix Type: %s" %type(input_mat))
print("Target Matrix Type: %s" %type(input_mat))

print("\n")
print("Input Matrix: %s" %str(np.shape(input_mat)))
print("Target Matrix: %s" %str(np.shape(input_mat)))

Input Matrix Type: <class 'numpy.ndarray'>
Target Matrix Type: <class 'numpy.ndarray'>


Input Matrix: (145, 145, 200)
Target Matrix: (145, 145, 200)


In [3]:
# Now let us define our global variables that define our data
HEIGHT = input_mat.shape[0]
WIDTH = input_mat.shape[1]
BAND = input_mat.shape[2]
print(BAND)
PATCH_SIZE = patch_size
TRAIN_PATCH, TRAIN_LABELS, TEST_PATCH, TEST_LABELS, CLASSES = [],[],[],[],[]
COUNT =200 # Number of patches of each class (This will change depending on the selected patch size)
OUTPUT_CLASSES = 16
TEST_FRAC = 0.25 # percentage used for testing


200


## Scaling input between [0,1]

In [4]:
input_mat =input_mat.astype(float) # change elements in the tensor to float
input_mat -= np.min(input_mat) # shift [0 to max]
input_mat /= np.max(input_mat) # scale ([0 to max])/max = [0 to 1] array

## Normalize data

In [5]:
# calculate the mean of each channel for normalization
MEAN_ARRAY = np.ndarray(shape=(BAND,),dtype=float) #row vector containing the mean of each batch
for i in range(BAND):
    MEAN_ARRAY[i] = np.mean(input_mat[:,:,i]) # mean per band
    print(MEAN_ARRAY[i])
#np.transpose(input_mat, (2,0,1)).shape transpose

0.231513871206
0.362622411449
0.38414871768
0.37171426415
0.411802366546
0.443472672983
0.450146557326
0.434701411599
0.429402801201
0.402828713264
0.399143551886
0.387621902087
0.369221490419
0.398366819915
0.406516222793
0.403828316086
0.385139329339
0.360198778934
0.348285917323
0.336018902888
0.332833259713
0.330136521319
0.322258079639
0.312484245874
0.311774405954
0.301452661185
0.288851235989
0.292987412785
0.29031911616
0.265439480195
0.294790022669
0.262392190941
0.295587519551
0.346116572486
0.372140879696
0.403517656293
0.503496657666
0.577634573578
0.576080559718
0.367975513242
0.594116370116
0.607950239001
0.578010882606
0.570455099935
0.522339934964
0.469485789357
0.504717745544
0.554142375748
0.545172225446
0.546258973806
0.538296455131
0.542029893829
0.529045654072
0.415637644596
0.390610933006
0.378519534951
0.313514990564
0.15830015883
0.175973930578
0.202181602514
0.260398138032
0.351874370086
0.411255945819
0.44522874329
0.434213491171
0.436907628452
0.430471176793


## Patching function

In [6]:
def patch(height_index, width_index):
    """
    Returns a mean-normalized patch, the top left corner of which 
    is at (height_index, width_index)
    
    Inputs: 
    height_index - row index of the top left corner of the image patch
    width_index - column index of the top left corner of the image patch
    
    Outputs:
    mean_normalized_patch - mean normalized patch of size (PATCH_SIZE, PATCH_SIZE) 
    whose top left corner is at (height_index, width_index)
    """
    
    
    
    # creating patch slices
    transposed_array = np.transpose(input_mat, (2,0,1))
    height_slice = slice(height_index, height_index+PATCH_SIZE)
    width_slice = slice(width_index, width_index+PATCH_SIZE)
    transposed_patch = transposed_array[:,height_slice,width_slice]
    mean_normalized_patch = []
    for i in range(transposed_patch.shape[0]): # for every band normalize patch
        mean_normalized_patch.append(transposed_patch[i]-MEAN_ARRAY[i]) # substract the mean for every element of in the matrix
    return np.array(mean_normalized_patch)

## Obtain all patches and separate them by class 

In [7]:
CLASSES = []
for classes in range(OUTPUT_CLASSES):
    CLASSES.append([]) # append the number of list classes 
    
    
# getting patches

for i in range(HEIGHT - PATCH_SIZE +1):
    for j in range(WIDTH - PATCH_SIZE +1):
        # the actual patch tensor
        input_img = patch(i,j)
        #choose the middle of the patch as classification
        target_id = target_mat[i+int((PATCH_SIZE-1)/2),j+int((PATCH_SIZE-1)/2)]
        if(target_id != 0): # ignore the UNKNOWN patches
            CLASSES[target_id -1].append(input_img)
            
print("Done")
print(type(CLASSES[0]))

Done
<class 'list'>


In [8]:
print(input_mat.shape)
print(target_mat.shape)
print(len(CLASSES))
print(np.array(CLASSES[0]).shape)
print("\n")

for c in range(len(CLASSES)):
    print("Class %d contains: %s patches"%(c+1,len(CLASSES[c])))

(145, 145, 200)
(145, 145)
16
(46, 200, 21, 21)


Class 1 contains: 46 patches
Class 2 contains: 1378 patches
Class 3 contains: 547 patches
Class 4 contains: 152 patches
Class 5 contains: 340 patches
Class 6 contains: 730 patches
Class 7 contains: 28 patches
Class 8 contains: 356 patches
Class 9 contains: 20 patches
Class 10 contains: 843 patches
Class 11 contains: 2245 patches
Class 12 contains: 481 patches
Class 13 contains: 205 patches
Class 14 contains: 1095 patches
Class 15 contains: 142 patches
Class 16 contains: 93 patches


## Extract 25% from data set for testing

In [9]:
for i in range(len(CLASSES)):
    class_population = len(CLASSES[i])
    split_size = int(class_population*TEST_FRAC) # getting 25% (spliting size for the test)
    patches_class = CLASSES[i] # extracting patches from class
    shuffle(patches_class) # shuffling patches
    TRAIN_PATCH.append(patches_class[:-split_size]) # from 0 to (len -split_size) contains patches
    TEST_PATCH.extend(patches_class[-split_size:]) # from split_size to len(class)-1
    TEST_LABELS.extend(np.full(split_size,i,dtype=int)) # 0 to 15 label

for c in TRAIN_PATCH:
    print(len(c))
print("____________")

print(len(TRAIN_PATCH))
print(len(TEST_PATCH))
print("____________")
print(np.shape(TRAIN_PATCH[0]))
print(np.shape(TEST_PATCH[0]))
    

35
1034
411
114
255
548
21
267
15
633
1684
361
154
822
107
70
____________
16
2170
____________
(35, 200, 21, 21)
(200, 21, 21)


In [10]:
#x = CLASSES[0]
#print(int(len(x)*TEST_FRAC))
#print(np.shape(x))
#np.shape(x[:-int(class_population*TEST_FRAC)])
#np.full(int(len(x)*TEST_FRAC),len(x))

## Oversample the classes which do not have at least COUNT patches in the training set and extract COUNT patches

In [11]:
# This methods ensures that there are at least COUNT samples for every class
for i in range(len(TRAIN_PATCH)):
    if(len(TRAIN_PATCH[i])<COUNT):
        temp = TRAIN_PATCH[i]
        for j in range(int(COUNT/len(TRAIN_PATCH[i]))):
            shuffle(TRAIN_PATCH[i])
            TRAIN_PATCH[i] = TRAIN_PATCH[i]+temp # adding oversamples to the TRAININ_PATCH
    shuffle(TRAIN_PATCH[i])
    TRAIN_PATCH[i] = TRAIN_PATCH[i][:COUNT] # include all new oversamples of at most COUNT
            

In [12]:
# Checking the code from above
for c in range(len(TRAIN_PATCH)):
    print(len(TRAIN_PATCH[c]))

200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200


## Convert data to numpy arrays

In [13]:
TRAIN_PATCH = np.asarray(TRAIN_PATCH)
print(np.shape(TRAIN_PATCH))
TRAIN_PATCH = TRAIN_PATCH.reshape((-1,200,PATCH_SIZE,PATCH_SIZE)) # reduce dimension of the training tensor
print(np.shape(TRAIN_PATCH))

(16, 200, 200, 21, 21)
(3200, 200, 21, 21)


## Training and Testing Labels

In [14]:
TRAINING_LABELS = np.array([])
for i in range(OUTPUT_CLASSES):
    TRAINING_LABELS = np.append(TRAINING_LABELS,np.full(COUNT,i,dtype=int))
print("The length of training labels is: %d"%len(TRAINING_LABELS))
print("The length of testing labels is: %d"%len(TEST_LABELS))

The length of training labels is: 3200
The length of testing labels is: 2170


## Saving Training and Testing Data

In [15]:
l_train = int(len(TRAIN_PATCH)/(COUNT))
l_testing = int(len(TEST_PATCH)/(COUNT))
for i in range(l_train):
    train_dict ={}
    start = i*COUNT
    end = (i+1)*COUNT
    train_dict["train_patches"] = TRAIN_PATCH[start:end]
    train_dict["train_labels"] = TRAIN_LABELS[start:end]
    file_name = "Training(%s)_class(%d).mat"%(str(PATCH_SIZE),i)
    scipy.io.savemat(os.path.join(NEW_DATA_PATH,file_name),train_dict)

for i in range(l_testing):
    test_dict = {}
    start = i*COUNT
    end = (i+1)*COUNT
    test_dict["testing_patches"] = TEST_PATCH[start:end]
    test_dict["testing_labels"] = TEST_LABELS[start:end]
    file_name = "Testing(%s)_class(%d).mat"%(str(PATCH_SIZE),i)
    scipy.io.savemat(os.path.join(NEW_DATA_PATH,file_name),test_dict)
print('Done')
    

Done
