# Indian Pines HSI classification


## Imports

In [None]:
import matplotlib.pyplot as plt
import sklearn.linear_model as linmod
import logging
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import tables as tab
import pandas as pd


## Config

In [None]:
# Seed intended for the randomised steps of the notebook, so runs can be repeated.
random_seed = 42

# Configure logging: timestamped, INFO-level messages on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Directory to save results. exist_ok=True replaces the separate existence
# check: it avoids the check-then-create race and makes re-running this cell
# a no-op when the directory is already there.
output_dir = "model_results"
os.makedirs(output_dir, exist_ok=True)

## Data preprocessing

### Define helper functions

In [None]:
# load some helper functions
def get_PRISMA_vnir(path):
    """Read the VNIR cube from a PRISMA HDF5 (.he5) file.

    Parameters
    ----------
    path : str
        Path of the file to open with PyTables.

    Returns
    -------
    numpy.ndarray
        The ``VNIR_Cube`` dataset found under
        ``/HDFEOS/SWATHS/PRS_L1_HCO/Data Fields``, with the first and last
        three entries along axis 1 dropped and axes 1 and 2 swapped, so the
        trimmed axis comes last.
        NOTE(review): axis 1 is presumably the spectral band axis, which
        would make the result (rows, cols, bands) - confirm against the
        PRISMA product specification.
    """
    # The context manager closes the file even when the node lookup or the
    # read raises; the former explicit close() was skipped in that case.
    with tab.open_file(path) as h5_file:
        swath = h5_file.get_node("/HDFEOS/SWATHS/PRS_L1_HCO/")
        # Slicing reads the data into memory, so the array remains usable
        # after the file has been closed.
        vnir = swath['Data Fields']['VNIR_Cube'][:, 3:-3, :]
    return vnir.transpose((0, 2, 1))

def rgbplot(vnir, shape=(1000, 1000), bands=(35, 45, 52), scale=10000):
    """Display a three-band colour composite of flattened VNIR pixels.

    Parameters
    ----------
    vnir : numpy.ndarray
        2-D array indexed as (pixel, band).
    shape : tuple of int, optional
        (rows, cols) of the image the pixels are reshaped to. Defaults to
        the 1000 x 1000 size that was previously hard-coded.
    bands : sequence of int, optional
        Indices of the three bands used as colour channels, in the order
        they are passed to ``imshow``.
    scale : float, optional
        Divisor applied to the raw values before display.

    Returns
    -------
    None
        The image is drawn on the current matplotlib axes.
    """
    rgb_flat = vnir[:, list(bands)]
    rgb = rgb_flat.reshape(tuple(shape) + (3,)) / scale
    # imshow expects float RGB values in [0, 1]; clamp anything above 1.
    rgb[rgb > 1] = 1
    plt.imshow(rgb)
    
def intobatches(orderlist, size):
    """Split a sequence into consecutive batches of at most ``size`` items.

    Parameters
    ----------
    orderlist : sequence
        Any object supporting ``len()`` and slicing (list, tuple, string,
        NumPy array, ...).
    size : int
        Maximum number of items per batch; must be at least 1.

    Returns
    -------
    list of list
        The items of ``orderlist`` in their original order. Every batch has
        ``size`` items except possibly the last one, which holds the
        remainder. An empty input yields an empty list.

    Raises
    ------
    ValueError
        If ``size`` is smaller than 1. The previous implementation never
        terminated in that case because the read position did not advance.
    """
    if size < 1:
        raise ValueError("size must be a positive integer, got %r" % (size,))
    # list(...) keeps the return type a list of lists whatever the input
    # sequence type is.
    return [list(orderlist[start:start + size])
            for start in range(0, len(orderlist), size)]

### Load dataset

In [None]:
# load list of images
path_to_xls = "./Faubai/2023_02_22_Faubai_dataset_v1.xlsx"
datalist = pd.read_excel(path_to_xls)

# Keep only rows with a real acquisition time: a comparison against a missing
# datetime (NaT) is False, so those rows are dropped. The cutoff is written as
# a date string because pd.Timestamp(2010) means 2010 ns after the Unix epoch,
# not the year 2010.
truetime = datalist['datetime'] > pd.Timestamp("2010-01-01")
datalist = datalist[truetime]
train_data = datalist[datalist['Train'] == 'X']
test_data = datalist[datalist['Test'] == 'X']

print(datalist)

# find the relevant files
path = "./Faubai/"

# Work on the name values themselves: after the filtering above the index can
# have gaps, so a lookup such as datalist['name'][i] with a running counter
# would raise KeyError or address the wrong row.
scene_names = list(datalist['name'])
scene_name_set = set(scene_names)
he5_suffix = ".he5"

fname_dict = {}  # scene name -> path of its .he5 file
lname_dict = {}  # scene name -> path of its label file

# A single walk over the directory tree serves both lookups.
for root, d_names, f_names in os.walk(path):
    for f in f_names:
        full_path = os.path.join(root, f)
        # data file: the file name is exactly "<scene name>.he5"
        if f.endswith(he5_suffix) and f[:-len(he5_suffix)] in scene_name_set:
            fname_dict[f[:-len(he5_suffix)]] = full_path
        # label file: the file name contains both the scene name and "labels.csv"
        if "labels.csv" in f:
            for name in scene_names:
                if name in f:
                    lname_dict[name] = full_path

# Fail early, naming the scenes, when a selected scene lacks a data or label
# file instead of surfacing a bare KeyError from the lists below.
selected_names = list(train_data['name']) + list(test_data['name'])
missing = sorted({str(name) for name in selected_names
                  if name not in fname_dict or name not in lname_dict})
if missing:
    raise FileNotFoundError(
        "No .he5 and/or labels.csv file found under %s for: %s"
        % (path, ", ".join(missing))
    )

# determine images to include in training
training_files = [[fname_dict[name], lname_dict[name]] for name in train_data['name']]

print(training_files)


# determine images to include in test
test_files = [[fname_dict[name], lname_dict[name]] for name in test_data['name']]

print(test_files)


In [None]:
# load first image
# extract VNIR component
vnir = get_PRISMA_vnir(training_files[0][0])
labels = np.loadtxt(training_files[0][1], dtype=np.uint8)

# Take the image size and band count from the cube itself instead of
# hard-coding 1000 x 1000 pixels and 60 bands.
n_rows, n_cols, n_bands = vnir.shape
vnir = vnir.reshape((-1, n_bands))
labels = labels.flatten()
if labels.size != n_rows * n_cols:
    raise ValueError(
        "label file has %d entries but the scene has %d pixels"
        % (labels.size, n_rows * n_cols)
    )

# get a random selection of the pixels; seeding with random_seed (defined in
# the Config cell) makes the sample, and therefore the scores, repeatable
rng = np.random.default_rng(random_seed)
random_selection = rng.permutation(len(vnir))
n_train_pixels = 10000
train_idx = random_selection[:n_train_pixels]

# initialize SVM on VNIR (SGDClassifier's default hinge loss is a linear SVM)
svm2train = linmod.SGDClassifier(max_iter=10000, tol=1e-3, eta0=0.1,
                                 learning_rate='adaptive',
                                 random_state=random_seed)
svm2train.fit(vnir[train_idx], labels[train_idx])

# do prediction on the scene
prediction = svm2train.predict(vnir)

# plot the prediction: reference labels on the left, prediction on the right
fig, ax = plt.subplots(1, 2)
ax[0].imshow(labels.reshape((n_rows, n_cols)))
ax[1].imshow(prediction.reshape((n_rows, n_cols)))

# per-class accuracy for class ids 0..5: the fraction of pixels labelled i
# that were also predicted as i
# NOTE: this is scored on the scene the classifier was fitted on, including
# the training pixels themselves.
n_classes = 6
scores = []
for i in range(n_classes):
    m = labels == i
    n_pixels = int(m.sum())
    if n_pixels == 0:
        # A class absent from the scene has no defined accuracy. Store None
        # (JSON null) instead of dividing 0 by 0, which produced a warning
        # and a NaN that is not valid JSON.
        scores.append(None)
    else:
        scores.append(float((labels[m] == prediction[m]).sum() / n_pixels))

# save score in output directory
with open(os.path.join(output_dir, "svm2train_scores.json"), "w") as f:
    json.dump(scores, f)
print(scores)