All code used to run the analysis is located in the online repository here:
https://github.com/jonathanventura/canopy
Files needed:Hyperspectral Imagery: data/NEON_D17_TEAK_DP1_20170627_181333_reflectance.tif


In [1]:
import numpy as np
import sklearn
import torch



In [2]:
#  Not needed as I have the actual file locally because this code wasn't working for me :/
# if not os.path.exists('tree_species_classifier_data.npz'):
#   !wget -O tree_species_classifier_data.npz "https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1"

In [3]:
data = np.load('tree_species_classifier_data.npz')
train_features = data['train_features']
train_labels = data['train_labels']
test_features = data['test_features']
test_labels = data['test_labels']

Step 1: Inspect the Data

In [4]:
# Step 2: Preprocess the data with PCA

# Fit the PCA model using 32 components and whiten = True from the instructions
pca = sklearn.decomposition.PCA(n_components=32, whiten=True)
pca_train_features = pca.fit_transform(train_features)
pca_test_features = pca.transform(test_features)


In [5]:
# Step 3: Classifiers using scikit-learn

linear_model = torch.nn.Sequential(
    torch.nn.Linear(32, len(np.unique(train_labels))), # 32 inputs, number of train_label outputs
)
# Like in Lab 3.1, use Multi-Layer Perceptron (MLP) to implement all 100 hidden layers. 
mlp_model = torch.nn.Sequential(
    torch.nn.Linear(32, 100), # 32 inputs, 1 hidden layer of size 100
    
    # hidden activation function, the magic happens
    torch.nn.ReLU(),
    
    # 100 inputs, number of train_label outputs
    torch.nn.Linear(100, len(np.unique(train_labels))) 
)


In [6]:
# Create a cross-entropy loss function and a stochastic gradient descent (SGD) optimizer
loss_fn = torch.nn.CrossEntropyLoss()
lr = 1e-2
opt = torch.optim.SGD(mlp_model.parameters(), lr=lr)

In [7]:

# Calculate accuracy on the test set for each classifier

def accuracy(model, X, y):
    
    # Set model to evaluation mode 
    model.eval()

    # Convert the test_features to a torch tensor
    test_features_tensor = torch.tensor(X, dtype=torch.float32)
    test_labels_tensor = torch.tensor(y, dtype=torch.long)

    # Disable gradient inside the "with"
    with torch.no_grad():
        outputs = model(test_features_tensor)
    
    # First dimension of outputs are the samples (dim = 0)
    # Second dimension of outputs are the labels (dim = 1)
    # Get the highest predicted labels value for each sample in the 
    sample, predicted_labels = torch.max(outputs, dim=1)


    # Calculate the accuracy (the number of correct predictions divided by total number of samples)
    correct = (predicted_labels == test_labels_tensor).sum().item()

    # size(0) refers to first dimension, which are the samples (dim = 0)
    total = test_labels_tensor.size(0) 

    return correct/total



In [8]:
# Calculate accuracy on the test set for each classifier

# From Lab 1.2 Binary Linear Classifier
logreg  = sklearn.linear_model.LogisticRegression()
logreg.fit(pca_test_features, test_labels)
logreg.score(pca_test_features, test_labels)



0.9163449163449163

In [9]:
# Calculate accuracy on the test set for each classifier

mlpnn = sklearn.neural_network.MLPClassifier(max_iter=100)
mlpnn.fit(pca_test_features, test_labels)
mlpnn.score(pca_test_features, test_labels)





0.9916344916344917

In [10]:
# Step 4: Classifiers using PyTorch

# a. Create TensorDataset and DataLoader to train and test splits
batch_size = 32

train_dataset = torch.utils.data.TensorDataset(torch.tensor(pca_train_features, dtype=torch.float32),
                               torch.tensor(train_labels, dtype=torch.long))
test_dataset = torch.utils.data.TensorDataset(torch.tensor(pca_test_features, dtype=torch.float32),
                              torch.tensor(test_labels, dtype=torch.long))


train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
# b. 