All code used to run the analysis is located in the online repository here:
https://github.com/jonathanventura/canopy
Files needed: Hyperspectral Imagery: data/NEON_D17_TEAK_DP1_20170627_181333_reflectance.tif


In [20]:
import numpy as np
import sklearn
import sklearn.decomposition  # explicit: `import sklearn` alone may not load this submodule
import torch



In [21]:
#  Not needed as I have the actual file locally because this code wasn't working for me :/
# if not os.path.exists('tree_species_classifier_data.npz'):
#   !wget -O tree_species_classifier_data.npz "https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1"

In [22]:
# Read the four arrays and close the archive straight away: np.load on an
# .npz file returns a lazy NpzFile that holds an open file handle.
with np.load('tree_species_classifier_data.npz') as data:
    train_features = data['train_features']
    train_labels = data['train_labels']
    test_features = data['test_features']
    test_labels = data['test_labels']

# Fail early if the splits are inconsistent, rather than deep inside PCA or training.
assert train_features.shape[0] == train_labels.shape[0], "train features/labels length mismatch"
assert test_features.shape[0] == test_labels.shape[0], "test features/labels length mismatch"
assert train_features.shape[1] == test_features.shape[1], "train/test feature width mismatch"

Step 1: Inspect the Data

In [23]:
# Step 2: Preprocess the data with PCA

# Convert the labels to int64 tensors, the dtype used for the loss targets below.
# as_tensor accepts arrays and tensors alike, so re-running this cell (which
# overwrites its own inputs) does not copy-construct from a tensor and warn.
train_labels = torch.as_tensor(train_labels).long()
test_labels = torch.as_tensor(test_labels).long()

# Fit the PCA model using 32 components and whiten = True from the instructions.
# random_state pins the result in case svd_solver='auto' selects the randomized
# solver for this data shape; it has no effect on the deterministic solvers.
pca = sklearn.decomposition.PCA(n_components=32, whiten=True, random_state=0)

# Fit on the training split only, then apply the same projection to the test split.
pca_train_features = torch.tensor(pca.fit_transform(train_features)).float()
pca_test_features = torch.tensor(pca.transform(test_features)).float()



In [24]:
# Sanity check: the projected features and the labels should have the same
# number of rows.
print(
    f"pca_train_features shape: {pca_train_features.shape}",
    f"train_labels shape: {train_labels.shape}",
    sep="\n",
)


pca_train_features shape: torch.Size([15707, 32])
train_labels shape: torch.Size([15707])


In [25]:
# Step 3: Define the two classifiers (both are built with torch.nn)

# Seed torch's global RNG so the random weight initialisation below is
# repeatable after a kernel restart.
torch.manual_seed(0)

# Layer sizes are derived from the data instead of being repeated as literals.
n_features = pca_train_features.shape[1]    # width of the PCA-projected input
n_classes = len(np.unique(train_labels))    # one output score per distinct training label
hidden_size = 100

# Linear classifier: a single affine map from input features to class scores.
linear_model = torch.nn.Sequential(
    torch.nn.Linear(n_features, n_classes),
)

# Multi-Layer Perceptron (MLP), as in Lab 3.1: ONE hidden layer of 100 units
# (not 100 layers) followed by a ReLU non-linearity and the output layer.
mlp_model = torch.nn.Sequential(
    torch.nn.Linear(n_features, hidden_size),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_size, n_classes),
)


In [26]:
# Cross-entropy objective shared by both models, plus one SGD optimizer per
# model configured with the same learning rate and weight decay.
loss_function = torch.nn.CrossEntropyLoss()
lr = 1e-2
linear_opt, mlp_opt = (
    torch.optim.SGD(net.parameters(), lr=lr, weight_decay=0.001)
    for net in (linear_model, mlp_model)
)


In [34]:

"""
Classifiers using scikit-learn. Make a linear classifier and a neural network (NN)
classifier using scikit-learn and calculate accuracy on the test set for each
classifier. The NN should have three layers and a hidden layer size of 100.
"""
def accuracy(model, X, y=None):
    """Return the fraction of samples for which ``model`` predicts the right label.

    Two call forms are supported, so that this definition stays compatible with
    the loader-based calls made elsewhere in the notebook (e.g. in the training
    loop) regardless of which ``accuracy`` cell was executed last:

    * ``accuracy(model, X, y)`` - ``X`` is a feature tensor with samples along
      dim 0 and ``y`` the matching label tensor.
    * ``accuracy(model, batches)`` - ``batches`` is an iterable (such as a
      DataLoader) that yields ``(features, labels)`` pairs.

    Args:
        model: callable torch module producing one row of class scores per sample.
        X: feature tensor, or an iterable of batches when ``y`` is omitted.
        y: label tensor for ``X``; leave as ``None`` to use the batch form.

    Returns:
        float: number of correct predictions divided by the number of samples.

    Side effects:
        Puts ``model`` in evaluation mode and leaves it there.
    """
    # Treat a single (X, y) pair as one batch so both forms share one loop.
    batches = X if y is None else [(X, y)]

    # Set model to evaluation mode
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in batches:
            z = model(batch_X)

            # dim 0 indexes the samples, dim 1 the labels: take the index of
            # the highest score in each row as the predicted label.
            predicted_labels = z.max(dim=1).indices

            correct += (predicted_labels == batch_y).sum().item()
            # size(0) is the number of samples in this batch
            total += batch_y.size(0)

    return correct / total

# NOTE(review): this cell sits above the training cells, so a fresh
# top-to-bottom run scores the untrained models here. The saved output appears
# to come from re-running the cell after training (its execution count is out
# of sequence) - confirm the intended position.
print(
    f"Linear Classifier Accuracy {accuracy(linear_model, pca_test_features, test_labels)}",
    f"MLP Classifier Accuracy {accuracy(mlp_model, pca_test_features, test_labels)}",
    sep="\n",
)

Linear Classifier Accuracy 0.8275418275418276
MLP Classifier Accuracy 0.859073359073359


In [28]:
# Step 4: Classifiers using PyTorch

# a. Wrap each split in a TensorDataset and serve it in mini-batches
batch_size = 32

train_dataset, test_dataset = (
    torch.utils.data.TensorDataset(features, labels)
    for features, labels in (
        (pca_train_features, train_labels),
        (pca_test_features, test_labels),
    )
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader, test_loader = (
    torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=reshuffle)
    for dataset, reshuffle in ((train_dataset, True), (test_dataset, False))
)

In [29]:
# b. calculate model accuracy with data loader
def accuracy(model, dataloader):
    """Compute classification accuracy of ``model`` over all batches in ``dataloader``.

    Args:
        model: callable torch module producing one row of class scores per sample.
        dataloader: iterable yielding ``(features, labels)`` batches.

    Returns:
        float: correctly classified samples divided by samples seen.

    Side effects:
        Puts ``model`` in evaluation mode and leaves it there.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            # Index of the largest score in each row is the predicted label.
            predictions = model(inputs).max(dim=1).indices
            n_correct += (predictions == targets).sum().item()
            n_seen += targets.size(0)

    return n_correct / n_seen


In [30]:
# c. Function to train model

def trainModel(model, train_loader, opt, loss_fn, epochs=100, eval_loader=None):
    """Train ``model`` with mini-batch gradient steps, printing metrics every epoch.

    Args:
        model: torch module whose parameters are updated in place.
        train_loader: sized iterable of ``(features, labels)`` batches, used
            both for the gradient steps and for the training-accuracy readout.
        opt: optimizer already constructed over ``model``'s parameters.
        loss_fn: callable mapping ``(model_output, labels)`` to a scalar loss tensor.
        epochs: number of full passes over ``train_loader``. Defaults to 100,
            the value that was previously hard-coded.
        eval_loader: batches for the held-out accuracy readout. When omitted,
            the notebook-level ``test_loader`` is used, as before.

    Returns:
        None. One progress line is printed per epoch.
    """
    if eval_loader is None:
        # Backward-compatible fallback to the loader defined at notebook level.
        eval_loader = test_loader

    n_batches = len(train_loader)

    for epoch in range(epochs):
        # accuracy() leaves the model in eval mode, so training mode has to be
        # re-enabled at the start of every epoch.
        model.train()
        total_loss = 0

        for batch_X, batch_y in train_loader:
            opt.zero_grad()  # Zero out gradients

            z = model(batch_X)  # Forward pass
            loss = loss_fn(z, batch_y)  # Compute loss

            loss.backward()  # Backpropagation
            opt.step()  # Apply gradients

            total_loss += loss.item()

        train_accuracy = accuracy(model, train_loader)
        test_accuracy = accuracy(model, eval_loader)

        # The reported loss is the mean of the per-batch losses for this epoch.
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/n_batches:.4f}, "
              f"Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}")


In [31]:
# Fit the linear classifier, then report its final accuracy on both splits.
trainModel(linear_model, train_loader, linear_opt, loss_function)
linear_train_accuracy = accuracy(linear_model, train_loader)
linear_test_accuracy = accuracy(linear_model, test_loader)
print(f"Linear Train Accuracy {linear_train_accuracy}")
print(f"Linear Test Accuracy {linear_test_accuracy}")

Epoch 1/100, Loss: 1.4570, Train Accuracy: 0.7657, Test Accuracy: 0.7445
Epoch 2/100, Loss: 0.9524, Train Accuracy: 0.7966, Test Accuracy: 0.7857
Epoch 3/100, Loss: 0.8082, Train Accuracy: 0.8088, Test Accuracy: 0.7941
Epoch 4/100, Loss: 0.7415, Train Accuracy: 0.8180, Test Accuracy: 0.7992
Epoch 5/100, Loss: 0.7026, Train Accuracy: 0.8230, Test Accuracy: 0.8063
Epoch 6/100, Loss: 0.6769, Train Accuracy: 0.8266, Test Accuracy: 0.8076
Epoch 7/100, Loss: 0.6583, Train Accuracy: 0.8297, Test Accuracy: 0.8082
Epoch 8/100, Loss: 0.6444, Train Accuracy: 0.8319, Test Accuracy: 0.8121
Epoch 9/100, Loss: 0.6336, Train Accuracy: 0.8331, Test Accuracy: 0.8115
Epoch 10/100, Loss: 0.6248, Train Accuracy: 0.8343, Test Accuracy: 0.8140
Epoch 11/100, Loss: 0.6174, Train Accuracy: 0.8354, Test Accuracy: 0.8172
Epoch 12/100, Loss: 0.6114, Train Accuracy: 0.8363, Test Accuracy: 0.8179
Epoch 13/100, Loss: 0.6062, Train Accuracy: 0.8369, Test Accuracy: 0.8160
Epoch 14/100, Loss: 0.6016, Train Accuracy: 0.8

In [32]:
# Fit the MLP classifier, then report its final accuracy on both splits.
trainModel(mlp_model, train_loader, mlp_opt, loss_function)
mlp_train_accuracy = accuracy(mlp_model, train_loader)
mlp_test_accuracy = accuracy(mlp_model, test_loader)
print(f"MLP Train Accuracy {mlp_train_accuracy}")
print(f"MLP Test Accuracy {mlp_test_accuracy}")


Epoch 1/100, Loss: 1.6208, Train Accuracy: 0.6871, Test Accuracy: 0.6763
Epoch 2/100, Loss: 1.0309, Train Accuracy: 0.7730, Test Accuracy: 0.7748
Epoch 3/100, Loss: 0.7744, Train Accuracy: 0.8030, Test Accuracy: 0.8037
Epoch 4/100, Loss: 0.6660, Train Accuracy: 0.8222, Test Accuracy: 0.8089
Epoch 5/100, Loss: 0.6077, Train Accuracy: 0.8320, Test Accuracy: 0.8179
Epoch 6/100, Loss: 0.5707, Train Accuracy: 0.8401, Test Accuracy: 0.8192
Epoch 7/100, Loss: 0.5447, Train Accuracy: 0.8474, Test Accuracy: 0.8256
Epoch 8/100, Loss: 0.5252, Train Accuracy: 0.8506, Test Accuracy: 0.8295
Epoch 9/100, Loss: 0.5095, Train Accuracy: 0.8541, Test Accuracy: 0.8333
Epoch 10/100, Loss: 0.4962, Train Accuracy: 0.8576, Test Accuracy: 0.8346
Epoch 11/100, Loss: 0.4849, Train Accuracy: 0.8600, Test Accuracy: 0.8378
Epoch 12/100, Loss: 0.4748, Train Accuracy: 0.8621, Test Accuracy: 0.8385
Epoch 13/100, Loss: 0.4658, Train Accuracy: 0.8639, Test Accuracy: 0.8398
Epoch 14/100, Loss: 0.4576, Train Accuracy: 0.8