All code used to run the analysis is located in the online repository here:
https://github.com/jonathanventura/canopy
Files needed: Hyperspectral imagery: data/NEON_D17_TEAK_DP1_20170627_181333_reflectance.tif


In [14]:
import numpy as np
import sklearn
import sklearn.decomposition
import torch



In [15]:
# Download step disabled: the wget command below was not working for me, so the
# notebook reads a local copy of tree_species_classifier_data.npz instead.
# if not os.path.exists('tree_species_classifier_data.npz'):
#   !wget -O tree_species_classifier_data.npz "https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1"

In [16]:
# Load the four pre-extracted arrays from the local .npz archive.
# np.load returns a file-backed NpzFile for .npz input; the context manager
# closes the underlying file handle as soon as the arrays have been read.
with np.load('tree_species_classifier_data.npz') as data:
    train_features = data['train_features']
    train_labels = data['train_labels']
    test_features = data['test_features']
    test_labels = data['test_labels']

Step 1: Inspect the Data

In [17]:
# Step 2: Preprocess the data with PCA

# Convert the labels to int64 tensors. torch.as_tensor accepts both the NumPy
# arrays loaded from disk and tensors left over from a previous run of this
# cell, so re-running the cell does not trigger a copy-construct warning.
train_labels = torch.as_tensor(train_labels).long()
test_labels = torch.as_tensor(test_labels).long()

# Fit the PCA model using 32 components and whiten = True from the instructions.
# random_state pins any randomised SVD solver so repeated runs give the same
# projection. The projection is fitted on the training split only and then
# applied unchanged to the test split.
pca = sklearn.decomposition.PCA(n_components=32, whiten=True, random_state=0)
pca_train_features = torch.as_tensor(pca.fit_transform(train_features), dtype=torch.float32)
pca_test_features = torch.as_tensor(pca.transform(test_features), dtype=torch.float32)



In [18]:
# Sanity check: report the shapes of the projected features and their labels.
for tensor_name, tensor in (
    ("pca_train_features", pca_train_features),
    ("train_labels", train_labels),
):
    print(f"{tensor_name} shape: {tensor.shape}")


pca_train_features shape: torch.Size([15707, 32])
train_labels shape: torch.Size([15707])


In [19]:
# Step 3: Define the two classifiers as PyTorch modules

# Seed PyTorch's global RNG so the random weight initialisation below is
# repeatable under Restart & Run All.
torch.manual_seed(0)

# Layer sizes are derived once from the data instead of being repeated as literals.
num_features = pca_train_features.shape[1]  # number of PCA components kept
# One output logit per distinct training label.
# NOTE: assumes labels are the integers 0..K-1 with every class present in the
# training split -- TODO confirm against the dataset.
num_classes = len(np.unique(train_labels))
hidden_units = 100

# Linear classifier: a single affine map from the PCA features to class logits.
linear_model = torch.nn.Sequential(
    torch.nn.Linear(num_features, num_classes),
)

# Like in Lab 3.1, a Multi-Layer Perceptron (MLP) with ONE hidden layer of 100 units.
mlp_model = torch.nn.Sequential(
    torch.nn.Linear(num_features, hidden_units),  # input -> hidden layer
    torch.nn.ReLU(),  # hidden activation function (the non-linearity)
    torch.nn.Linear(hidden_units, num_classes),  # hidden layer -> class logits
)


In [20]:
# Training setup: one shared cross-entropy loss and one SGD optimizer per model
lr = 1e-2
loss_function = torch.nn.CrossEntropyLoss()

# Both optimizers are configured with the same learning rate and weight decay.
sgd_kwargs = {"lr": lr, "weight_decay": 0.001}
linear_opt = torch.optim.SGD(linear_model.parameters(), **sgd_kwargs)
mlp_opt = torch.optim.SGD(mlp_model.parameters(), **sgd_kwargs)


In [21]:
# Step 4: Classifiers using PyTorch

# a. Wrap the train and test tensors in datasets, then in mini-batch loaders
batch_size = 32
data_utils = torch.utils.data

train_dataset = data_utils.TensorDataset(pca_train_features, train_labels)
test_dataset = data_utils.TensorDataset(pca_test_features, test_labels)

# Only the training loader is created with shuffle=True; the test loader is not shuffled.
train_loader = data_utils.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = data_utils.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

In [22]:
# b. calculate model accuracy with data loader
def accuracy(model, dataloader):
    """Return the fraction of samples that ``model`` classifies correctly.

    The model is switched to evaluation mode (and left in it) and is run
    without gradient tracking over every batch of ``dataloader``.

    Args:
        model: Callable module mapping a feature batch to per-class scores
            with the class dimension at index 1.
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when ``dataloader`` yields
        no samples (instead of raising ZeroDivisionError).
    """
    model.eval()
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for batch_X, batch_y in dataloader:
            logits = model(batch_X)
            # The predicted class is the index of the largest score in each row.
            predicted_labels = logits.argmax(dim=1)
            num_correct += (predicted_labels == batch_y).sum().item()
            num_seen += batch_y.size(0)

    if num_seen == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return num_correct / num_seen


In [23]:
# c. Function to train model

def trainModel(model, train_loader, opt, loss_fn, epochs=100, eval_loader=None):
    """Train ``model`` and print loss and accuracy after every epoch.

    Each epoch makes one pass over ``train_loader``, taking one optimizer step
    per batch, then reports the mean batch loss together with the accuracy on
    ``train_loader`` and on ``eval_loader``.

    Args:
        model: Module to train; updated in place through ``opt``.
        train_loader: Iterable of ``(features, labels)`` training batches.
        opt: Optimizer constructed over ``model``'s parameters.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        epochs: Number of passes over ``train_loader`` (default 100).
        eval_loader: Loader used for the reported test accuracy. When omitted,
            the notebook-level ``test_loader`` is used, which keeps existing
            four-argument calls working unchanged.
    """
    if eval_loader is None:
        # Backward-compatible fallback to the loader defined at notebook level.
        eval_loader = test_loader

    num_batches = len(train_loader)
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        for batch_X, batch_y in train_loader:
            opt.zero_grad()  # Zero out gradients

            z = model(batch_X)  # Forward pass
            loss = loss_fn(z, batch_y)  # Compute loss

            loss.backward()  # Backpropagation
            opt.step()  # Apply gradients

            total_loss += loss.item()

        # Mean loss per batch; NaN when the loader produced no batches.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        train_accuracy = accuracy(model, train_loader)
        test_accuracy = accuracy(model, eval_loader)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}, "
              f"Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}")


In [24]:
# Train the linear classifier, then report its final accuracy on both splits.
trainModel(linear_model, train_loader, linear_opt, loss_function)

linear_train_accuracy = accuracy(linear_model, train_loader)
linear_test_accuracy = accuracy(linear_model, test_loader)
print(f"Linear Train Accuracy {linear_train_accuracy}")
print(f"Linear Test Accuracy {linear_test_accuracy}")

Epoch 1/100, Loss: 1.4581, Train Accuracy: 0.7534, Test Accuracy: 0.7677
Epoch 2/100, Loss: 0.9584, Train Accuracy: 0.7901, Test Accuracy: 0.7928
Epoch 3/100, Loss: 0.8121, Train Accuracy: 0.8062, Test Accuracy: 0.7992
Epoch 4/100, Loss: 0.7443, Train Accuracy: 0.8152, Test Accuracy: 0.8044
Epoch 5/100, Loss: 0.7048, Train Accuracy: 0.8212, Test Accuracy: 0.8082
Epoch 6/100, Loss: 0.6784, Train Accuracy: 0.8256, Test Accuracy: 0.8108
Epoch 7/100, Loss: 0.6595, Train Accuracy: 0.8292, Test Accuracy: 0.8082
Epoch 8/100, Loss: 0.6454, Train Accuracy: 0.8311, Test Accuracy: 0.8127
Epoch 9/100, Loss: 0.6344, Train Accuracy: 0.8331, Test Accuracy: 0.8140
Epoch 10/100, Loss: 0.6253, Train Accuracy: 0.8345, Test Accuracy: 0.8153
Epoch 11/100, Loss: 0.6179, Train Accuracy: 0.8357, Test Accuracy: 0.8179
Epoch 12/100, Loss: 0.6117, Train Accuracy: 0.8360, Test Accuracy: 0.8192
Epoch 13/100, Loss: 0.6064, Train Accuracy: 0.8368, Test Accuracy: 0.8185
Epoch 14/100, Loss: 0.6018, Train Accuracy: 0.8

In [25]:
# Train the MLP classifier, then report its final accuracy on both splits.
trainModel(mlp_model, train_loader, mlp_opt, loss_function)

mlp_train_accuracy = accuracy(mlp_model, train_loader)
mlp_test_accuracy = accuracy(mlp_model, test_loader)
print(f"MLP Train Accuracy {mlp_train_accuracy}")
print(f"MLP Test Accuracy {mlp_test_accuracy}")


Epoch 1/100, Loss: 1.6640, Train Accuracy: 0.6493, Test Accuracy: 0.6338
Epoch 2/100, Loss: 1.1015, Train Accuracy: 0.7689, Test Accuracy: 0.7844
Epoch 3/100, Loss: 0.8156, Train Accuracy: 0.7973, Test Accuracy: 0.8037
Epoch 4/100, Loss: 0.6903, Train Accuracy: 0.8162, Test Accuracy: 0.8102
Epoch 5/100, Loss: 0.6242, Train Accuracy: 0.8292, Test Accuracy: 0.8172
Epoch 6/100, Loss: 0.5827, Train Accuracy: 0.8366, Test Accuracy: 0.8179
Epoch 7/100, Loss: 0.5537, Train Accuracy: 0.8427, Test Accuracy: 0.8275
Epoch 8/100, Loss: 0.5321, Train Accuracy: 0.8481, Test Accuracy: 0.8353
Epoch 9/100, Loss: 0.5150, Train Accuracy: 0.8511, Test Accuracy: 0.8340
Epoch 10/100, Loss: 0.5013, Train Accuracy: 0.8539, Test Accuracy: 0.8333
Epoch 11/100, Loss: 0.4896, Train Accuracy: 0.8578, Test Accuracy: 0.8366
Epoch 12/100, Loss: 0.4792, Train Accuracy: 0.8592, Test Accuracy: 0.8411
Epoch 13/100, Loss: 0.4702, Train Accuracy: 0.8618, Test Accuracy: 0.8430
Epoch 14/100, Loss: 0.4618, Train Accuracy: 0.8