In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.ensemble import RandomForestClassifier

In [None]:
#!pip install ucimlrepo
from ucimlrepo import fetch_ucirepo

# fetch dataset
covertype = fetch_ucirepo(id=31)

# data (as pandas dataframes)
X = covertype.data.features
y = covertype.data.targets

df_combined = X.copy()
df_combined['Cover_Type'] = y['Cover_Type']

# metadata
print(covertype.metadata)

# variable information
print(covertype.variables)

# Check-in Week 8/9/10

Here we will use a neural network for multiclass classification. The response variable here is the forest cover type

For this dataset, we will use a normal deep neural network



In [None]:
import torch
import torch.nn as nn

# Check for GPU availability and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Extremely simple model architecture
model = nn.Sequential()

# Input layer
model.add_module("Input", nn.Linear(in_features = X.shape[1], out_features = 1024))

# Hidden layers
model.add_module("Hidden Layer 1", nn.Linear(in_features=1024, out_features=1024))
model.add_module("Activation 1", nn.Sigmoid())
model.add_module("Hidden Layer 2", nn.Linear(in_features=1024, out_features=1024))
model.add_module("Activation 2", nn.Sigmoid())

# Output layer
model.add_module("Output", nn.Linear(in_features=1024, out_features = len(np.unique(y))))

# Move model to GPU if available
model = model.to(device)


In [None]:
batch_size = 100
from torch.utils.data import DataLoader, TensorDataset

# Shift the y labels to start from 0 (if this doesn't happen, PyTorch can't train)
y = y - 1

# Convert Trees Pandas DataFrame to Torch Tensors
X_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32)
y_tensor = torch.tensor(y.to_numpy().flatten(), dtype=torch.long)

# Split dataset into training, validation and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X_tensor, y_tensor, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

# Convert to DataLoader for batching
train_dl = DataLoader(train_dataset, batch_size =batch_size, shuffle=True)
val_dl = DataLoader(val_dataset, batch_size = batch_size, shuffle=False)
test_dl = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [None]:
num_epochs = 30

# We will use the cross entropy loss function for multiclass
loss_function = nn.CrossEntropyLoss()\

# Stochastic gradient descent with momentum as optimizer with lr and momentum from hyperparameter tuning
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Our training function (just the same thing from the MNIST notebook)
def train(model, num_epochs, train_dl, valid_dl, device, loss_function, optimizer):
    loss_hist_train = [0] * num_epochs
    accuracy_hist_train = [0] * num_epochs
    loss_hist_valid = [0] * num_epochs
    accuracy_hist_valid = [0] * num_epochs

    # Form
    for epoch in range(num_epochs):
        model.train()
        for x_batch, y_batch in train_dl:
            # Move batches to GPU if available
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            pred = model(x_batch)
            loss = loss_function(pred, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss_hist_train[epoch] += loss.item()*y_batch.size(0)
            is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
            accuracy_hist_train[epoch] += is_correct.sum().cpu()

        loss_hist_train[epoch] /= len(train_dl.dataset)
        accuracy_hist_train[epoch] /= len(train_dl.dataset)

        model.eval()
        with torch.no_grad():
            for x_batch, y_batch in valid_dl:
                # Move batches to GPU if available
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                pred = model(x_batch)
                loss = loss_function(pred, y_batch)
                loss_hist_valid[epoch] += loss.item() * y_batch.size(0)
                is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
                accuracy_hist_valid[epoch] += is_correct.sum().cpu()

        loss_hist_valid[epoch] /= len(valid_dl.dataset)
        accuracy_hist_valid[epoch] /= len(valid_dl.dataset)

        print(f'Epoch {epoch+1} accuracy: {accuracy_hist_train[epoch]:.4f} val_accuracy: {accuracy_hist_valid[epoch]:.4f}')
    return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid

torch.manual_seed(1)
hist_train_validation = train(model, num_epochs, train_dl, val_dl, device, loss_function, optimizer)

Epoch 1 accuracy: 0.4804 val_accuracy: 0.4882
Epoch 2 accuracy: 0.4938 val_accuracy: 0.4907
Epoch 3 accuracy: 0.5010 val_accuracy: 0.5131
Epoch 4 accuracy: 0.4982 val_accuracy: 0.4888
Epoch 5 accuracy: 0.4937 val_accuracy: 0.4833
Epoch 6 accuracy: 0.4935 val_accuracy: 0.4811
Epoch 7 accuracy: 0.5012 val_accuracy: 0.5013
Epoch 8 accuracy: 0.5012 val_accuracy: 0.4939
Epoch 9 accuracy: 0.5025 val_accuracy: 0.5011
Epoch 10 accuracy: 0.4930 val_accuracy: 0.4879
Epoch 11 accuracy: 0.5004 val_accuracy: 0.5083
Epoch 12 accuracy: 0.5017 val_accuracy: 0.4958
Epoch 13 accuracy: 0.4927 val_accuracy: 0.4964
Epoch 14 accuracy: 0.4962 val_accuracy: 0.4965


For our neural network training, we used crossentropyloss as the standard loss function as it combines logsoftmax to convert our logits into probabilities and measures how well the predicted probabilities match the true class labels.

Since we have 7 forest cover types, using CrossEntropyLoss penalizes confident wrong predictions more heavily than uncertain ones and we can see that the loss decreases as the epochs increase.

For our accuracy, we need it to detect overfitting by tracking it separately on our training and validation sets.

For our learning rate, we decided with learning rate of 0.01 being the best with fast convergence, stable training epochs, and a solid final validation accuracy.



For our hyperparameter tuning, we chose SGD from class with momentum to get convergence and was a solid choice for neural network training. For our batch size, we chose 100 as we wanted to have faster updates with generalization and 30 epochs to see model behavior after convergence which typically happens around 20 epochs without overfitting.

To complete everything, we split it into a training of 70, validation of 15, and test of 15 using PyTorch's default initialization. Our forward pass computes the predictions and CrossEntropyLoss calculates the loss and our backward pass computing the gradient with SGD as our optimization.