In [1]:
# Import modules
import torch
import torch.nn as nn
from torch.nn import functional
from torch.nn import Module
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# package for progress bars
from tqdm import tqdm
from mushroom_dataloader import enumerated_data, numerize_data, one_hot, Dataset

In [2]:
#### Where to run your model ####
# if you have a gpu you would like to run your model on the gpu for shorter runtime:
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device("cpu")

In [3]:
#################
### LOAD DATA ###
#################

In [4]:
PATH = './mushroom_data/agaricus-lepiota.data'

In [5]:
with open(PATH, 'r') as f:
    data = [el.strip().split(',') for el in f.readlines()]
input_data = [el[1:] for el in data] # list1(list2); list2 consists of 23 features for each mushroom 
output_data = [el[0] for el in data] # list(str); contains the output values [e, p]

In [6]:
# Split data
train_instance_number = int(len(input_data) * 0.7)

train_input = input_data[:train_instance_number]
train_output = output_data[:train_instance_number]

test_input = input_data[train_instance_number:]
test_output = output_data[train_instance_number:]

In [7]:
# Adapt data to Torch requirements (make DataLoader) #

train_inp = numerize_data(train_input)
train_out = one_hot(train_output)
train_dataset = Dataset(enumerated_data(train_inp, train_out))
train_dataloader = DataLoader(train_dataset)

test_inp = numerize_data(test_input)
test_out = one_hot(test_output)
test_dataset = Dataset(enumerated_data(test_inp, test_out))
test_dataloader = DataLoader(test_dataset)

In [8]:
# for el in dataloader:
#     print(el)
#     break

In [9]:
######################
# Define Model Class #
######################

In [10]:
class MushroomClassifier(nn.Module):        # inherit from PyTorch model class (important!)
    """
    A classifier for the mushroom dataset
    Predicts for a tensor describing the attributes of a mushroom whether the mushroom is edible or poisonous
    A 2 layer Feedforward Network with ReLU activation funct
    """

    ## init function
    # needs to get all parameters that your model should have 
    def __init__(self,
                input_size: int,
                number_classes: int,
                hidden_size: int):
        """

        :param input_size: the size of the input layer; needs to match the length of an input tensor
        :param number_classes: the number of different classes (outputs); will be the output dimension
        :param hidden_size: the size of the hidden layer
        """
        super(MushroomClassifier, self).__init__()          # important!
        
        # define each of the layers of your model: type of layer, dimensions
        self.layer1 = nn.Linear(input_size, hidden_size)      # [22, 128]
        self.layer2 = nn.Linear(hidden_size, number_classes)  # [128, 2]



        #Alternative: adding a third layer of a smaller size
        #self.layer2 = nn.Linear(hidden_size, hidden_size_2) # [128, 54]
        #self.layer3 = nn.Linear(hidden_size_2, number_classes) # [54, 2]


    ## forward function
    def forward(self, 
                input_batch):
        """

        :param input_batch: a batch of tensors corresponding to the data; shape: [batch_size, input_length]
        :return: the predictions of the model; shape: [batch_size, number_classes]
        """
        # feed input batch into the first layer
        out_first_layer = self.layer1(input_batch)

        # apply (sigmoid) activation function
        out_first_activation = nn.functional.sigmoid(out_first_layer)

        # feed output of activation function to the second layer
        out_sec_layer = self.layer2(out_first_activation)

        return out_sec_layer
        
        #         FOR ALTERNATIVE:
        #  Applying the activation function to second layer output; passing it to the third layer; returning the output of the third layer
        #out_sec_activation = nn.functional.relu(out_sec_layer)
        #out_third_layer = self.layer3(out_sec_activation)
        
#        return out_sec_layer
        #return out_third_layer

In [11]:
####################
# Initialize Model #
####################

# create an instance of the model you would like to train

HIDDEN_SIZE = 128
input_size = len(train_input[0])
num_classes = 2

classifier = MushroomClassifier(input_size=input_size, number_classes=num_classes,
                                hidden_size=HIDDEN_SIZE)

# move model to device (default - CPU)
classifier.to(device)

MushroomClassifier(
  (layer1): Linear(in_features=22, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=2, bias=True)
)

In [12]:
#############################
# Set Hyperparameter Values #
#############################

In [13]:
### Training Parameters ###

# number of epochs
NUM_EPOCHS = 5

# the learning rate
LEARNING_RATE = 0.001

# the optimizer to use
OPTIMIZER = optim.SGD

# loss function 
LOSS_FUNCTION = nn.functional.mse_loss

# number of instances per batch
BATCH_SIZE = 1

In [14]:
#################
# Training loop #
#################

In [19]:
def train_model(model: Module, 
                train_data: DataLoader,
                num_epochs: int,
                optimizer_type,
                loss_function,
                learning_rate: float) -> None:
    """

    :param model: a pytorch model
    :param train_data: a dataloader for getting the training instances
    :param num_epochs: the number of epochs to train
    :param optimizer_type: the type of optimizer to use for training
    :param loss_function: the type of loss function to use
    :param learning_rate: the learning rate for the optimizer
    :return:
    """
    
    print(f'--------- Start Training ------------')

    # TODO: remove
    batch_losses = []

    # Important: bring model into training mode
    model.train()

    optimizer = optimizer_type(params=model.parameters(), lr=learning_rate)

    # run training for specified number of epochs; use tqdm to keep track of progress / estimated run time 
    for epoch in tqdm(range(num_epochs), desc='Classifier Training\n'):
        
        print(f'---------- Started Epoch {epoch} -----------')

        for batch in train_data:

            # get the input instances (and move them to the device you use)
            input_attributes = batch[0].to(device)
            # get the corresponding labels
            gold_labels = batch[1].to(device)


            # compute model predictions with current model parameters
            model_output = model(input_attributes)

            # Compute Loss for current batch
            loss = loss_function(model_output, gold_labels)

            #print(f'Training Loss: {loss} \n')
            #print(type(loss))

            #TODO: remove
            batch_losses.append(float(loss))

            #Important: otherwise you add up your gradients for all batches
            optimizer.zero_grad()

            loss.backward()

            # Update parameters
            optimizer.step()

         # report mean loss of epoch
        average_loss = sum(batch_losses) / len(batch_losses)
        print(f'\nAverage loss on training data: {average_loss}')


        # the training loop function does not return anything because the model object gets changed itself
        

In [20]:
###################
# Train the model #
###################

# run a complete training loop
train_model(model=classifier, train_data=train_dataloader, num_epochs=NUM_EPOCHS,
            optimizer_type=OPTIMIZER, loss_function=LOSS_FUNCTION, learning_rate=LEARNING_RATE)

# now the model object you defined above in the initialization cell is trained

--------- Start Training ------------


Classifier Training
:   0%|                               | 0/5 [00:00<?, ?it/s]

---------- Started Epoch 0 -----------


Classifier Training
:  20%|████▌                  | 1/5 [00:03<00:13,  3.27s/it]


Average loss on training data: 0.03779443117974892
---------- Started Epoch 1 -----------


Classifier Training
:  40%|█████████▏             | 2/5 [00:06<00:09,  3.14s/it]


Average loss on training data: 0.03650831367259234
---------- Started Epoch 2 -----------


Classifier Training
:  60%|█████████████▊         | 3/5 [00:09<00:06,  3.17s/it]


Average loss on training data: 0.035346548067031254
---------- Started Epoch 3 -----------


Classifier Training
:  80%|██████████████████▍    | 4/5 [00:12<00:03,  3.21s/it]


Average loss on training data: 0.03430127108056262
---------- Started Epoch 4 -----------


Classifier Training
Classifier Training███████████| 5/5 [00:15<00:00,  3.18s/it]
: 100%|███████████████████████| 5/5 [00:15<00:00,  3.19s/it]


Average loss on training data: 0.03336245147667138





In [None]:
##############
# Evaluation #
##############

In [15]:
# bring model in evaluation mode
classifier.eval()

# important: otherwise you will compute gradients while running the model on your test data
with torch.no_grad():
    for batch in test_dataloader:
        test_instance = batch[0]
        test_target = batch[1]

        # run model on test instances
        # compute evaluation metrics

# bring back into train mode again
classifier.train()

MushroomClassifier(
  (layer1): Linear(in_features=22, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=2, bias=True)
)

In [None]:
##################################
# Save and Load Model Parameters #
##################################

In [17]:
# save the model parameters in a .pt file
torch.save(classifier.state_dict(), "./results/model_parameters_mushroom.pt")

# load trained model parameters again

# first create an instance of the model class
trained_classifier = MushroomClassifier(input_size=input_size, number_classes=num_classes, hidden_size=HIDDEN_SIZE)

# then load the trained parameters
trained_classifier.load_state_dict(torch.load("./results/model_parameters_mushroom.pt"))

<All keys matched successfully>