# Introduction to PyTorch
### DSC 399: Advanced Applications and Interpretability of Neural Networks 

## Imports and Data Set Up

In [43]:
#############
## IMPORTS ##
#############

# For Data Set Loading and Preprocessing
# Data set for the feedforward neural networks
from sklearn.datasets import load_breast_cancer
# Train test split function from sklearn
from sklearn.model_selection import train_test_split
# Standard scaler to normalize from sklearn
from sklearn.preprocessing import StandardScaler
# Data sets for CNNs and RNNs from Tensorflow
from tensorflow.keras.datasets import mnist, imdb
# Pad sequences from Keras for NLP data formatting
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tensorflow/Keras Imports
# Import base tensorflow
import tensorflow as tf
# Import the sequential model to create neural networks
from tensorflow.keras.models import Sequential
# Import different layer types needed from Keras
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, Embedding

# PyTorch Imports
# Import base PyTorch
import torch
# Import the neural network capabilities
import torch.nn as nn
# Import the optimizers
import torch.optim as optim

In [None]:
#################################
## IMPORT AND FORMAT DATA SETS ##
#################################
# This data set, which predicts if a person does or does not have breast cancer,
# is from sklearn. We will import it, perform a train test split, and format the 
# data set to be used in a neural network before returning the X and y components
# of the training and test data set.
def load_breast_cancer_data(test_size=0.2, random_state=None):
    """Load, split, and standardize the sklearn breast cancer data set.

    Args:
        test_size: Size of the held-out test set, forwarded unchanged to
            ``train_test_split``. Defaults to 0.2 (20% of the samples).
        random_state: Optional seed forwarded to ``train_test_split``. The
            default of None keeps the original behavior (a different random
            split on every call); pass an integer to make the split, and
            therefore the reported accuracies, reproducible.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The feature arrays are
        standardized; the label arrays are returned exactly as split.
    """
    # Load the features and the binary labels from sklearn
    X, y = load_breast_cancer(return_X_y=True)

    # Split the dataset into training and testing sets using the requested
    # test size and (optionally) a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Standardize the features; neural networks train better on normalized
    # inputs. The scaler is fit on the training split only and then reused on
    # the test split so no test-set statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Return the data
    return X_train, X_test, y_train, y_test

# This data set, containing images of hand written digits and the number
# being shown, will be used for convolutional neural networks
def load_mnist_data():
    """Fetch the MNIST digits from Keras and rescale the pixel values.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. Both image arrays are
        divided by 255.0; the label arrays are returned exactly as loaded.
    """
    # Grayscale pixels range from 0 to 255, so dividing by the maximum value
    # rescales every image to the range [0, 1].
    max_pixel = 255.0

    # Keras ships the data already separated into training and test splits.
    train_split, test_split = mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    return (
        train_images / max_pixel,
        test_images / max_pixel,
        train_labels,
        test_labels,
    )

# This data set, which contains the text of movie reviews and whether each review
# is positive or negative, will be used for the recurrent neural networks to perform
# natural language processing. Note that the data set is already tokenized. The
# arguments are the number of unique words allowed across all reviews and the number of
# words allowed in each review.
def load_imdb_data(num_words=10000, maxlen=500):
    """Load the tokenized IMDB reviews and force them to a common length.

    Args:
        num_words: Vocabulary cap passed to ``imdb.load_data``.
        maxlen: Number of tokens every review has after padding/truncation.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` where the review arrays
        have been run through ``pad_sequences`` and the labels are returned
        as loaded.
    """
    # The data set already comes split into training and test portions.
    train_split, test_split = imdb.load_data(num_words=num_words)
    train_reviews, train_labels = train_split
    test_reviews, test_labels = test_split

    # Make every review exactly maxlen tokens long: short reviews are filled
    # with zeros and long reviews are cut down. With pad_sequences' default
    # settings both the padding and the truncation happen at the start of
    # the sequence.
    padded_train = pad_sequences(train_reviews, maxlen=maxlen)
    padded_test = pad_sequences(test_reviews, maxlen=maxlen)

    return padded_train, padded_test, train_labels, test_labels



## Feedforward Neural Networks

In [None]:
####################################################
## Feedforward Neural Network in Tensorflow/Keras ##
####################################################
# Create the model using Keras. It has an input layer of 30 neurons, a first hidden
# layer of 64 neurons with a relu activation function, a second hidden layer of 64
# neurons and a relu activation function, and an output layer with one neuron and
# a sigmoid activation function. This will perform the binary classification.
# The input size is declared with an explicit Input object; passing input_dim to
# the first Dense layer is deprecated and triggers a warning in current Keras.
model = Sequential([
    tf.keras.Input(shape=(30,)),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model using the Adam optimizer, the binary cross entropy loss function
# (for binary classification), and display the accuracy when training.
model.compile(optimizer="adam",loss='binary_crossentropy',metrics=['accuracy'])

# Show model summary
model.summary()

# Pull the training and test data from the previously defined function.
X_train, X_test, y_train, y_test = load_breast_cancer_data()

# Train the neural network for 20 epochs with no validation split and no
# information printed each epoch. Note that batch_size=None does not mean
# "no batches": Keras falls back to its default mini-batch size of 32.
model.fit(X_train, y_train, epochs=20, batch_size=None, validation_split=None,
          verbose=0)

# Test the model and print the test accuracy
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy:", test_acc*100, "%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Test Accuracy: 96.49122953414917 %


In [None]:
###########################################
## Feedforward Neural Network in PyTorch ##
###########################################
# Neural networks in PyTorch are created with a class that inherits from the
# neural network functionality from PyTorch. There are two ways to create the class.
# The first is the most similar to Keras, but also has the least ability to 
# customize the network.
class FNN(nn.Module):
    """Feedforward binary classifier assembled with ``nn.Sequential``.

    Architecture: 30 inputs -> 64 (ReLU) -> 64 (ReLU) -> 1 (sigmoid). The
    single output is a probability, which is what ``nn.BCELoss`` expects.
    """

    def __init__(self):
        # Set up everything inherited from nn.Module before adding layers.
        super().__init__()

        # nn.Linear(in_features, out_features) is a plain fully connected
        # layer; the two numbers fix the shape of its weight matrix.
        layers = [
            # First hidden layer: 30 input features feeding 64 neurons,
            # followed by a relu activation.
            nn.Linear(30, 64),
            nn.ReLU(),
            # Second hidden layer: 64 inputs from the previous layer feeding
            # 64 neurons, again followed by relu.
            nn.Linear(64, 64),
            nn.ReLU(),
            # Output layer: 64 inputs and a single neuron whose value is
            # squashed by the sigmoid activation function for binary
            # classification.
            nn.Linear(64, 1),
            nn.Sigmoid(),
        ]

        # Like Keras' Sequential, this chains the layers in the order given.
        # Storing it on self makes it part of the module rather than a local
        # variable of __init__.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layers in order and return the probabilities."""
        probabilities = self.net(x)
        return probabilities
    
# The below class makes the same neural network as the above class, but without
# using Sequential. This means that the user has more control over how data flows
# through the model, but it is more involved.
class FNN_V2(nn.Module):
    """The same 30 -> 64 -> 64 -> 1 network as a hand-wired module.

    Layers are stored as individual attributes and connected explicitly in
    ``forward``, which gives full control over how data flows through them.
    """

    def __init__(self):
        super().__init__()
        # Declare the building blocks. Nothing is connected yet; the wiring
        # happens in forward().
        self.hidden_layer1 = nn.Linear(30, 64)
        self.hidden_layer2 = nn.Linear(64, 64)
        self.output = nn.Linear(64, 1)

        # Activation functions, reused wherever they are needed.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid probability for each row of ``x``."""
        # First hidden layer followed by relu.
        first_hidden = self.hidden_layer1(x)
        first_hidden = self.relu(first_hidden)

        # Second hidden layer followed by relu.
        second_hidden = self.hidden_layer2(first_hidden)
        second_hidden = self.relu(second_hidden)

        # Output layer followed by sigmoid.
        logits = self.output(second_hidden)
        return self.sigmoid(logits)

# Get the training and test data from the function defined above.
X_train, X_test, y_train, y_test = load_breast_cancer_data()

# Unlike Keras, PyTorch modules work on torch tensors rather than NumPy arrays,
# so convert the data before passing it to the network. The labels get an
# extra dimension so their shape (N, 1) matches the network output.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create an instance of one of the classes (FNN_V2 builds the same network)
model = FNN()

# Define the loss function (binary cross entropy) and the optimizer (Adam)
loss_func = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# This is the training process. We will have 20 epochs and will update the
# model once per epoch using the whole training set as a single batch, with no
# validation data set. (The Keras cell above uses mini-batches of 32 instead,
# so it performs many more weight updates in the same number of epochs.)
epochs = 20
for epoch in range(epochs):
    # Put the model in training mode
    model.train()
    # Clear the old gradients so they do not affect the current pass
    optimizer.zero_grad()
    # Get the outputs of the model.
    outputs = model(X_train)
    # Compute the value of the loss function and then do the backpropagation
    loss = loss_func(outputs, y_train)
    loss.backward()
    # Update the parameters of the model according to the backpropagation
    optimizer.step()

# Switch to evaluation mode to test the trained model.
model.eval()
# Use no_grad to not track gradients and make the computation faster
with torch.no_grad():
    # Pass the test data to the model and get the probabilities
    probs = model(X_test)
    # If the probability is greater than or equal to 0.5 then predict a 1,
    # if lower then predict a 0.
    preds = (probs >= 0.5).float()
    # Compute the accuracy as the fraction of correct predictions
    accuracy = (preds == y_test).float().mean()
# Print the accuracy as a percent. This must report the PyTorch `accuracy`
# computed above, not `test_acc`, which is left over from the Keras cell.
print("Test Accuracy:", accuracy.item()*100, "%")

In [50]:
######################################################
## Convolutional Neural Network in Tensorflow/Keras ##
######################################################
# Create a convolutional neural network (CNN) using keras to predict the values
# shown in the images of the MNIST data set.
model = Sequential([
    # The input layer receives images that are 28x28 and grayscale (1 value
    # per pixel). It is declared with an explicit Input object; passing
    # input_shape to the first layer is deprecated and triggers a warning in
    # current Keras.
    tf.keras.Input(shape=(28, 28, 1)),

    # Start with a convolutional layer with 32 filters, a 3x3 kernel size, and a
    # relu activation function. The convolutional layer is followed by a max
    # pooling layer with a pool size of 2x2.
    Conv2D(32, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),

    # This is followed by another set of convolutional and pooling layers, but in
    # this case the convolutional layer has 64 filters.
    Conv2D(64, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the feature maps into a one dimensional vector per image so they
    # can be processed by the dense layers.
    Flatten(),
    # Add a post-processing dense layer with 128 neurons and a relu activation
    # function. Then an output layer with 10 neurons (10 classes) and a softmax
    # activation function.
    Dense(128, activation="relu"),
    Dense(10, activation="softmax")
])

# Compile the model with the Adam optimizer, the sparse categorical cross entropy
# loss function (the labels are integer class ids), and show the accuracy during
# training.
model.compile(optimizer="adam",loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])

# Print a summary of the model.
model.summary()

# Retrieve the training and test data from the previously defined function
X_train, X_test, y_train, y_test = load_mnist_data()

# Truncate the size of the training data to reduce the number of images and thus
# the training time
X_train = X_train[:10000]
y_train = y_train[:10000]

# Fit the neural network with the training data for 5 epochs with no validation
# split. Note that batch_size=None does not mean "no batches": Keras falls back
# to its default mini-batch size of 32.
model.fit(X_train, y_train, epochs=5, batch_size=None, validation_split=None, verbose=0)

# Create predictions using the test data and print the test accuracy
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy:", test_acc*100, "%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Test Accuracy: 97.71999716758728 %


In [None]:
#############################################
## Convolutional Neural Network in PyTorch ##
#############################################
# Create a class to recreate the above Keras CNN using PyTorch syntax.
class CNN(nn.Module):
    """PyTorch counterpart of the Keras CNN for 28x28 single-channel images.

    Two conv/relu/max-pool stages are followed by a dense layer of 128
    neurons and a 10-way output layer. The output is raw logits (no softmax),
    which is the form ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. Conv2d takes (in_channels,
        # out_channels): 1 -> 32 filters, then 32 -> 64 filters, both 3x3.
        conv_stages = [
            nn.Conv2d(1, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]

        # Dense head. After the two stages each image is a 64 x 5 x 5 tensor
        # (28 -> 26 -> 13 -> 11 -> 5 along each spatial side), so the
        # flattened vector has 64 * 5 * 5 entries.
        flattened_size = 64 * 5 * 5
        dense_head = [
            nn.Linear(flattened_size, 128),
            nn.ReLU(),
            nn.Linear(128, 10),  # logits, one per digit class
        ]

        self.model = nn.Sequential(*conv_stages, nn.Flatten(), *dense_head)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        logits = self.model(x)
        return logits
    
# Load data
X_train, X_test, y_train, y_test = load_mnist_data()

# Truncate the training data to reduce the training time (same subset as the
# Keras cell)
X_train = X_train[:10000]
y_train = y_train[:10000]

# Convert to PyTorch tensors and add the channel dimension. The dtype must be
# given explicitly: dividing the images by 255.0 in load_mnist_data produces
# float64 arrays, while the Conv2d/Linear weights are float32, and PyTorch
# raises a dtype mismatch error if the two differ.
X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)  # (N, 1, 28, 28)
X_test  = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)

# CrossEntropyLoss expects integer class ids as targets
y_train = torch.tensor(y_train, dtype=torch.long)
y_test  = torch.tensor(y_test, dtype=torch.long)

model = CNN()

# CrossEntropyLoss applies the softmax internally, which is why the network
# returns raw logits
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train for 5 epochs using the whole training set as a single batch, i.e. one
# weight update per epoch. (The Keras cell uses mini-batches of 32 instead, so
# it performs many more updates and reaches a much higher accuracy.)
epochs = 5
for epoch in range(epochs):
    model.train()
    # Clear the gradients left over from the previous step
    optimizer.zero_grad()

    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# Evaluate on the test images without tracking gradients
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    # The predicted class is the index of the largest logit
    preds = torch.argmax(outputs, dim=1)
    accuracy = (preds == y_test).float().mean()

# .item() turns the 0-d tensor into a plain Python number for printing
print("Test Accuracy:", accuracy.item()*100, "%")

Test accuracy: 0.691


In [34]:
##################################################
## Recurrent Neural Network in Tensorflow/Keras ##
##################################################

# Keep only the 1000 most frequent words and make every review exactly 100
# tokens long to keep training fast.
num_words = 1000
maxlen = 100
X_train, X_test, y_train, y_test = load_imdb_data(num_words, maxlen)

model = Sequential([
    # Each review is a sequence of maxlen integer word ids. The sequence
    # length is declared with an explicit Input object because the
    # input_length argument of Embedding is deprecated in current Keras.
    tf.keras.Input(shape=(maxlen,), dtype="int32"),
    # Map each word id to a learned 128-dimensional vector
    Embedding(input_dim=num_words, output_dim=128),
    # LSTM layer with 64 units that reads the embedded sequence
    LSTM(64),
    # One sigmoid output neuron for the positive/negative classification
    Dense(1, activation='sigmoid')
])

# Adam optimizer, binary cross entropy loss (binary classification), and
# report the accuracy while training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()



In [None]:
# Train the LSTM for 5 epochs with per-epoch logging turned off (verbose=0)
# and no validation split. With batch_size=None Keras falls back to its
# default mini-batch size of 32.
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=None,
    validation_split=None,
    verbose=0,
)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.7084 - loss: 0.5480
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.8271 - loss: 0.3825
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - accuracy: 0.8530 - loss: 0.3386
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.8679 - loss: 0.3050
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.8775 - loss: 0.2938


<keras.src.callbacks.history.History at 0x3c6a2cfd0>

In [36]:
# Score the trained model on the test reviews; evaluate returns the loss
# followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print(f"Test accuracy: {test_acc:.3f}")

Test accuracy: 0.841


In [37]:
#########################################
## Recurrent Neural Network in PyTorch ##
#########################################
class IMDBRNN(nn.Module):
    """Embedding -> LSTM -> sigmoid classifier for tokenized reviews.

    Args:
        vocab_size: Number of rows in the embedding table (one per word id).
        embed_dim: Size of each word vector. Defaults to 128.
        hidden_dim: Size of the LSTM hidden state. Defaults to 64.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=64):
        super().__init__()
        # Lookup table turning integer word ids into dense vectors.
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # batch_first=True makes the LSTM take (batch, seq_len, features).
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        # Single output neuron turned into a probability by the sigmoid.
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one probability per sequence in the batch of word ids ``x``."""
        embedded = self.embedding(x)  # (batch, seq_len, embed_dim)

        # Only the final hidden state is needed; the per-step outputs and the
        # cell state are discarded.
        _, (final_hidden, _) = self.lstm(embedded)
        last_layer_state = final_hidden[-1]  # (batch, hidden_dim)

        logits = self.fc(last_layer_state)
        return self.sigmoid(logits)


In [38]:
# Load the IMDB reviews with the same vocabulary size and review length that
# the Keras model used.
num_words = 1000
maxlen = 100
X_train, X_test, y_train, y_test = load_imdb_data(num_words, maxlen)

# The reviews are integer word ids that index the embedding table, so they
# are converted to long (integer) tensors.
X_train, X_test = (
    torch.tensor(reviews, dtype=torch.long) for reviews in (X_train, X_test)
)

# The labels become float tensors with a trailing dimension of size 1 so
# their shape matches the (batch, 1) output of the network.
y_train, y_test = (
    torch.tensor(labels, dtype=torch.float32).unsqueeze(1)
    for labels in (y_train, y_test)
)

In [39]:
# Build the network; the embedding table needs one row per word id.
model = IMDBRNN(num_words)

# Binary cross entropy on the sigmoid probabilities, optimized with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
)

# Each epoch runs the entire training set through the model as one batch, so
# there is exactly one weight update per epoch.
epochs = 5
for epoch in range(epochs):
    model.train()

    # Reset the gradients accumulated during the previous step.
    optimizer.zero_grad()

    # Forward pass followed by the loss on the training labels.
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backpropagate and apply the update.
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [1/5], Loss: 0.6940
Epoch [2/5], Loss: 0.6920
Epoch [3/5], Loss: 0.6901
Epoch [4/5], Loss: 0.6883
Epoch [5/5], Loss: 0.6865


In [40]:
# Switch to evaluation mode and score the test reviews without tracking
# gradients.
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    # A probability of at least 0.5 counts as a positive review (1.0),
    # anything lower as negative (0.0).
    preds = outputs.ge(0.5).float()
    # Fraction of predictions that match the true labels.
    accuracy = preds.eq(y_test).float().mean()

print(f"Test accuracy: {accuracy.item():.3f}")

Test accuracy: 0.558
