# Pytorch

In [1]:
import numpy as np 

# PyTorch must already be installed (e.g. `pip install torch`); here we only import it
import torch
import torch.nn as nn

### Tensors 
The tensor data structure is almost exactly like a numpy array. The main differences are that tensors can also be loaded onto GPUs and can track gradients for automatic differentiation.

In [2]:
# Start from a plain nested Python list (2 rows x 2 columns) and wrap it in a numpy array
data = [[1, 2],[3, 4]]
np_array = np.array(data)
np_array

array([[1, 2],
       [3, 4]])

In [3]:
# Build a tensor from the numpy array.
# Per the torch.from_numpy documentation, the tensor shares memory with the array (no copy is made).
tensor = torch.from_numpy(np_array)
tensor

tensor([[1, 2],
        [3, 4]])

In [4]:
tensor.shape

torch.Size([2, 2])

In [5]:
# We can also create a tensor directly from a Python list
tensor2 = torch.tensor([1, 2, 3, 4])
tensor2

tensor([1, 2, 3, 4])

In [6]:
# Check which device it's on
tensor.device

device(type='cpu')

In [7]:
# Cast back into a numpy array.
# .detach() takes the tensor out of the autograd graph (required when it tracks gradients);
# .cpu() moves it to host memory (required when it lives on a GPU).
# Neither call changes the values of this plain CPU tensor.
np_array_back = tensor.detach().cpu().numpy()
np_array_back

array([[1, 2],
       [3, 4]])

### Logistic regression with Pytorch

In HW3, you implemented binary logistic regression using numpy; let's do it with PyTorch.

In [8]:
# Binary logistic regression again, so there are two output classes.
NUM_CLASSES = 2

# Toy training set: one example per row, one feature per column.
X_train = torch.tensor(
    [
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 1.0],
        [1.0, 1.0, 0.0],
        [0.0, 0.0, 1.0],
    ]
)

# One integer class label per example.
Y_train = torch.tensor([0, 1, 0, 1])

# Dataset dimensions, read off the feature matrix.
num_examples, num_features = X_train.shape

# Features and labels must describe the same number of examples.
assert len(X_train) == len(Y_train)

In [9]:
# Create a linear layer (this is an object!)
# The object holds the internal state (a weight matrix and a bias vector) and defines how they are applied
# For a batch X with one example per row it computes y = X W^T + b
# PyTorch initializes the weights randomly; no seed is set here, so the values differ from run to run
theta = nn.Linear(num_features, NUM_CLASSES)

In [10]:
theta

Linear(in_features=3, out_features=2, bias=True)

In [11]:
type(theta)

torch.nn.modules.linear.Linear

In [12]:
# Call the forward method explicitly: this applies the linear layer to every row of X_train
# The result has one row per example and one column per class
out = theta.forward(X_train)
out.shape

torch.Size([4, 2])

In [13]:
# Shorthand: calling the layer object directly runs forward for us.
# This is the preferred form in PyTorch, because the call also runs any hooks registered on the module
out = theta(X_train)
out.shape

torch.Size([4, 2])

In [14]:
out

tensor([[-0.7654, -0.0908],
        [-0.1242, -0.5379],
        [-0.3477, -0.5586],
        [-0.5419, -0.0702]], grad_fn=<AddmmBackward0>)

In [15]:
# Log softmax function (working with log-probabilities is more numerically stable)
# dim=1 normalizes across the columns, i.e. across the class scores of each example
log_softmax = nn.LogSoftmax(dim=1)
log_probs = log_softmax(out)

In [16]:
# Get back to numpy
# .detach() is needed here because log_probs is part of the autograd graph
log_probs_numpy = log_probs.detach().numpy()
log_probs_numpy

array([[-1.0862267 , -0.41171592],
       [-0.5075397 , -0.92124003],
       [-0.5932447 , -0.8041493 ],
       [-0.95656514, -0.48484987]], dtype=float32)

In [17]:
# How do we get probabilities out?
np.exp(log_probs_numpy)

array([[0.33748755, 0.6625125 ],
       [0.6019748 , 0.39802518],
       [0.5525316 , 0.4474684 ],
       [0.38421032, 0.61578965]], dtype=float32)

In [18]:
# Let's only get the positive class 

In [19]:
# Exponentiate the log-probabilities, then keep the column for class 1 (the positive class)
pred_pos_class = np.exp(log_probs_numpy)[:,1] # all rows, only second column
pred_pos_class

array([0.6625125 , 0.39802518, 0.4474684 , 0.61578965], dtype=float32)

In [20]:
# Threshold at 0.5: predict class 1 when the positive-class probability exceeds 0.5, otherwise class 0
y_pred = (pred_pos_class > 0.5).astype(int)
y_pred

array([1, 0, 0, 1])

In [21]:
# Sanity-check the 0.5 threshold on hand-picked probabilities:
# 0.6 should map to class 1 and 0.2 to class 0.
pred_pos_class = np.array([0.6, 0.2])
y_pred = np.greater(pred_pos_class, 0.5).astype(int)
y_pred

array([1, 0])

Let's put this back into the class structure we used for HW 3.

In [22]:
class BinaryLogisticRegressionModel(nn.Module):  # inherits from nn.Module, PyTorch's built-in base class for models
    """
    PyTorch implementation of binary logistic regression.

    The model is one linear layer followed by a log-softmax over the class
    dimension, so ``forward`` returns log-probabilities rather than
    probabilities.

    Args:
        num_features: number of input features per example.
        num_classes: number of output classes. ``predict`` reads column 1 as
            the positive class, so this must be at least 2.
    """
    def __init__(self, num_features, num_classes):
        super().__init__()
        self.theta = nn.Linear(num_features, num_classes)
        # dim=1 normalizes across the class scores of each example (row).
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, X):
        """Return log-probabilities with one row per example and one column per class."""
        out = self.theta(X)
        log_probs = self.log_softmax(out)
        return log_probs

    def predict(self, X):
        """
        Predict hard labels and positive-class probabilities for ``X``.

        Args:
            X: 2-D float tensor with one example per row.

        Returns:
            Tuple ``(y_pred, pred_pos_class)`` of numpy arrays with one entry
            per row of ``X``: the 0/1 label obtained by thresholding at 0.5,
            and the predicted probability of class 1.

        Note:
            Leaves the module in evaluation mode.
        """
        self.eval()
        # No gradients are needed at inference time.
        with torch.no_grad():
            pred_log_probs = self(X)
        # Use the log-probabilities computed for X (not a notebook-level
        # variable); .cpu() keeps the conversion valid for GPU tensors.
        log_probs_numpy = pred_log_probs.cpu().numpy()
        pred_pos_class = np.exp(log_probs_numpy)[:, 1]
        y_pred = (pred_pos_class > 0.5).astype(int)
        return y_pred, pred_pos_class

    def train_model(self, X, Y, loss_fn, optimizer, num_iterations):
        """Placeholder for the training loop; not implemented yet."""
        # We'll cover this on Tuesday
        ...

In [23]:
# Create an instance of the model 
model = BinaryLogisticRegressionModel(num_features, NUM_CLASSES) # initialize (weights start out random)

In [24]:
# Inference time (predict on a single example) 
# X_test is 2-D (one row, three features) so it has the same layout as the training matrix
X_test = torch.tensor([[1.0, 0.0, 0.0]])
# predict returns the hard 0/1 label and the positive-class probability
y_pred, pred_pos_class = model.predict(X_test)

In [25]:
# We can inspect our model here too 
print(f"Model structure: {model}\n")

# named_parameters() yields a (name, tensor) pair for each learnable parameter;
# param[:2] shows at most the first two entries along the first dimension
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: BinaryLogisticRegressionModel(
  (theta): Linear(in_features=3, out_features=2, bias=True)
  (log_softmax): LogSoftmax(dim=1)
)

Layer: theta.weight | Size: torch.Size([2, 3]) | Values : tensor([[ 0.4426,  0.3318, -0.1153],
        [ 0.3504,  0.0606, -0.0538]], grad_fn=<SliceBackward0>) 

Layer: theta.bias | Size: torch.Size([2]) | Values : tensor([0.0197, 0.2193], grad_fn=<SliceBackward0>) 



### More Pytorch

In [26]:
class BasicFeedForwardModel(nn.Module):
    """
    PyTorch implementation of a feed-forward network with a single hidden layer.

    The input passes through a linear layer with ReLU activation, then a
    linear output layer, then a log-softmax over the class dimension, so
    ``forward`` returns log-probabilities.

    Args:
        num_features: number of input features per example.
        num_classes: number of output classes. ``predict`` reads column 1 as
            the positive class, so this must be at least 2.
        hidden_dim1: number of units in the hidden layer.
    """
    def __init__(self, num_features, num_classes, hidden_dim1):
        super().__init__()
        self.hidden1 = nn.Linear(num_features, hidden_dim1)
        self.theta = nn.Linear(hidden_dim1, num_classes)
        # dim=1 normalizes across the class scores of each example (row).
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, X):
        """Return log-probabilities with one row per example and one column per class."""
        hid1 = nn.functional.relu(self.hidden1(X))
        out = self.theta(hid1)
        log_probs = self.log_softmax(out)
        return log_probs

    def predict(self, X):
        """
        Predict hard labels and positive-class probabilities for ``X``.

        Args:
            X: 2-D float tensor with one example per row.

        Returns:
            Tuple ``(y_pred, pred_pos_class)`` of numpy arrays with one entry
            per row of ``X``: the 0/1 label obtained by thresholding at 0.5,
            and the predicted probability of class 1.

        Note:
            Leaves the module in evaluation mode.
        """
        self.eval()
        # No gradients are needed at inference time.
        with torch.no_grad():
            pred_log_probs = self(X)
        # Use the log-probabilities computed for X (not a notebook-level
        # variable); .cpu() keeps the conversion valid for GPU tensors.
        log_probs_numpy = pred_log_probs.cpu().numpy()
        pred_pos_class = np.exp(log_probs_numpy)[:, 1]
        y_pred = (pred_pos_class > 0.5).astype(int)
        return y_pred, pred_pos_class

    def train_model(self, X, Y, loss_fn, optimizer, num_iterations):
        """Placeholder for the training loop; not implemented yet."""
        # We'll cover this on Tuesday
        ...

In [27]:
# Number of units in the single hidden layer
HIDDEN_DIM = 5
model_ff = BasicFeedForwardModel(num_features, NUM_CLASSES, HIDDEN_DIM) # initialize (weights start out random)

In [28]:
# Predict on the same single example, this time with the feed-forward model
X_test = torch.tensor([[1.0, 0.0, 0.0]])
y_pred, pred_pos_class = model_ff.predict(X_test)

In [29]:
# We can also save and load the models (useful for long training)
# NOTE: this pickles the whole model object, so the file can only be loaded where the model's class is defined.
# Saving model.state_dict() instead is the more portable approach recommended by the PyTorch docs.
torch.save(model, 'model.pth')

In [30]:
# Load model 
# WARNING: weights_only=False unpickles an arbitrary Python object, which can execute code;
# only load files you created yourself or otherwise trust.
model = torch.load('model.pth', weights_only=False)