In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
def seed_all(seed=42):
    """Seed the global NumPy and PyTorch random number generators.

    seed: integer seed handed to both libraries (default 42).

    Returns nothing; only the libraries' global RNG state is changed.
    """
    for apply_seed in (np.random.seed, torch.manual_seed):
        apply_seed(seed)

In [3]:
# Seed NumPy and PyTorch (default seed 42) before any random data is generated
seed_all()

In [4]:
def make_regression_outputs_and_targets(n_samples, out_min=20, out_max=200, noise_loc=0, noise_scale=5):
    """Simulate regression model outputs and matching noisy targets.

    n_samples: number of (output, target) pairs to generate.
    out_min, out_max: bounds of the uniform distribution the outputs
                      are drawn from.
    noise_loc, noise_scale: mean and standard deviation of the Gaussian
                            noise added to the outputs to form the targets.

    Returns (outputs, targets): two 1-D tensors of length n_samples,
    where targets = outputs + noise. Values are drawn from NumPy's global
    RNG, so seed it beforehand for reproducible results.
    """
    # Honor the caller's range instead of a hard-coded one
    outputs = torch.tensor(np.random.uniform(out_min, out_max, n_samples))
    noise = torch.tensor(np.random.normal(noise_loc, noise_scale, n_samples))
    targets = outputs + noise
    return outputs, targets

In [5]:
# Simulate 20 model outputs and their noisy targets
outputs, targets = make_regression_outputs_and_targets(20)

In [6]:
outputs

In [7]:
targets

In [8]:
# Step 1: per-sample error (target minus model output)
error = targets - outputs
error

In [9]:
# Step 2: square each error
squared_error = error.square()
squared_error

In [10]:
# Step 3: average the squared errors to get the mean squared error
mean_squared_error = squared_error.mean()
mean_squared_error

In [11]:
def mse(yhat, y):
    """Mean squared error between predictions `yhat` and targets `y`.

    Subtracts the predictions from the targets, squares every residual
    and returns the mean of the squares.
    """
    residual = y - yhat
    return residual.pow(2).mean()

In [12]:
mse(outputs, targets)

In [13]:
mse = nn.MSELoss()

In [14]:
mse(outputs, targets)

In [15]:
F.mse_loss(outputs, targets)

In [16]:
errors = torch.linspace(-10, 10, 50)
squared_erros = errors.square()

In [17]:
mse_fig, ax = plt.subplots(figsize=(10,10))
ax.plot(errors, squared_erros)
ax.set_xlabel("Error (yhat - y)")
ax.set_ylabel("Squared Error")

In [18]:
# Check our work: -log(0.7) is the value expected from the cross entropy
# example below, where the true class is given probability 0.7
-np.log(0.7)

In [19]:
def H(p, q):
    """Cross entropy of predicted probabilities `p` against targets `q`.

    Multiplies each target weight in `q` by the log of the matching entry
    of `p`, sums over all elements and negates the result.
    """
    weighted_log_probs = q * torch.log(p)
    return -weighted_log_probs.sum()

In [20]:
# Test it on our example: predicted probabilities first, then a one-hot
# target that marks class 1 as the true class
H(torch.tensor([0.1, 0.7, 0.2]), torch.tensor([0, 1, 0]))

In [21]:
# The same predicted probabilities, kept in a variable for the cells below
t = torch.tensor([0.1, 0.7, 0.2])

In [22]:
def our_cross_entropy(yhat, y):
    """Negative log of the entry of `yhat` selected by the index `y`.

    yhat: tensor of predicted probabilities for one sample.
    y: index of the true class.
    """
    prob_of_target = yhat[y]
    return torch.log(prob_of_target).neg()

In [23]:
# Same example, with the label given as a class index instead of a one-hot vector
our_cross_entropy(t, 1)

In [24]:
def avg_cross_entropy(yhat, y):
    """Average cross entropy over a batch of predicted probabilities.

    yhat: 2-D tensor of predicted probabilities, one row per sample and
          one column per class.
    y: the true class index for each row (a 1-D integer tensor with one
       entry per row), or a single index shared by every row.

    Returns the mean over rows of -log(yhat[row, y[row]]).
    """
    # Pair every row with its own label. Indexing with yhat[:, y] would
    # instead pick whole columns and yield an (n_rows, len(y)) matrix.
    rows = torch.arange(yhat.shape[0], device=yhat.device)
    return -yhat[rows, y].log().mean()

In [25]:
# Three rows of random scores, softmax-normalized along each row
t = torch.randn(3, 3).softmax(dim=1)
t

In [26]:
# One random class index (0, 1 or 2) per row
y = torch.randint(low=0, high=3, size=(3,))
y

In [27]:
avg_cross_entropy(t, y)

In [28]:
# Multiply a very small float by itself
# NOTE(review): Python floats are 64-bit, so this product looks like it is still
# representable rather than underflowing to 0 - confirm that is the intended demo
a = 0.00000000000000000000000000001
a * a

In [29]:
# Multiply a very large float by itself (note: `a` is rebound here)
# NOTE(review): likewise this does not appear to overflow to inf in 64-bit floats - confirm
a = 100000000000000000000000000000.
a * a

In [30]:
def make_classification_logits(n_classes, n_samples, pct_correct, confidence=1):
    """
    Simulate a batch of classifier logits together with class labels.

    n_classes: number of classes (columns of the logits)
    n_samples: number of rows
    pct_correct: float between 0 and 1. The chance that a given row
                 is built so its label receives the largest logit.
    confidence: scales the random margin added to the winning logit
                of a correct row.
                Closer to 0: less confident
                Larger: more confident

    Returns (logits, labels): a float32 tensor of shape
    (n_samples, n_classes) and a tensor holding one class index per row.
    All randomness comes from NumPy's global RNG.
    """
    class_ids = list(range(n_classes))
    # Start from uniformly random logits and uniformly random labels
    logits = np.random.uniform(-5., 5., (n_samples, n_classes))
    labels = np.random.choice(class_ids, size=(n_samples))
    # Largest magnitude in each row; a value placed on top of it is
    # at least as large as every other logit in that row
    row_peaks = np.abs(logits).max(axis=1)

    for row, peak in enumerate(row_peaks):
        label = labels[row]
        if np.random.random() <= pct_correct:
            # Correct row: lift the labelled class on top of the peak
            logits[row, label] = peak + np.random.random()*confidence
        else:
            # Wrong row: lift a randomly chosen other class instead
            other_ids = [c for c in class_ids if c != label]
            winner = np.random.choice(other_ids)
            logits[row, winner] = peak + np.random.random()/10

    return torch.tensor(logits, dtype=torch.float32), torch.tensor(labels)

In [31]:
# Create some logits and associated labels: 3 classes, 10 rows.
# There will be some error here, because pct_correct is 0.8!
logits, labels = make_classification_logits(3, 10, 0.8, confidence=1)
logits

In [32]:
# What are the normalized predicted probabilities for each class?
logits.softmax(dim=1)

In [33]:
# What are the indices (classes) of the highest predicted probabilities?
logits.softmax(dim=1).argmax(dim=1)

In [34]:
# How well do they match with our labels?
labels

In [35]:
# Module form of cross entropy; below it is called on the raw logits
cross_entropy = nn.CrossEntropyLoss()

In [36]:
cross_entropy(logits, labels)

In [37]:
# Functional form, called on the same logits and labels
F.cross_entropy(logits, labels)

In [38]:
# Apply our hand-written cross entropy to every (softmax row, label) pair
# and average the per-row losses
torch.tensor([
    our_cross_entropy(row_probs, label)
    for row_probs, label in zip(logits.softmax(dim=1), labels)
]).mean()

In [39]:
# Two-class case with pct_correct=1., so every row is built as a correct prediction
logits, labels = make_classification_logits(2, 10, 1., confidence=1)
logits

In [40]:
logits.softmax(dim=1)

In [41]:
labels

In [42]:
F.cross_entropy(logits, labels)

In [43]:
# Make some logits and labels, making sure not to get everything correct
# (one logit per sample; the labels are given as floats)
logits = torch.tensor([2.8, -1.4, 1.1, -.8])
labels = torch.tensor([1., 0., 0., 0.])

In [44]:
# View the logits
logits

In [45]:
# Normalize to probabilities
logits.sigmoid()

In [46]:
# Calculate the loss with the logits (functional form)
F.binary_cross_entropy_with_logits(logits, labels)

In [47]:
# Calculate the loss with the logits (module form)
nn.BCEWithLogitsLoss()(logits, labels)

In [48]:
# Calculate the loss from probabilities instead: sigmoid first, then plain BCE
F.binary_cross_entropy(logits.sigmoid(), labels)

In [49]:
# Module form of the probability-based loss
nn.BCELoss()(logits.sigmoid(), labels)

In [50]:
# Values are close...
torch.allclose(F.binary_cross_entropy_with_logits(logits, labels), F.binary_cross_entropy(logits.sigmoid(), labels))

In [51]:
# ... but not the same
F.binary_cross_entropy_with_logits(logits, labels) == F.binary_cross_entropy(logits.sigmoid(), labels)

In [52]:
# Size of the gap between the two computations
F.binary_cross_entropy_with_logits(logits, labels) - F.binary_cross_entropy(logits.sigmoid(), labels)

In [53]:
# Column 0 may be p(happy) = 1 - p(sad),
# and column 1 may be p(lakers) = 1 - p(not lakers)
logits = torch.randn(10, 2) * 2
logits

In [54]:
# Random 0/1 targets, drawn separately for each of the two columns
labels = torch.randint(0, 2, (10, 2)).float()
labels

In [55]:
logits.sigmoid()

In [56]:
bce = nn.BCEWithLogitsLoss()

In [57]:
# Loss computed over both columns at once
bce(logits, labels)

In [58]:
# Loss for column 0 alone
bce(logits[:,0], labels[:,0])

In [59]:
# Loss for column 1 alone
bce(logits[:,1], labels[:,1])

In [60]:
# Average of the two per-column losses, for comparison with the combined loss
(bce(logits[:,0], labels[:,0]) + bce(logits[:,1], labels[:,1])) / 2