In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import timeit

from torch.autograd import Variable

import visdom
import numpy as np
import matplotlib.pyplot as plt
import sklearn

%matplotlib inline
%load_ext autoreload
%autoreload 2

from sklearn.preprocessing import OneHotEncoder
from uppercase import Dataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
window_size = 5
alphabet_size = 100

train = Dataset("data/uppercase_data_train.txt", window_size, alphabet=alphabet_size)
dev = Dataset("data/uppercase_data_dev.txt", window_size, alphabet=train.alphabet)
test = Dataset("data/uppercase_data_test.txt", window_size, alphabet=train.alphabet)

In [18]:
X, y = test.all_data()
X_test, y_test = X[:, :], y[:,]
X_test = OneHotEncoder(alphabet_size, sparse=False).fit_transform(X_test)

In [19]:
import visdom
vis = visdom.Visdom()

def torch_batch(batch, onehot=False):
    X, y = batch
    if onehot:
        X = OneHotEncoder(alphabet_size, sparse=False).fit_transform(X)
        
    X = Variable(torch.from_numpy(X).float())
    y = Variable(torch.from_numpy(y.astype(np.long)))
    
    return X, y

def accuracy(scores, y):
    return (scores.round().long() == y).float().mean().data[0]

In [16]:
class Ravel(nn.Module):
    def forward(self, x):
        return x.view(-1)

model = nn.Sequential(
    nn.Linear(alphabet_size*(2*window_size+1), 200),
    nn.ReLU(),
    nn.Linear(200, 200),
    nn.ReLU(),
    nn.Linear(200, 200),
    nn.ReLU(),
    nn.Linear(200, 200),
    nn.ReLU(),
    nn.Linear(200, 1),
    nn.Sigmoid(),
    Ravel(),
)

optimizer = optim.Adam(model.parameters())
loss = nn.BCELoss()

In [20]:
losses = []
accuracies = []

dev_losses = []
dev_accuracies = []

i = 0
for _ in range(2):
    while not train.epoch_finished():
        i += 1
        X, y = torch_batch(train.next_batch(20), onehot=True)
        
        optimizer.zero_grad()
        scores = model(X)
        
        output = loss(scores, y.float())
        output.backward()
        optimizer.step()
        
        if i % 5 == 0:
            losses.append(output.data[0])
            accuracies.append(accuracy(scores, y))

            vis.line(np.array(losses), win="loss", opts={"title": "train/loss"})
            vis.line(np.array(accuracies), win="acc", opts={"title": "train/acc"})

        if i % 200 == 0:
            idx = np.random.choice(y_test.shape[0], 5000)
            X, y = torch_batch((X_test[idx,:], y_test[idx]))
            
            scores = model(X)
            output = loss(scores, y.float())
        
            dev_losses.append(output.data[0])
            dev_accuracies.append(accuracy(scores, y))
            vis.line(np.array(dev_losses), win="dev/loss", opts={"title": "dev/loss"})
            vis.line(np.array(dev_accuracies), win="dev/acc", opts={"title": "dev/acc"})


KeyboardInterrupt: 

In [None]:
3 + Variable(torch.ones(1))

In [None]:
vis.line(np.array([2,4,2,3,4,3,3,2]), win="gege")

In [None]:
x = Variable(torch.ones(2, 2), requires_grad=True)
y = Variable(torch.ones(2, 2))

In [None]:
z = (x*x + y*y).mean()

In [None]:
z.backward(retain_graph=True)

In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)

In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import operator
from functools import reduce

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        x = x.view(-1, self.num_flat_features(x))

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        return reduce(operator.mul, x.size()[1:], 1)

net = Net()
print(net)

In [None]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

In [None]:
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)

In [None]:
net.zero_grad()
out.backward(torch.randn(1, 10))

In [None]:
list(net.conv1.parameters())[0].grad.unsqueeze(0).shape

In [None]:
output = net(input)
target = Variable(torch.arange(1, 11))
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

In [None]:
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

In [None]:
net.zero_grad()

print("conv1.bias.grad before backward")
print(net.conv1.bias.grad)

loss.backward()

print("conv2.bias.grad after backward")
print(net.conv1.bias.grad)

In [None]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [None]:
import torch.optim as optim

In [None]:
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [None]:
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()

In [2]:
import torchvision
import torchvision.transforms as transforms

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [5]:
batch_size = 4
num_workers = 4

trainset = torchvision.datasets.CIFAR10(root="./data/cifar",
                                       train=True,
                                       download=True,
                                       transform=transform)

trainloader = troch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                         shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.CIFAR10(root="./data/cifar",
                                      train=False,
                                      download=True,
                                      transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                        shuffle=False, num_workers=num_workers)

Files already downloaded and verified


NameError: name 'troch' is not defined

In [None]:
def imshow(img):
    img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

In [21]:
vis.image(np.random.randn(50, 20))

'window_3609fa1b30bdba'

In [22]:
vis.line(np.sin(np.arange(1000)))

'window_3609fa28a9d828'