In [1]:
import torch
import json

In [2]:
TRAIN_DEV_SPLIT = 50000

# LOAD DATA
train = []
test = []

with open('mnist_rowmajor.jsonl') as file:
    for line in file:
        data = json.loads(line)
        
        img = torch.tensor(data['image'])
        label = data['label']
        
        if data['split'] == 'train':
            train.append((img, label))
        else:
            test.append((img, label))

int_train = train[:TRAIN_DEV_SPLIT]
int_dev = train[TRAIN_DEV_SPLIT:]

## Question 3

In [3]:
# Q3: most frequent baseline classifier
class BaselineClassifier:
    def __init__(self):
        self.label_count = torch.zeros(10)

    def forward(self, x):
        return torch.argmax(self.label_count)

    def train(self, x, y):
        self.label_count[y].add_(1)

model = BaselineClassifier()

# train model
for x, y in int_train:
    model.train(x, y)

# eval model
score = 0.0
for x, y in int_dev:
    y_pred = model.forward(x)
    score += y_pred == y

# print accuracy
accuracy = 100*(score/len(int_dev))
print("Accuracy {:.2f}%".format(accuracy))

Accuracy 11.47%


## Question 4

In [4]:
# Q4: multiclass perceptron - no bias
D = 784
class MulticlassPerceptron:
    def __init__(self):
        self.w = torch.zeros(10, D)

    def forward(self, x):
        a = torch.zeros(10)
        for i in range(0,10):
            a[i] = torch.dot(self.w[i],x)
        return torch.argmax(a)

    def train(self, x, y):
        y_pred = self.forward(x)
        if y_pred != y:
            self.w[y].add_(torch.mul(x, y))
            self.w[y_pred].sub_(torch.mul(x, y))

model = MulticlassPerceptron()

# train model
for x, y in int_train:
    model.train(x, y)

# eval model
score = 0.0
for x, y in int_dev:
    y_pred = model.forward(x)
    score += y_pred == y

# print accuracy
accuracy = 100*(score/len(int_dev))
print("Accuracy {:.2f}%".format(accuracy))

Accuracy 79.28%


In [5]:
# Q4: multiclass perceptron - with bias
D = 784
class MulticlassPerceptron:
    def __init__(self):
        self.w = torch.zeros(10, D)
        self.b = torch.zeros(10)

    def forward(self, x):
        a = torch.zeros(10)
        for i in range(0,10):
            a[i] = torch.dot(self.w[i],x) + self.b[i]
        return torch.argmax(a)

    def train(self, x, y):
        y_pred = self.forward(x)
        if y_pred != y:
            self.w[y].add_(torch.mul(x, y))
            self.w[y_pred].sub_(torch.mul(x, y))
            self.b[y].add_(y)
            self.b[y_pred].sub_(y)

model = MulticlassPerceptron()

# train model
for x, y in int_train:
    model.train(x, y)

# eval model
score = 0.0
for x, y in int_dev:
    y_pred = model.forward(x)
    score += y_pred == y

# print accuracy
accuracy = 100*(score/len(int_dev))
print("Accuracy {:.2f}%".format(accuracy))

Accuracy 80.06%


In [6]:
# Q4: multiclass perceptron - using nn.MultiMarginLoss
D = 784
loss = torch.nn.MultiMarginLoss()
class MulticlassPerceptron:
    def __init__(self):
        self.w = torch.zeros(10, D, requires_grad=True)

    def forward(self, x):
        a = torch.zeros(10)
        for i in range(0,10):
            a[i] = torch.dot(self.w[i],x)
        return torch.argmax(a)

    def train(self, x, y):
        # forward step
        a = torch.zeros(10)
        for i in range(0,10):
            a[i] = torch.dot(self.w[i],x)
        y_pred = torch.argmax(a)
        
        # calculate loss
        l = loss(a, torch.tensor(y))
        
        # calculate grad
        l.backward()
        
        # update weights
        with torch.no_grad():
            self.w.sub_(self.w.grad)
            
        # reset gradient
        self.w.grad.zero_()

model = MulticlassPerceptron()

# train model
for x, y in int_train:
    model.train(x, y)

# eval model
score = 0.0
for x, y in int_dev:
    y_pred = model.forward(x)
    score += y_pred == y

# print accuracy
accuracy = 100*(score/len(int_dev))
print("Accuracy {:.2f}%".format(accuracy))

Accuracy 88.40%


## Question 5

In [7]:
# Q5: most frequent baseline classifier
class BaselineClassifier:
    def __init__(self):
        self.label_count = torch.zeros(10)

    def forward(self, x):
        return torch.argmax(self.label_count)

    def train(self, x, y):
        self.label_count[y].add_(1)

model = BaselineClassifier()

# train model
for x, y in train:
    model.train(x, y)

# eval model
score = 0.0
for x, y in test:
    y_pred = model.forward(x)
    score += y_pred == y

# print accuracy
accuracy = 100*(score/len(test))
print("Accuracy {:.2f}%".format(accuracy))

Accuracy 11.35%


In [8]:
# Q5: multiclass perceptron - using nn.MultiMarginLoss
D = 784
loss = torch.nn.MultiMarginLoss()
class MulticlassPerceptron:
    def __init__(self):
        self.w = torch.zeros(10, D, requires_grad=True)

    def forward(self, x):
        a = torch.zeros(10)
        for i in range(0,10):
            a[i] = torch.dot(self.w[i],x)
        return torch.argmax(a)

    def train(self, x, y):
        # forward step
        a = torch.zeros(10)
        for i in range(0,10):
            a[i] = torch.dot(self.w[i],x)
        y_pred = torch.argmax(a)
        
        # calculate loss
        l = loss(a, torch.tensor(y))
        
        # calculate grad
        l.backward()
        
        # update weights
        with torch.no_grad():
            self.w.sub_(self.w.grad)
            
        # reset gradient
        self.w.grad.zero_()

model = MulticlassPerceptron()

# train model
for x, y in train:
    model.train(x, y)

# eval model
score = 0.0
for x, y in test:
    y_pred = model.forward(x)
    score += y_pred == y

# print accuracy
accuracy = 100*(score/len(test))
print("Accuracy {:.2f}%".format(accuracy))

Accuracy 86.29%


## Verification

In [9]:
# binary perceptron to test correctness
class BinaryPerceptron:
    def __init__(self):
        self.w = torch.zeros(2)

    def forward(self, x):
        self.a = torch.dot(self.w,x)
        return 1 if self.a > 0 else -1

    def train(self, x, y):
        y_pred = self.forward(x)
        print("{:^14}{:^7}{:^7}".format( str(self.w.tolist()), self.a, y_pred, ))
        if y_pred != y:
            self.w.add_(torch.mul(x, y))
        return y_pred

model = BinaryPerceptron()
# X = [(-1,1) (-1,-1) (0.5,0.5) (1,-1) (0.5,-1]]
X = torch.tensor([
    [-1,1],
    [-1,-1],
    [0.5,0.5],
    [1,-1],
    [0.5,-1]
])
Y = torch.tensor([1,-1,1,1,-1])

print("{:^14}{:^7}{:^7}".format( "weights", "a", "y_pred"))
for i in range(10):
    y_pred = model.train(X[i%5],Y[i%5])

   weights       a   y_pred 
  [0.0, 0.0]    0.0    -1   
 [-1.0, 1.0]    0.0    -1   
 [-1.0, 1.0]    0.0    -1   
 [-0.5, 1.5]   -2.0    -1   
  [0.5, 0.5]   -0.25   -1   
  [0.5, 0.5]    0.0    -1   
 [-0.5, 1.5]   -1.0    -1   
 [-0.5, 1.5]    0.5     1   
 [-0.5, 1.5]   -2.0    -1   
  [0.5, 0.5]   -0.25   -1   
