In [23]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision 
from torchvision import datasets,transforms 
import os 


In [24]:
mean_gray = 0.1307
stddev_gray = 0.3081

data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mean_gray,),(stddev_gray,))
])

#Load MNIST data 
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform = data_transform,
    download=False
)

test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform = data_transform,
    download=False
)

In [26]:
print('Train data shape is {}'.format(len(train_dataset)))
print('Test data shape is {}'.format(len(test_dataset)))

Train data shape is 60000
Test data shape is 10000


In [27]:
batch_size = 32

train_load = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = batch_size,
    shuffle = True
)

test_load = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = batch_size,
    shuffle=False
)

In [29]:
class LinearNet(nn.Module):
    def __init__(self,dropout=0.5):
        super(LinearNet,self).__init__()
        self.linear1 = nn.Linear(784,1200)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=dropout)
        self.linear2 = nn.Linear(1200,1200)
        self.linear3 = nn.Linear(1200,10)
        
    def forward(self,x):
        out = self.linear1(x)
        out = self.relu(x)
        out = self.linear2(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.linear3(out)
        return out

In [43]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
big_model = LinearNet().to(device)

load_path = './teacher_linear_model/'
if torch.cuda.is_available():
    checkpoint = torch.load(load_path + 'modelo')
else:
    checkpoint = torch.load(load_path + 'modelo',  map_location=torch.device('cpu'))
    
big_model.load_state_dict(checkpoint['model_state_dict'],strict=False)
big_model.eval()

LinearNet(
  (linear1): Linear(in_features=784, out_features=1200, bias=True)
  (relu): ReLU(inplace=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (linear2): Linear(in_features=1200, out_features=1200, bias=True)
  (linear3): Linear(in_features=1200, out_features=10, bias=True)
)

In [51]:
def evaluate(model, dataset, max_ex=0):
    acc = 0
    N = len(dataset) * batch_size
    for i, (features, labels) in enumerate(dataset):
        scores = model(features)
        pred = torch.argmax(scores, dim=1)
        acc += torch.sum(torch.eq(pred, labels)).item()
        if max_ex != 0 and i >= max_ex:
            break
    # print(i)
    return (acc * 100 / ((i+1) * batch_size) )

In [54]:
train_acc = evaluate(big_model,train_load)
test_acc = evaluate(big_model,test_load)

print("\nTrain accuracy: %.2f%%" % train_acc)
print("Test accuracy: %.2f%%" % test_acc)

RuntimeError: size mismatch, m1: [28 x 28], m2: [784 x 1200] at /tmp/pip-req-build-rc66hrpz/aten/src/TH/generic/THTensorMath.cpp:41

In [46]:
class SmallLinearNet(nn.Module):
    def __init__(self):
        super(SmallLinearNet,self).__init__()
        self.linear1 = nn.Linear(784,50)
        self.relu = nn.ReLU(inplace=True)
        self.linear2 = nn.Linear(50,10)
    
    def forward(self,x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        return out

In [48]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
small_model = SmallLinearNet().to(device)

load_path = './small_linear_model/'
checkpoint = torch.load(load_path+'modelo',map_location=torch.device('cpu'))
small_model.load_state_dict(checkpoint['model_state_dict'],strict=False)
small_model.eval()

SmallLinearNet(
  (linear1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU(inplace=True)
  (linear2): Linear(in_features=50, out_features=10, bias=True)
)