In [23]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision 
from torchvision import datasets,transforms 
import os 


In [116]:
mean_gray = 0.1307
stddev_gray = 0.3081

data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mean_gray,),(stddev_gray,))
])

#Load MNIST data 
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform = data_transform,
    download=False
)

test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform = data_transform,
    download=False
)


(1875, 313)

In [121]:
print('Train data shape is {}'.format(len(train_dataset)))
print('Test data shape is {}'.format(len(test_dataset)))

Train data shape is 60000
Test data shape is 10000


In [133]:
batch_size = 32

train_load = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = batch_size,
    shuffle = True
)

test_load = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = batch_size,
    shuffle=False
)

In [134]:
class LinearNet(nn.Module):
    def __init__(self,dropout=0.5):
        super(LinearNet,self).__init__()
        self.linear1 = nn.Linear(784,1200,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=dropout)
        self.linear2 = nn.Linear(1200,1200,bias=False)
        self.linear3 = nn.Linear(1200,10,bias=False)
        
    def forward(self,x):
        out = x.view(x.size(0),-1)
        out = self.linear1(out)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.linear3(out)
        return out

In [141]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
big_model = LinearNet().to(device)

load_path = './teacher_linear_model/'
if torch.cuda.is_available():
    checkpoint = torch.load(load_path + 'model.pth.tar')
else:
    checkpoint = torch.load(load_path + 'model.pth.tar',  map_location=torch.device('cpu'))
    
big_model.load_state_dict(checkpoint['model_state_dict'],strict=False)
big_model.eval()

KeyError: 'model_state_dict'

In [140]:
#train 
num_epochs = 5 
batch_size = 32 

train_loss = []
train_accuracy = [] 

model = LinearNet()

loss_fn = nn.CrossEntropyLoss()        
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)

for epoch in range(num_epochs):
    correct = 0
    iterations = 0 
    iter_loss = 0.0
    for i,(images,labels) in enumerate(train_load):
        outputs = model(images)
        loss = loss_fn(outputs,labels)
        iter_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        _,predict = torch.max(outputs,dim=1)
        correct += (predict == labels).sum() 
        iterations += 1 
    train_loss.append(iter_loss / iterations)
    train_accuracy.append((100 * correct // len(train_dataset)))
    print(train_accuracy)
    torch.save(model.state_dict(), 'model.pth.tar')

[tensor(83)]
[tensor(83), tensor(85)]
[tensor(83), tensor(85), tensor(84)]
[tensor(83), tensor(85), tensor(84), tensor(84)]
[tensor(83), tensor(85), tensor(84), tensor(84), tensor(83)]


In [145]:
load_path = './teacher_linear_model/'
load_path = load_path + 'model.pth.tar'
model.load_state_dict(torch.load(load_path))
model.eval()

LinearNet(
  (linear1): Linear(in_features=784, out_features=1200, bias=False)
  (relu): ReLU(inplace=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (linear2): Linear(in_features=1200, out_features=1200, bias=False)
  (linear3): Linear(in_features=1200, out_features=10, bias=False)
)

In [154]:
#Testing

with torch.no_grad():
    loss = 0.0
    correct = 0
    iterations = 0
    test_loss = []
    test_accuracy = []
    for i,(images,labels) in enumerate(test_load):
        outputs = model(images)
        loss = loss_fn(outputs,labels)
        loss += loss.item()
        _,predict = torch.max(outputs,dim=1)
        correct += (predict == labels).sum()
        iterations += 1
    
test_loss.append(loss/iterations)
test_accuracy.append(100 * correct //len(test_dataset))

test_accuracy

[tensor(90)]

In [46]:
class SmallLinearNet(nn.Module):
    def __init__(self):
        super(SmallLinearNet,self).__init__()
        self.linear1 = nn.Linear(784,50)
        self.relu = nn.ReLU(inplace=True)
        self.linear2 = nn.Linear(50,10)
    
    def forward(self,x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        return out

In [48]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
small_model = SmallLinearNet().to(device)

load_path = './small_linear_model/'
checkpoint = torch.load(load_path+'modelo',map_location=torch.device('cpu'))
small_model.load_state_dict(checkpoint['model_state_dict'],strict=False)
small_model.eval()

SmallLinearNet(
  (linear1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU(inplace=True)
  (linear2): Linear(in_features=50, out_features=10, bias=True)
)