In [106]:
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import numpy as np
import os
import os.path

In [107]:
batch_size = 64
# Maps the class names found in the CSV label column to integer targets.
# The ids are 1-based (1..9); index 0 is never produced by this mapping, so a
# classifier trained on these targets needs at least 10 output units.
label_idx = {'Class_1':1, 'Class_2':2, 'Class_3':3, 'Class_4':4, 'Class_5':5,
            'Class_6':6, 'Class_7':7, 'Class_8':8,'Class_9':9}


class OttoDataset(Dataset):
    """Map-style dataset backed by a comma-separated file under ``root``.

    Each data row is expected to hold an id in column 0 (ignored), 93 numeric
    features in columns 1..93 and a class name (a key of ``label_idx``) in
    column 94. The first line of the file is skipped as a header.

    Args:
        train: load ``training_file`` when True, ``test_file`` otherwise.
        transform: optional callable applied to each feature tensor.
        target_transform: optional callable applied to each label tensor.
        root: optional directory overriding the class-level ``root``.

    Raises:
        RuntimeError: if the CSV file for the requested split does not exist.
        KeyError: if a label in the file is not a key of ``label_idx``.

    NOTE(review): ``read_file`` always reads column 94 as the label, also for
    the test split -- confirm that ``test.csv`` actually has that column.
    """
    root = './data/otto/'
    training_file = 'train.csv'
    test_file = 'test.csv'

    def __init__(self, train=True, transform=None, target_transform=None, root=None):
        self.train = train
        self.transform = transform
        self.target_transform = target_transform
        if root is not None:
            # Instance attribute shadows the class-level default directory.
            self.root = root

        if not self._check_exists():
            raise RuntimeError('Dataset not found.')

        if self.train:
            self.train_data, self.train_labels = self.read_file(
                self._path(self.training_file))
        else:
            self.test_data, self.test_labels = self.read_file(
                self._path(self.test_file))

    def _path(self, name):
        """Return ``name`` joined onto ``root`` (works with or without a trailing slash)."""
        return os.path.join(self.root, name)

    def read_file(self, filename):
        """Parse ``filename`` into features and labels.

        Returns:
            A ``(data, labels)`` pair: ``data`` is a float32 tensor with one
            row per sample and 93 columns; ``labels`` is a list with one
            single-element ``LongTensor`` per sample.
        """
        # ndmin keeps a one-row file as a (1, 93) matrix / length-1 vector;
        # without it loadtxt squeezes the result and the row axis is lost.
        data = np.loadtxt(filename, delimiter=',', dtype=np.float32,
                          usecols=range(1, 94), skiprows=1, ndmin=2)
        names = np.loadtxt(filename, delimiter=',', dtype='str',
                           usecols=94, skiprows=1, ndmin=1)

        data = torch.from_numpy(data)
        labels = [torch.LongTensor([label_idx[name]]) for name in names]

        return data, labels

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index`` after optional transforms."""
        if self.train:
            data, target = self.train_data[index], self.train_labels[index]
        else:
            data, target = self.test_data[index], self.test_labels[index]

        if self.transform is not None:
            data = self.transform(data)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return data, target

    def __len__(self):
        """Return the number of samples in the loaded split."""
        if self.train:
            return len(self.train_data)
        else:
            return len(self.test_data)

    def _check_exists(self):
        """Return True if the CSV file for the selected split is present.

        Only the file that will actually be loaded is required, so a
        training-only setup does not need ``test_file`` (and vice versa).
        """
        needed = self.training_file if self.train else self.test_file
        return os.path.exists(self._path(needed))

# Build the training split and wrap it in a shuffling mini-batch loader.
train_dataset = OttoDataset(train=True)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# The evaluation split is left disabled; uncomment both lines to enable it.
# test_dataset = OttoDataset(train=False)
# test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [108]:
class Net(torch.nn.Module):
    """Fully connected classifier: 93 inputs -> 80 -> 70 -> 50 -> 30 -> 10 outputs.

    The five linear layers are exposed as attributes ``l1`` .. ``l5``.
    """

    def __init__(self):
        super().__init__()
        widths = (93, 80, 70, 50, 30, 10)
        # Create l1..l5 in order so each layer maps widths[i] -> widths[i + 1].
        for position, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, 'l%d' % position, torch.nn.Linear(n_in, n_out))

    def forward(self, x):
        """Apply ReLU after each of the first four layers; return the raw output of ``l5``."""
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(hidden(x))
        return self.l5(x)

In [109]:
# Network, loss and optimiser shared by the training/evaluation functions.
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), momentum=0.5, lr=0.01)

In [110]:
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``. Prints the current batch loss every 100 batches.

    Args:
        epoch: epoch number, used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Flatten the targets to 1-D; the loss takes one class index per sample.
        target = target.view(-1)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # loss is a 0-dim tensor: .item() extracts the Python float
            # (indexing it with [0] raises on current PyTorch).
            print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Uses the module-level ``model``, ``criterion`` and ``test_loader``
    (``test_loader`` must be defined before this is called).
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    # no_grad replaces the removed ``volatile=True`` flag: no autograd graph
    # is built during evaluation.
    with torch.no_grad():
        for data, target in test_loader:
            # Flatten the targets to 1-D, exactly as train() does.
            target = target.view(-1)
            output = model(data)

            # criterion returns the batch mean (assuming its default
            # reduction); weight by batch size so that dividing by the
            # dataset size below yields a true per-sample average.
            test_loss += criterion(output, target).item() * target.size(0)
            pred = output.max(1)[1]
            correct += pred.eq(target).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))
    
# Train for epochs 1 through 9; evaluation after each epoch stays disabled.
FIRST_EPOCH, LAST_EPOCH = 1, 9
for epoch in range(FIRST_EPOCH, LAST_EPOCH + 1):
    train(epoch)
#     test()

