In [70]:
import torch
import torch.utils
import sklearn.datasets
import os
import numpy as np

In [6]:
data = sklearn.datasets.load_svmlight_file('part-00000-bda45ecd-9518-4fb7-9339-694ca61fbbca-c000.libsvm')

In [81]:
np.concatenate([np.array([1, 2, 3])], axis=0)

array([1, 2, 3])

In [87]:
class MNISTDataset(torch.utils.data.Dataset):
    """Simple MNIST Dataset"""
    
    def __init__(self, training_dir): 
        
        files = os.listdir(training_dir)
        print(files)
        svms = [sklearn.datasets.load_svmlight_file(os.path.join(training_dir, f)) for f in files]
        data = np.concatenate([f[0].todense() for f in svms], axis=0)
        targets = np.concatenate([f[1] for f in svms], axis=0)
        
        self.data = torch.FloatTensor(data)
        self.targets = torch.LongTensor(targets)

    def __getitem__(self, index):
        img, target = self.data[index].reshape((1, 28, 28)), self.targets[index]
        return img, target

    def __len__(self):
        return len(self.data)

In [88]:
temp_dataset = MNISTDataset('temp_train')

['part-00003-bda45ecd-9518-4fb7-9339-694ca61fbbca-c000.libsvm', 'part-00000-bda45ecd-9518-4fb7-9339-694ca61fbbca-c000.libsvm']


In [91]:
temp_dataset.__getitem__(0)[0].shape

torch.Size([1, 28, 28])

# Scrap

In [17]:
from torchvision import datasets, transforms

In [20]:
dataset = datasets.MNIST('', download=True, train=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
]))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [63]:
dataset.__getitem__(0)[0].shape

torch.Size([1, 28, 28])

In [64]:
dataset.__getitem__(0)[1].shape

torch.Size([])

In [35]:
torch.Tensor([5])

tensor([5.])

In [52]:
torch.LongTensor([5])[0]

tensor(5)

In [54]:
torch.FloatTensor([1, 2, 3]).dtype

torch.float32

In [48]:
dataset.train_labels

tensor([5, 0, 4,  ..., 5, 6, 8])

In [92]:
def model_fn(model_dir):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.nn.DataParallel(Net())
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f))
    return model.to(device)

In [95]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

In [96]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [97]:
m = model_fn('../../')

In [98]:
m

DataParallel(
  (module): Net(
    (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (conv2_drop): Dropout2d(p=0.5)
    (fc1): Linear(in_features=320, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=10, bias=True)
  )
)

In [103]:
def predict_fn(input_data, model):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    with torch.no_grad():
        return model(input_data.to(device))

In [108]:
def _get_train_data_loader(batch_size, training_dir, is_distributed, **kwargs):
    dataset = MNISTDataset(training_dir)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, **kwargs)

In [110]:
for batch_idx, (data, target) in enumerate(_get_train_data_loader(1, '../testing/temp_train', False), 1):
    print(predict_fn(data, m))
    break

['part-00003-bda45ecd-9518-4fb7-9339-694ca61fbbca-c000.libsvm', 'part-00000-bda45ecd-9518-4fb7-9339-694ca61fbbca-c000.libsvm']
tensor([[-35.1372, -33.7277, -25.1957, -30.9751,   0.0000, -25.8668, -24.3048,
         -26.7323, -20.9381, -17.4964]])
