In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import cv2
import albumentations as albu
from albumentations.pytorch import ToTensor

In [4]:
import torch
import torch.nn as nn

from torchvision import transforms

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Read the training and test CSV files from ./input into DataFrames.
train, test = (
    pd.read_csv("./input/train.csv"),
    pd.read_csv("./input/test.csv"),
)

In [7]:
# Show the loaded training frame (a "label" column followed by pixel0..pixel783).
train

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Data

In [8]:
def Data(train, test, batch_size=128, test_size=0.3, random_state=None, shuffle_train=True):
    """Build train / validation / test DataLoaders from the digit DataFrames.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame with a "label" column; every other column is treated as a pixel
        and each row is reshaped to (1, 28, 28), so 784 pixel columns are
        required.
    test : pandas.DataFrame
        Frame holding only the pixel columns (no "label").
    batch_size : int, default 128
        Batch size used for all three loaders.
    test_size : float, default 0.3
        Fraction of ``train`` held out for validation (forwarded to
        ``train_test_split``).
    random_state : int or None, default None
        Seed forwarded to ``train_test_split``; pass an int for a
        reproducible split.
    shuffle_train : bool, default True
        Reshuffle the training samples every epoch. Validation and test
        loaders always keep their order.

    Returns
    -------
    tuple of torch.utils.data.DataLoader
        ``(train_loader, val_loader, test_loader)``. The train and validation
        loaders yield ``(images, labels)`` batches; the test loader yields
        one-element batches ``(images,)``.
    """
    y = train["label"].values
    # Cast before scaling so the intermediates stay float32 instead of
    # materialising a float64 copy of the whole dataset.
    X = train.drop("label", axis=1).values.astype(np.float32)
    test = test.values.astype(np.float32)

    # Channel-first layout (N, 1, 28, 28); dividing by 255 assumes the pixel
    # columns hold 0-255 intensities -- TODO confirm against the CSV.
    X = X.reshape(-1, 1, 28, 28) / 255
    test = test.reshape(-1, 1, 28, 28) / 255

    train_X, val_X, train_y, val_y = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    train_set = torch.utils.data.TensorDataset(
        torch.tensor(train_X, dtype=torch.float32),
        torch.tensor(train_y, dtype=torch.int64),
    )
    val_set = torch.utils.data.TensorDataset(
        torch.tensor(val_X, dtype=torch.float32),
        torch.tensor(val_y, dtype=torch.int64),
    )
    test_set = torch.utils.data.TensorDataset(
        torch.tensor(test, dtype=torch.float32),
    )

    # Only the training loader is shuffled: evaluation order must stay stable
    # so predictions can be matched back to their rows.
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=shuffle_train
    )
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

In [9]:
# Turn the two loaded frames into the train / validation / test loaders.
(
    train_loader,
    val_loader,
    test_loader,
) = Data(train=train, test=test)

In [10]:
# Sanity check: print the shape of the first image batch only.
for image, label in train_loader:
    print(image.shape)
    break  # one batch is enough to confirm the layout

torch.Size([128, 1, 28, 28])


# torch.utils.data.Datasetの継承

In [59]:
# Reload the raw CSVs so this section starts from untouched frames.
train = pd.read_csv("./input/train.csv")
test = pd.read_csv("./input/test.csv")

y = train["label"].values
X = train.drop("label", axis=1).values
test = test.values

# Channel-last (N, 28, 28, 1) images stored as uint8.
# Keeping the 8-bit dtype matters for the torchvision pipeline below:
# ToPILImage/ToTensor only rescale 8-bit images to [0, 1], so float32 arrays
# holding 0-255 values would reach Normalize unscaled. albumentations'
# Normalize casts to float32 itself, so that path is numerically unchanged.
# NOTE(review): assumes every pixel value fits in 0-255 -- confirm for the CSV.
X = X.reshape(-1, 28, 28, 1).astype(np.uint8)
test = test.reshape(-1, 28, 28, 1).astype(np.uint8)

# Fixed seed so the train/validation split is identical on every run.
train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=0.3, random_state=42)
    

## Albumentations

In [60]:
class MNIST(torch.utils.data.Dataset):
    """Map-style dataset that pairs images with labels (albumentations-style transform).

    Parameters
    ----------
    images : indexable
        Collection of images; its ``len`` defines the dataset size.
    labels : indexable
        Labels indexed with the same ``idx`` as ``images``.
    trans : callable
        Called as ``trans(image=<image>)`` and expected to return a mapping
        whose ``'image'`` entry is the transformed image.
    """

    def __init__(self, images, labels, trans):
        super().__init__()
        self.images, self.labels = images, labels
        self.trans = trans

    def __len__(self):
        """Number of samples, taken from ``images``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for position ``idx``."""
        # Look both items up before transforming so an out-of-range index
        # fails before any transform work is done.
        raw, target = self.images[idx], self.labels[idx]
        return self.trans(image=raw)['image'], target

In [61]:
def train_augmentation():
    """Return the albumentations pipeline used for training images.

    The pipeline applies ``albu.Normalize`` with mean 0.485 / std 0.229
    (single channel) and then ``ToTensor``.
    """
    normalize_step = albu.Normalize(mean=(0.485,), std=(0.229,))
    tensor_step = ToTensor()
    return albu.Compose([normalize_step, tensor_step])

def test_augmentation():
    transform = [
        albu.Normalize(mean=(0.485,), std=(0.229,)),
        ToTensor(),
    ]
    return albu.Compose(transform)

In [62]:
# Training loader over the albumentations-backed dataset.
# shuffle=True reshuffles the samples every epoch; with shuffle=False the
# model would see exactly the same batches in the same order each epoch.
train_loader = torch.utils.data.DataLoader(
    MNIST(train_X, train_y, train_augmentation()),
    batch_size=64,
    shuffle=True,
)

In [63]:
# Sanity check: print the shape of the first transformed image batch only.
for image, label in train_loader:
    print(image.shape)
    break  # one batch is enough to confirm the layout

torch.Size([64, 1, 28, 28])


## torchvision

In [64]:
class MNIST(torch.utils.data.Dataset):
    """Map-style dataset that pairs images with labels (torchvision-style transform).

    This definition reuses the name ``MNIST``, so it replaces any earlier
    class of that name once the cell has run.

    Parameters
    ----------
    images : indexable
        Collection of images; its ``len`` defines the dataset size.
    labels : indexable
        Labels indexed with the same ``idx`` as ``images``.
    trans : callable
        Called positionally as ``trans(<image>)``; its return value is used
        as the transformed image.
    """

    def __init__(self, images, labels, trans):
        super().__init__()
        self.images, self.labels = images, labels
        self.trans = trans

    def __len__(self):
        """Number of samples, taken from ``images``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for position ``idx``."""
        # Look both items up before transforming so an out-of-range index
        # fails before any transform work is done.
        raw, target = self.images[idx], self.labels[idx]
        return self.trans(raw), target

In [65]:
def train_transforms():
    """Return the torchvision pipeline used for training images.

    Steps, in order: ``ToPILImage`` -> ``ToTensor`` -> ``Normalize`` with
    mean 0.485 / std 0.229 (single channel).
    """
    # NOTE(review): torchvision documents ToTensor's [0, 1] rescaling only for
    # 8-bit inputs -- confirm the dtype of the arrays fed to this pipeline.
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([0.485], [0.229])
    return transforms.Compose([to_pil, to_tensor, normalize])

def test_transforms():
    transform = [
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize([0.485,], [0.229,]),
    ]
    return transforms.Compose(transform)

In [66]:
# Training loader over the torchvision-backed dataset.
# shuffle=True reshuffles the samples every epoch; with shuffle=False the
# model would see exactly the same batches in the same order each epoch.
train_loader = torch.utils.data.DataLoader(
    MNIST(train_X, train_y, train_transforms()),
    batch_size=64,
    shuffle=True,
)

In [67]:
# Sanity check: print the shape of the first transformed image batch only.
for image, label in train_loader:
    print(image.shape)
    break  # one batch is enough to confirm the layout

torch.Size([64, 1, 28, 28])
