In [51]:
import os

import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from PIL import Image
from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM, ROTATE_90, ROTATE_180, ROTATE_270
from PIL.ImageEnhance import Color, Contrast, Brightness, Sharpness
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.autograd import Variable
from torchvision import transforms

# Very old PyTorch releases re-exported numpy as `torch.np`; current releases
# do not, so fall back to importing numpy directly under the same name.
try:
    from torch import np
except ImportError:
    import numpy as np

In [3]:
# Root directory of the dataset. Override with the AMAZON_DATA_DIR environment
# variable; the default reproduces the original hard-coded location.
DATA_DIR = os.environ.get('AMAZON_DATA_DIR', '/data/amazon').rstrip('/')

# Directory holding the training images. The trailing slash is required because
# the dataset classes build file names by plain string concatenation.
IMG_PATH = DATA_DIR + '/train-jpg/'
# File extension appended to every image name listed in the CSV.
IMG_EXT = '.jpg'
# CSV file listing image names and their tags.
TRAIN_DATA = DATA_DIR + '/train_v2.csv'

In [30]:
class KaggleAmazonDataset(Dataset):
    """Dataset pairing images on disk with multi-hot tag vectors read from a CSV.

    Arguments:
        csv_path: path of a CSV file with an ``image_name`` column and a
            ``tags`` column of whitespace-separated tag names.
        img_path: prefix prepended to every image name (plain string
            concatenation, so include any trailing separator).
        img_ext: suffix appended to every image name, e.g. ``'.jpg'``.
        transform: optional callable applied to each RGB PIL image.

    Raises:
        AssertionError: if any image listed in the CSV is missing on disk.
    """

    def __init__(self, csv_path, img_path, img_ext, transform=None):
        frame = pd.read_csv(csv_path)

        def image_exists(name):
            # Same path construction as __getitem__: prefix + name + suffix.
            return os.path.isfile(img_path + name + img_ext)

        all_present = frame['image_name'].apply(image_exists).all()
        assert all_present, "Some images referenced in CSV file were not found"

        self.mlb = MultiLabelBinarizer()
        self.img_path = img_path
        self.img_ext = img_ext
        self.transform = transform
        self.X_train = frame['image_name']

        # One list of tag strings per row, binarized into a float32 matrix
        # with one column per distinct tag.
        tag_lists = frame['tags'].str.split()
        self.y_train = self.mlb.fit_transform(tag_lists).astype(np.float32)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        file_name = self.img_path + self.X_train[index] + self.img_ext
        img = Image.open(file_name).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        return img, torch.from_numpy(self.y_train[index])

    def __len__(self):
        """Number of samples, i.e. rows read from the CSV."""
        return self.X_train.shape[0]

In [31]:
# `transforms.Scale` was renamed to `transforms.Resize` and later removed from
# torchvision; Resize(32) resizes the shorter image edge to 32 pixels.
# NOTE(review): Net.fc1 expects 2304 = 64*6*6 features, i.e. 32x32 inputs, so
# this presumably relies on the source images being square -- confirm.
transformations = transforms.Compose([transforms.Resize(32), transforms.ToTensor()])
dset_train = KaggleAmazonDataset(TRAIN_DATA, IMG_PATH, IMG_EXT, transformations)

In [58]:
# Shuffled mini-batches of 64 loaded by 8 worker processes. Pinned host memory
# is what lets the asynchronous host-to-device copies requested in the training
# loop actually overlap with compute; it is only enabled when CUDA is present.
train_loader = DataLoader(dset_train, batch_size=64, shuffle=True, num_workers=8,
                          pin_memory=torch.cuda.is_available())

In [59]:
class Net(nn.Module):
    """Small two-convolution CNN producing per-class probabilities.

    The first fully connected layer takes 2304 = 64 * 6 * 6 features, which is
    what two unpadded 3x3 convolutions, each followed by 2x2 max-pooling,
    produce from a 3-channel 32x32 input (32 -> 30 -> 15 -> 13 -> 6).

    Arguments:
        num_classes: number of output units (default 17).
    """

    def __init__(self, num_classes=17):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(2304, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to independent per-class probabilities.

        Arguments:
            x: float tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) with values in (0, 1).
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        # The functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # torch.sigmoid replaces the deprecated F.sigmoid; the result is identical.
        return torch.sigmoid(x)

In [60]:
# Use the GPU when one is available and fall back to the CPU otherwise, instead
# of failing outright on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)

In [61]:
# SGD hyper-parameters, named so they are easy to find and tune.
LEARNING_RATE = 0.01
MOMENTUM = 0.5

optimizer = optim.SGD(params=model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

In [64]:
def train(epoch, log_interval=10):
    """Run one training epoch over the module-level ``train_loader``.

    Uses the module-level ``model`` and ``optimizer``. Each batch is moved to
    whatever device the model's parameters live on, so the same code runs on
    GPU and CPU.

    Arguments:
        epoch: epoch number, used only in the progress message.
        log_interval: print the running loss every this many batches.
    """
    model.train()
    device = next(model.parameters()).device
    for batch_idx, (data, target) in enumerate(train_loader):
        # `non_blocking` is the current spelling of the old `async` flag, which
        # can no longer be written since `async` became a Python keyword.
        data = data.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)
        optimizer.zero_grad()
        output = model(data)
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            # loss is a 0-dim tensor; .item() extracts the Python float
            # (indexing it with [0] fails on current PyTorch).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [66]:
# Number of full passes over the training set.
NUM_EPOCHS = 5

for epoch in range(NUM_EPOCHS):
    train(epoch)





In [6]:
class AmazonDataset(Dataset):
    """Dataset of images on disk with multi-hot labels in a fixed class order.

    Arguments:
        csv_path: path of a CSV file with an ``image_name`` column and a
            ``tags`` column of whitespace-separated tag names.
        img_path: prefix prepended to every image name (plain string
            concatenation, so include any trailing separator).
        img_ext: suffix appended to every image name, e.g. ``'.jpg'``.
        transform: optional callable applied to each RGB PIL image.

    Raises:
        AssertionError: if any image listed in the CSV is missing on disk.
    """

    def __init__(self, csv_path, img_path, img_ext, transform=None):
        self.df = pd.read_csv(csv_path)
        func = lambda x: os.path.isfile(img_path + x + img_ext)
        assert self.df['image_name'].apply(func).all(), 'some images in csv file not found'

        # Passing an explicit class list pins the label column order.
        self.mlb = MultiLabelBinarizer(classes=['clear', 'cloudy', 'haze', 'partly_cloudy',
                                                'agriculture', 'artisinal_mine',
                                                'bare_ground', 'blooming', 'blow_down',
                                                'conventional_mine', 'cultivation',
                                                'habitation', 'primary', 'road',
                                                'selective_logging', 'slash_burn', 'water'])

        self.img_path = img_path
        self.img_ext = img_ext
        self.transform = transform
        self.X = self.df['image_name']
        # The column is named 'tags', as read by KaggleAmazonDataset from the
        # same CSV; the previous 'tag' raised KeyError.
        self.y = self.mlb.fit_transform(self.df['tags'].str.split()).astype(np.float32)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at position ``index``."""
        path = self.img_path + self.X.iloc[index] + self.img_ext
        # The context manager releases the file handle as soon as the pixel
        # data has been converted into a new in-memory image.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # `from_numpy` was never imported as a bare name; call it through torch.
        label = torch.from_numpy(self.y[index])
        return img, label

    def __len__(self):
        """Number of samples, i.e. rows read from the CSV."""
        return len(self.df.index)

    def getLabelEncoder(self):
        """Return the fitted MultiLabelBinarizer."""
        return self.mlb

    def getDF(self):
        """Return the DataFrame read from the CSV."""
        return self.df

    def getX(self):
        """Return the Series of image names.

        Replaces the former ``X`` method, which was unreachable on instances
        because the ``X`` attribute assigned in ``__init__`` shadows it;
        ``instance.X`` still yields the same Series.
        """
        return self.X