In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List what is available under the input directory. The image folders used by
# this notebook hold tens of thousands of files, so only the first few names of
# each directory are printed, followed by a count of the remainder.
MAX_FILES_SHOWN = 5
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames[:MAX_FILES_SHOWN]:
        print(os.path.join(dirname, filename))
    if len(filenames) > MAX_FILES_SHOWN:
        print('{}: ... and {} more files'.format(dirname, len(filenames) - MAX_FILES_SHOWN))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np # Torch wrapper for Numpy

import os
from PIL import Image

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from sklearn.preprocessing import MultiLabelBinarizer


In [None]:
# Training inputs: the image extension, the folder holding the JPEG tiles and
# the CSV that maps each image name to its tags.
IMG_EXT = '.jpg'
IMG_PATH = '/kaggle/input/planets-dataset/planet/planet/train-jpg/'
TRAIN_DATA = '/kaggle/input/planet-understanding-the-amazon-from-space/train_v2.csv/train_v2.csv'

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
class KaggleAmazonDataset(Dataset):
    """Dataset wrapping images and multi-hot tag labels for Kaggle - Planet Amazon from Space competition.

    Arguments:
        csv_path: path to a CSV file with an ``image_name`` column and a
            ``tags`` column (whitespace-separated labels).
        img_path: path to the image folder (with or without a trailing slash).
        img_ext: extension of the images, including the leading dot.
        transform: optional callable applied to every RGB PIL image.

    Attributes:
        mlb: the fitted ``MultiLabelBinarizer`` (maps label vectors back to tags).
        X_train: pandas Series of image names.
        y_train: float32 array with one multi-hot row per image.
    """

    def __init__(self, csv_path, img_path, img_ext, transform=None):
        tmp_df = pd.read_csv(csv_path)

        self.mlb = MultiLabelBinarizer()
        self.img_path = img_path
        self.img_ext = img_ext
        self.transform = transform

        # Fail early if the CSV references an image that is not on disk.
        assert tmp_df['image_name'].apply(lambda x: os.path.isfile(self._image_file(x))).all(), \
            "Some images referenced in the CSV file were not found"

        self.X_train = tmp_df['image_name']
        self.y_train = self.mlb.fit_transform(tmp_df['tags'].str.split()).astype(np.float32)

    def _image_file(self, image_name):
        """Return the full path of the image file for ``image_name``."""
        return os.path.join(self.img_path, image_name + self.img_ext)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at position ``index``."""
        # .iloc keeps the lookup positional regardless of the Series' index labels.
        image_name = self.X_train.iloc[index]
        # Close the file handle deterministically; convert() returns a fully
        # loaded copy, so the image stays usable after the file is closed.
        with Image.open(self._image_file(image_name)) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        label = torch.from_numpy(self.y_train[index])
        return img, label

    def __len__(self):
        """Return the number of samples listed in the CSV."""
        return len(self.X_train.index)
# Preprocessing applied to every image:
#   1. Resize so the shorter side is 32 px. transforms.Scale, the old name of
#      this transform, is deprecated and no longer available in current
#      torchvision releases; Resize is its replacement.
#   2. Convert the PIL image to a CHW float tensor in [0, 1].
#   3. Normalize each channel with the ImageNet mean/std used by the
#      torchvision pretrained models.
transformations = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

dataset = KaggleAmazonDataset(TRAIN_DATA, IMG_PATH, IMG_EXT, transformations)

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

# Split configuration.
batch_size = 256
validation_split = .2
shuffle_dataset = True
random_seed = 42

# Build the list of sample positions and the size of the validation slice.
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

# Shuffle with a fixed seed so the train/validation split is reproducible.
if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# The first `split` positions form the validation set, the rest the training set.
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

# Both loaders read from the same dataset; the samplers keep the subsets apart.
# batch_size now comes from the setting above rather than a repeated literal,
# and pinned memory is requested only when a CUDA device is present.
train_loader = DataLoader(dataset,
                          batch_size=batch_size,
                          sampler=train_sampler,
                          num_workers=1,
                          pin_memory=torch.cuda.is_available()
                         )

# Despite its name, this loader serves the held-out validation subset.
test_loader = DataLoader(dataset,
                         batch_size=batch_size,
                         sampler=valid_sampler,
                         num_workers=1,
                         pin_memory=torch.cuda.is_available()
                        )

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Fetch the first sample of the underlying dataset (a normalized CHW tensor
# and its multi-hot label vector).
image, label = train_loader.dataset[0]

# imshow expects an HWC image with values in [0, 1]. Undo the channel
# normalization applied by `transformations` and move the channel axis last;
# plotting image[1] would show only one normalized channel.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
preview = (image * norm_std + norm_mean).clamp(0, 1).permute(1, 2, 0)

plt.imshow(preview.numpy())
plt.title('Sample image (de-normalized)');

In [None]:
class AmazonModel(nn.Module):
    """Multi-label head stacked on top of an existing network.

    The wrapped network's output is passed through ReLU, a linear layer and a
    sigmoid, producing one value in (0, 1) per output label.

    Arguments:
        pretrained_model: module whose output has ``in_features`` values per sample.
        in_features: size of the wrapped module's output.
        out_features: number of labels to predict.
    """

    def __init__(self, pretrained_model, in_features, out_features):
        super().__init__()

        self.pretrained_model = pretrained_model
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features, out_features)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-label probabilities for the input batch ``x``."""
        activated = self.relu(self.pretrained_model(x))
        return self.sigmoid(self.fc1(activated))

In [None]:
from torchvision import models

# Start from an ImageNet-pretrained ResNet-18.
model = models.resnet18(pretrained=True)

# Freeze the whole network, then re-enable gradients for its final fully
# connected layer only (weight and bias).
model.requires_grad_(False)
model.fc.requires_grad_(True)

# The ResNet keeps its own fc layer, so the new head consumes that layer's
# output; the number of labels is the width of the multi-hot label matrix.
num_ftrs = model.fc.out_features
no_label = train_loader.dataset.y_train.shape[1]

# Wrap the backbone with the multi-label head and move everything to the device.
model = AmazonModel(model, num_ftrs, no_label).to(device)

torch.backends.cudnn.benchmark = True

# Debugging aid: list which parameters are trainable.
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

In [None]:
# Put the model in training mode. As the last expression of the cell, the
# module returned by train() is also displayed, showing the architecture.
model.train()

In [None]:
# Plain SGD over all parameters of the wrapped model. Parameters with
# requires_grad=False never receive a gradient, so the optimizer leaves them
# unchanged.
optimizer = optim.SGD(params=model.parameters(), lr=0.003)

In [None]:
from sklearn import metrics

def train(epoch):
    """Run one training epoch over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``train_loader`` and
    ``device``. Progress is printed every 10 batches.

    Arguments:
        epoch: epoch number, used only in the progress messages.

    Returns:
        The mean of the per-batch losses as a float (``nan`` when the loader
        yields no batches).
    """
    model.train()
    # Ask the sampler how many samples this loader draws; with a subset
    # sampler that differs from len(train_loader.dataset).
    num_samples = len(train_loader.sampler)
    num_batches = len(train_loader)
    running_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # The model ends in a sigmoid, so plain BCE on probabilities applies.
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss

        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, batch_loss))

    return running_loss / num_batches if num_batches else float('nan')

In [None]:
# Train for two epochs; train() prints its own progress every 10 batches.
for epoch in range(2):
    train(epoch)

In [None]:
from sklearn.metrics import fbeta_score, confusion_matrix

f_scores = list()

def test(epoch):
    model.eval()
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)
#         data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.binary_cross_entropy(output, target)
        if batch_idx % 10 == 0:
            print('Test Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), split,
                100. * batch_idx / len(test_loader), loss.data.item()))
            f_scores.append([target, output])
            

In [None]:
import os

# /kaggle/input/planet-understanding-the-amazon-from-space/Kaggle-planet-test-tif.torrent
# /kaggle/input/planet-understanding-the-amazon-from-space/Kaggle-planet-train-tif.torrent

# /kaggle/input/planet-understanding-the-amazon-from-space/train_v2.csv/train_v2.csv


TEST_IMG_PATH = '/kaggle/input/planets-dataset/planet/planet/test-jpg/'
TEST_IMG_EXT = '.jpg'
SUBMISSION_FILE = '/kaggle/input/planet-understanding-the-amazon-from-space/sample_submission_v2.csv/sample_submission_v2.csv'
SUB_MAPPING = '/kaggle/input/planet-understanding-the-amazon-from-space/test_v2_file_mapping.csv/test_v2_file_mapping.csv'

# How often (in images) a progress line is printed.
PROGRESS_EVERY = 1000

sub = pd.read_csv(SUBMISSION_FILE)

# Inference must run in eval mode: the training loop leaves the model in
# training mode, where BatchNorm uses per-batch statistics, which is wrong
# (and can fail outright) for a batch of a single image.
model.eval()

predicted_tags = []
# No gradients are needed for prediction.
with torch.no_grad():
    for index, image_name in enumerate(sub['image_name']):
        # Close each file as soon as it has been decoded.
        with Image.open(TEST_IMG_PATH + image_name + TEST_IMG_EXT) as raw:
            img = transformations(raw.convert('RGB'))
        Y_ = model(img.unsqueeze(0).to(device))
        # Threshold the probabilities at 0.5 and map the multi-hot row back to tag names.
        binary = np.where(Y_.cpu().numpy() > 0.5, 1, 0)
        predicted_tags.append(' '.join(dataset.mlb.inverse_transform(binary)[0]))
        if index % PROGRESS_EVERY == 0:
            print('{} Files Completed!'.format(index))

# Assign the whole column at once; element-wise sub['tags'][index] = ... is
# chained assignment, which pandas warns about and may not write back.
sub['tags'] = predicted_tags

In [None]:
# Write the predictions to the current working directory (without the index
# column), then list '../working/' as the cell output.
sub.to_csv('submission_resnet18_2.csv', index = False)
os.listdir('../working/')