In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim

# Ref: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py
# Install: pip install timm
import timm
from timm.models.efficientnet import default_cfgs 

from tensorboardX import SummaryWriter

import os
import shutil
import sys
import time
from glob import glob
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image

from torchvision import transforms as T

from sklearn.model_selection import train_test_split

device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
npy_path = glob("./datasets/training_data_5d/test/*.npy")
df = pd.Series(npy_path,name="npy_path").to_frame()
# df['classes'] = df['npy_path'].apply(lambda x: int(x.split('.')[1][-1])-1)
# df['area_id'] = df['npy_path'].apply(lambda x: x.split('_')[2])
df['area_id'] = df['npy_path'].apply(lambda x: x.split('_')[3])
df

Unnamed: 0,npy_path,area_id
0,./datasets/training_data_5d/test/test_0000_000...,0000
1,./datasets/training_data_5d/test/test_0001_000...,0001
2,./datasets/training_data_5d/test/test_0002_000...,0002
3,./datasets/training_data_5d/test/test_0003_000...,0003
4,./datasets/training_data_5d/test/test_0004_000...,0004
...,...,...
11811,./datasets/training_data_5d/test/test_0564_179...,0564
11812,./datasets/training_data_5d/test/test_0564_180...,0564
11813,./datasets/training_data_5d/test/test_0564_181...,0564
11814,./datasets/training_data_5d/test/test_0564_182...,0564


In [64]:
# Dataset Loader
class SatDataset(torch.utils.data.Dataset):
    def __init__(self,df):
        self.npy_path = list(df['npy_path'])
        self.area_id = list(df['area_id'])

    def __len__(self):
        return len(self.npy_path)
    def __getitem__(self, idx):
        data = np.load(self.npy_path[idx])
        data = torch.from_numpy(np.swapaxes(data, -3, -1)).float()
        return {'image': data, 'area_id': self.area_id[idx]}

test_dataset = SatDataset(df)


# dataloaders
batch_size = 1024
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
# class ClassifierModel(torch.nn.Module):
#     def __init__(self,num_class=4):
#         super(ClassifierModel, self).__init__() 
#         self.backbone = torch.nn.Sequential(
#             torch.nn.Conv2d(36, 48, kernel_size=5),
#             torch.nn.BatchNorm2d(48),
#             torch.nn.LeakyReLU(),
#             torch.nn.Conv2d(48, 64, kernel_size=5),
#             torch.nn.BatchNorm2d(64),
#             torch.nn.LeakyReLU(),
#             torch.nn.Conv2d(64, 128, kernel_size=5),
#             torch.nn.BatchNorm2d(128),
#             torch.nn.LeakyReLU(),
#             torch.nn.Conv2d(128, 256, kernel_size=4),
#             torch.nn.LeakyReLU(),
#             torch.nn.Flatten()
#             )
#         self.classifier = torch.nn.Sequential(
#             torch.nn.Linear(256, num_class),
#             torch.nn.BatchNorm1d(num_class),
#             # torch.nn.Softmax(dim=1)
#             )


#     def forward(self, x):
#         x = self.backbone(x)
#         x = self.classifier(x)
#         return x
    
class ClassifierModel(torch.nn.Module):
    def __init__(self,num_class=4):
        super(ClassifierModel, self).__init__() 
        self.backbone = torch.nn.Sequential(
            torch.nn.Conv2d(5, 5, kernel_size=3, padding='same', bias = False),
            torch.nn.MaxPool2d(2),
            torch.nn.BatchNorm2d(5),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(5, 5, kernel_size=3, padding='same', bias = False),
            torch.nn.MaxPool2d(2),
            torch.nn.BatchNorm2d(5),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(5, 5, kernel_size=3, padding='same', bias = False),
            torch.nn.MaxPool2d(2),
            torch.nn.AvgPool2d(2),
            torch.nn.Flatten()
            )
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(5*12, 16),
            torch.nn.LeakyReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(16, num_class),
            )


    def forward(self, x):
        x = torch.cat([self.backbone(x[:,i*5:i*5+5]) for i in range(12)],axis=-1)
        x = self.classifier(x)
        return x
    
model = ClassifierModel()
x = torch.rand(5,60,16,16)
model(x).shape

torch.Size([5, 4])

In [11]:
model = ClassifierModel()
model.load_state_dict(torch.load('./weight/M1-classifier_model_checkpoint_add_dropout/best_checkpoint.pth.tar')['state_dict'])
model.cuda().eval()

ClassifierModel(
  (backbone): Sequential(
    (0): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): BatchNorm2d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (11): Flatten(start_dim=1, end_dim=-1)
  )
  (classifier): Sequential(
    (0): Linear(in_features=60, out_features=16, bias=True)
    (1): 

In [67]:
pred_label = []
label_masks_score = []
area_ids = []
for batch in test_loader:
    batch_sz = batch['image'].shape[0]
    
    img = batch['image'].cuda()
    with torch.no_grad():
        logits = model(img).cpu().softmax(axis=1).numpy()
    pred_label += [logits]
    area_ids += batch['area_id']
    label_masks_score += [(img.cpu().numpy()!=0).reshape((batch_sz,-1)).sum(axis=1)/np.prod(batch['image'].shape[1:])]
pred_label = np.concatenate(pred_label)
label_masks_score = np.concatenate(label_masks_score)


In [68]:
pred_dict = dict()
for area_id,logit,mask_score in zip(area_ids,pred_label,label_masks_score):
    if area_id in pred_dict.keys():
        pred_dict[area_id] += mask_score*(logit)
    else:
        pred_dict[area_id] = mask_score*logit
pred_dict = {int(k):np.argmax(v) for k,v in pred_dict.items()}

In [69]:
df_output = pd.Series(pred_dict,name='crop_type').to_frame()
df_output['crop_type'] += 1
df_output

Unnamed: 0,crop_type
0,1
1,3
2,1
3,2
4,1
...,...
560,1
561,1
562,1
563,1


In [70]:
df_output.to_csv("./submissions/M1_DeepLearningClassification_addDropout_output.csv")

In [58]:
df_output['crop_type'].value_counts()

1    218
2    158
3    157
4     31
Name: crop_type, dtype: int64

# Predict in Validate

In [7]:
npy_path = glob("./datasets/training_data_5d/train/*.npy")
df = pd.Series(npy_path,name="npy_path").to_frame()
df['classes'] = df['npy_path'].apply(lambda x: int(x.split('.')[1][-1])-1)
# df['area_id'] = df['npy_path'].apply(lambda x: x.split('_')[2])
df['area_id'] = df['npy_path'].apply(lambda x: x.split('_')[3])
df

Unnamed: 0,npy_path,classes,area_id
0,./datasets/training_data_5d/train/train_0000_0...,0,0000
1,./datasets/training_data_5d/train/train_0001_0...,0,0001
2,./datasets/training_data_5d/train/train_0001_0...,0,0001
3,./datasets/training_data_5d/train/train_0002_0...,0,0002
4,./datasets/training_data_5d/train/train_0002_0...,0,0002
...,...,...,...
5914,./datasets/training_data_5d/train/train_1315_0...,2,1315
5915,./datasets/training_data_5d/train/train_1316_0...,2,1316
5916,./datasets/training_data_5d/train/train_1316_0...,2,1316
5917,./datasets/training_data_5d/train/train_1316_0...,2,1316


In [8]:
train_id,test_id = train_test_split(df['area_id'].unique(), test_size=0.1, random_state=42)

# df_train, df_val = train_test_split(df, test_size=0.1, random_state=42)
df_train = df.loc[[(i in train_id) for i in df['area_id']]]
df_val = df.loc[[(i in test_id) for i in df['area_id']]]

df_train.shape,df_val.shape

((5446, 3), (473, 3))

In [13]:
# Dataset Loader
class SatDataset(torch.utils.data.Dataset):
    def __init__(self,df):
        self.npy_path = list(df['npy_path'])
        self.label = list(df['classes'])
        self.area_id = list(df['area_id'])

    def __len__(self):
        return len(self.npy_path)
    def __getitem__(self, idx):
        data = np.load(self.npy_path[idx])
        data = torch.from_numpy(np.swapaxes(data, -3, -1)).float()
        return {'image': data, 'label': self.label[idx],'area_id':self.area_id[idx]}

val_dataset = SatDataset(df_val)


# dataloaders
batch_size = 1024
test_loader  = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

In [14]:
pred_label = []
label_masks_score = []
area_ids = []

for batch in test_loader:
    batch_sz = batch['image'].shape[0]
    
    img = batch['image'].cuda()
    with torch.no_grad():
        logits = model(img).cpu().softmax(axis=1).numpy()
    pred_label += [logits]
    area_ids += batch['area_id']
    label_masks_score += [(img.cpu().numpy()!=0).reshape((batch_sz,-1)).sum(axis=1)/np.prod(batch['image'].shape[1:])]
pred_label = np.concatenate(pred_label)
label_masks_score = np.concatenate(label_masks_score)


In [15]:
pred_dict = dict()
for area_id,logit,mask_score in zip(area_ids,pred_label,label_masks_score):
    if area_id in pred_dict.keys():
        pred_dict[area_id] += mask_score*(logit)
    else:
        pred_dict[area_id] = mask_score*logit
pred_dict = {int(k):np.argmax(v) for k,v in pred_dict.items()}

In [27]:
df_val_res = pd.DataFrame([{int(i):j for i,j in zip(df_val.area_id,df_val.classes)},pred_dict],index = ['act_label','pred_label']).T
act_label = df_val_res.act_label.values
pred_label = df_val_res.pred_label.values
(act_label==pred_label).sum()/len(act_label)

0.6439393939393939

In [28]:
from sklearn.metrics import classification_report
print(classification_report(act_label, pred_label))

              precision    recall  f1-score   support

           0       0.70      0.70      0.70        60
           1       0.68      0.71      0.70        21
           2       0.57      0.70      0.63        37
           3       0.50      0.14      0.22        14

    accuracy                           0.64       132
   macro avg       0.61      0.56      0.56       132
weighted avg       0.64      0.64      0.63       132



In [11]:
import pandas as pd
import numpy as np
df = pd.read_csv('submissions/S03_M2_ClassificationTraining_CONV1D_Result.csv')
df = df[['crop_type']]
df = pd.Series(df.iloc[:12]['crop_type'].tolist()+[np.nan]+df.iloc[12:]['crop_type'].tolist(),name='crop_type').to_frame()
df = df.dropna()
df

Unnamed: 0,crop_type
0,4.0
1,2.0
2,2.0
3,2.0
4,1.0
...,...
560,2.0
561,3.0
562,1.0
563,1.0


In [None]:
df.to_csv('')