In [1]:
import os
import numpy as np
import torch
import pandas as pd
import geopandas
import albumentations
import pytorch_lightning
from functools import partial
from argparse import ArgumentParser, Namespace

from pytorch_lightning.callbacks import ModelCheckpoint, LambdaCallback
from torchvision.models import resnet18

from dataset import BaseDataModule
from utils import generate_or_read_labels
from pytorch_model import BaseModel, pytorch_transform, log_weights
from runners.predict_pytorch import predict
from pytorch_model_resnet import ResnetModel

In [2]:
# Run configuration, exposed as attributes (options.batch_size, ...) so it can be
# handed to helpers that expect an argparse-style namespace.
options = Namespace(
    # Input data locations.
    training_image_path='/home/fila/data/soil_line/unusable/CH',
    validation_image_path='/home/fila/data/soil_line/unusable/CH',
    shape_path='/home/fila/data/soil_line/unusable/fields_v2/fields.shp',
    excel_path='/home/fila/data/soil_line/unusable/fields_v2/flds_all_good.xls',
    # Where Lightning writes logs and checkpoints.
    log_path='/home/soilnn/logs/unusable',
    # Epoch length and loader settings.
    n_training_batches=100,
    n_validation_batches=10,
    batch_size=64,
    n_processes=16,
    buffer_size=1,
    buffer_update_size=16,
    # Sample geometry passed to the data module.
    image_size=128,
    resolution=30.0,
)

In [3]:
# --- Trainer -----------------------------------------------------------------
# Use a single GPU when CUDA is present, otherwise fall back to CPU.
cuda_present = torch.cuda.is_available()
# Keep every periodic checkpoint (save_top_k=-1), one every 12 epochs, with the
# validation precision embedded in the file name.
periodic_checkpoint = ModelCheckpoint(
    every_n_epochs=12,
    filename='{epoch:02d}-{val_precision:.2f}',
    save_top_k=-1,
)
trainer = pytorch_lightning.Trainer(
    gpus=1 if cuda_present else 0,
    max_epochs=120,
    # An "epoch" is a fixed number of batches; validation runs once per such epoch.
    limit_train_batches=options.n_training_batches,
    limit_val_batches=options.n_validation_batches,
    val_check_interval=options.n_training_batches,
    default_root_dir=options.log_path,
    num_sanity_val_steps=0,
    log_every_n_steps=10,
    gradient_clip_val=0.5,
    callbacks=[
        periodic_checkpoint,
        # LambdaCallback(on_after_backward=log_weights)  # uncomment for logging weights and grads
    ],
)

# --- Labels ------------------------------------------------------------------
# Field geometries, indexed by the shapefile's 'name' column.
fields = geopandas.read_file(options.shape_path).set_index('name')
# Label factory with the inputs shared by the training and validation sets bound.
label_lambda = partial(generate_or_read_labels, excel_path=options.excel_path, fields=fields)

# Label CSVs sit in the parent directory of the log directory.
label_dir = os.path.dirname(options.log_path)
training_labels = label_lambda(
    image_path=options.training_image_path,
    label_path=os.path.join(label_dir, 'training_173174.csv'),
)
validation_labels = label_lambda(
    image_path=options.validation_image_path,
    label_path=os.path.join(label_dir, 'validation_173174.csv'),
)

# --- Data module -------------------------------------------------------------
# Random flips are applied to training samples only; validation uses the plain transform.
flip_augmentation = albumentations.Compose([
    albumentations.HorizontalFlip(),
    albumentations.VerticalFlip(),
])
data_module = BaseDataModule(
    training_labels=training_labels,
    validation_labels=validation_labels,
    training_image_path=options.training_image_path,
    validation_image_path=options.validation_image_path,
    training_transform=partial(pytorch_transform, augmentation=flip_augmentation),
    validation_transform=pytorch_transform,
    fields=fields,
    batch_size=options.batch_size,
    n_processes=options.n_processes,
    buffer_size=options.buffer_size,
    buffer_update_size=options.buffer_update_size,
    image_size=options.image_size,
    resolution=options.resolution,
    # Looked up lazily so the data module always sees the trainer's live epoch counter.
    get_current_epoch=lambda: trainer.current_epoch,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [4]:
# Restore a trained model from a saved checkpoint, switch it to inference mode
# and place it on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
checkpoint_path = '/home/soilnn/logs/unusable/lightning_logs/version_113/checkpoints/epoch=119-val_precision=0.89.ckpt'
model = ResnetModel.load_from_checkpoint(checkpoint_path)
model.eval()
# Left as the last expression on purpose: the cell output shows the module layout.
model.to(device)

  stream(template_mgs % msg_args)


ResnetModel(
  (model): Sequential(
    (0): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Module(
        (0): Module(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): Module(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e

In [None]:
# Train whatever `model` is currently bound to, using the trainer and data module
# built in the setup cell. Run the sample re-weighting cell first if the adjusted
# sampling weights should take effect for this training run.
trainer.fit(model, datamodule=data_module)

In [6]:
# Run every checkpoint of one training run over the training images and store the
# predictions in a '<checkpoint>_results/' directory next to the checkpoint file.
models_dir = '/home/soilnn/logs/unusable/lightning_logs/version_114/checkpoints'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The '<checkpoint>_results/' directories created below live inside models_dir, so a
# plain os.listdir() would hand them to load_from_checkpoint when this cell is re-run.
# Keep only regular '*.ckpt' files, sorted so the processing order (and therefore the
# `model` / `train_predictions` left bound after the loop) is deterministic.
checkpoint_names = sorted(
    name for name in os.listdir(models_dir)
    if name.endswith('.ckpt') and os.path.isfile(os.path.join(models_dir, name))
)

for ckpt in checkpoint_names:
    model_path = os.path.join(models_dir, ckpt)
    model = ResnetModel.load_from_checkpoint(model_path)
    model.eval()
    model.to(device)

    train_predictions = predict(model, options, options.training_image_path, label_path='/home/soilnn/logs/training_173174.csv')
    result_path = model_path + '_results/'
    # exist_ok makes the cell safe to re-run without a separate existence check.
    os.makedirs(result_path, exist_ok=True)
    train_predictions.to_csv(os.path.join(result_path, 'train_predictions_173174.csv'))

    # valid_predictions = predict(model, options, options.validation_image_path, label_path='/home/soilnn/logs/validation.csv')
    # result_path = model_path + '_results/'
    # if not os.path.exists(result_path):
    #     os.makedirs(result_path)
    # valid_predictions.to_csv(os.path.join(result_path, 'validation_predictions.csv'))

In [12]:
# Previously saved training-set predictions; the first CSV column is restored as the index.
# Note that this file comes from run version_112.
train_predictions_csv = '/home/soilnn/logs/unusable/lightning_logs/version_112/checkpoints/epoch=119-val_precision=0.91.ckpt_results/train_predictions_173174.csv'
train_predictions = pd.read_csv(train_predictions_csv, index_col=0)

# Label tables held by the data module, under shorter names for the cells below.
train_labels, valid_labels = data_module.training_labels, data_module.validation_labels

In [13]:
# adjust weights for bad scenes
#
# Overwrites the training dataset's sampling weights in place, using the saved
# predictions: rows and columns where the prediction disagrees with the label get
# a larger weight. Class 0 uses base_file_names[0], class 1 uses base_file_names[1].
# Presumably rows are scenes (base file names) and columns are fields -- TODO confirm.
# NOTE(review): the two classes use different cut-offs (0.55 vs 0.5) and different
# additive offsets (100 vs 3); confirm the asymmetry is intentional.
pred__ = train_predictions
labels = train_labels

# Class 0, per-row weight: (number of label-0 entries predicted above 0.55)^2 + 100,
# restricted to the class-0 file names and normalised to sum to 1.
file_names_false = data_module.trd.dataset.base_file_names[0]
weights_false = ((pred__ > 0.55)[labels==0].sum(axis=1))**2 + 100
weights_false = weights_false[file_names_false]
weights_false = weights_false / weights_false.sum()

# Class 1, per-row weight: (number of label-1 entries predicted below 0.5)^2 + 3,
# restricted to the class-1 file names and normalised to sum to 1.
file_names_true = data_module.trd.dataset.base_file_names[1]
weights_true = ((pred__ < 0.5)[labels==1].sum(axis=1))**2 + 3
weights_true = weights_true[file_names_true]
weights_true = weights_true / weights_true.sum()

# Class 0, per-entry weight: prediction^2 + 0.01 (a higher score on a label-0 entry
# means a larger weight), with each row normalised to sum to 1.
# Entries whose label is not 0 are masked to NaN by the boolean-frame indexing and
# remain NaN after the division.
# NOTE(review): the commented-out uniform weights at the end of this cell use 0 for
# such entries instead -- confirm the dataset tolerates NaN here.
column_weights_false = pred__[labels==0] ** 2 + 0.01
column_weights_false = column_weights_false.loc[file_names_false]
column_weights_false = column_weights_false.div(column_weights_false.sum(axis=1), axis=0)

# Class 1, per-entry weight: (1 - prediction)^2 + 0.01 (a lower score on a label-1
# entry means a larger weight), row-normalised; same NaN caveat as above.
column_weights_true = (1 - pred__[labels==1]) ** 2 + 0.01
column_weights_true = column_weights_true.loc[file_names_true]
column_weights_true = column_weights_true.div(column_weights_true.sum(axis=1), axis=0)

# Install the new weights on the dataset. Row weights are stored as NumPy arrays,
# entry weights as DataFrames.
data_module.trd.dataset.base_file_name_weights[0] = weights_false.to_numpy()
data_module.trd.dataset.column_weights[0] = column_weights_false
data_module.trd.dataset.base_file_name_weights[1] = weights_true.to_numpy()
data_module.trd.dataset.column_weights[1] = column_weights_true

# Kept for reference: uniform per-class weights (presumably what the dataset starts
# with -- verify against the dataset class). Uncomment to undo the adjustment above.
# self = data_module.trd.dataset
# self.base_file_name_weights = [[1 / len(self.base_file_names[i]) for _ in self.base_file_names[i]] for i in
#                                range(self.n_classes)]
# self.column_weights = [
#     pd.DataFrame(data=(self.labels.to_numpy() == i) / (self.labels.to_numpy() == i).sum(axis=1, keepdims=True),
#                  index=self.labels.index,
#                  columns=self.labels.columns) for i in range(self.n_classes)]