<a href="https://colab.research.google.com/github/edrian-liao/harvestnet/blob/main/notebooks/HarvestNet2_Evaluate_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install wandb -qU
!pip install torchmetrics -qU
!pip install accelerate -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m266.8/266.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m841.5/841.5 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import time
import torch
from torch.utils.data import DataLoader
import torchvision
import wandb
import numpy as np
from torchmetrics import Accuracy, F1Score, AUROC, Precision, Recall
from accelerate import Accelerator
from collections import defaultdict
import cv2

In [3]:
from tools.dataset import SkysatLabelled
from tools.config import Config_Resnet

In [4]:
# -----------------
# CONFIG
# -----------------

config = Config_Resnet()


# Seed
torch.manual_seed(config.seed)
np.random.seed(config.seed)


accelerator = Accelerator(
    log_with="wandb",
    mixed_precision=config.mixed_precision,
)
device = accelerator.device

# Log on each process the small summary:
if accelerator.is_main_process:
    print(f"Training/evaluation parameters:")
    print(config.__dict__)

accelerator.init_trackers(
    config.wandb_project,
    config=config,
    init_kwargs={
        "wandb": {
            "group": config.wandb_group,
            "reinit": True,
            "dir": os.path.join(config.working_dir),
        }
    },
)

LOGGING = True
if LOGGING:
    accelerator.init_trackers(
        config.wandb_project,
        config=config,
        init_kwargs={
            "wandb": {
                "group": config.wandb_group,
                "reinit": True,
                "dir": os.path.join(config.working_dir),
            }
        },
    )


# -----------------
# DATASET
# -----------------
if accelerator.is_main_process:
    print("Loading datasets")
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize((224, 224)),
        # torchvision.transforms.RandomHorizontalFlip(),
        # torchvision.transforms.RandomVerticalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.412, 0.368, 0.326], std=[0.110, 0.097, 0.098]
        ),  # our dataset vals
    ]
)

Training/evaluation parameters:
{'working_dir': 'harvest-piles', 'dataset_path': 'dataset', 'wandb_project': 'harvest-piles', 'wandb_group': 'resnet50', 'seed': 2023, 'scheduler': 'one_cycle_lr', 'lr': 0.001, 'optimizer': 'madgrad', 'mixed_precision': 'fp16', 'num_train_epochs': 30, 'batch_size': 32}


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='0.012 MB of 0.012 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Loading datasets


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
!unzip "/content/drive/MyDrive/Academics/Spring 2024/ECE 661/Final Project/Dataset.zip" -d "/content/dataset"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/dataset/skysat_images/1572.tif  
  inflating: /content/dataset/skysat_images/157200.tif  
  inflating: /content/dataset/skysat_images/157201.tif  
  inflating: /content/dataset/skysat_images/157202.tif  
  inflating: /content/dataset/skysat_images/157203.tif  
  inflating: /content/dataset/skysat_images/157204.tif  
  inflating: /content/dataset/skysat_images/157205.tif  
  inflating: /content/dataset/skysat_images/157206.tif  
  inflating: /content/dataset/skysat_images/157207.tif  
  inflating: /content/dataset/skysat_images/157208.tif  
  inflating: /content/dataset/skysat_images/157209.tif  
  inflating: /content/dataset/skysat_images/157210.tif  
  inflating: /content/dataset/skysat_images/157211.tif  
  inflating: /content/dataset/skysat_images/157212.tif  
  inflating: /content/dataset/skysat_images/157213.tif  
  inflating: /content/dataset/skysat_images/157214.tif  
  inflating: /content/dat

In [42]:
# filter the broken images from train and test csv
import pandas as pd
test = pd.read_csv('dataset/labels_all.csv')
test = test.iloc[:, 1:]
test_filtered = test[~(test['filename']=='6014.tif')]
test_filtered.reset_index(inplace=True)

print(test_filtered.shape)

(6914, 14)


In [43]:
for index, row in test_filtered.iterrows():
    if int(row["filename"][:-4]) != index:
        print (index)
        break

# drop level_0 and index columns
test_filtered.drop(["index"], axis=1, inplace=True)
print(test_filtered.shape)

3
(6914, 13)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_filtered.drop(["index"], axis=1, inplace=True)


In [45]:
test_filtered.to_csv("dataset/labels_all.csv")

In [60]:
test_dataset = SkysatLabelled(
    os.path.join(config.dataset_path, "test.csv"),
    os.path.join(config.dataset_path, "skysat_images"),
    transform
)

In [61]:
len(test_dataset)

1383

In [62]:
test_dataloader = DataLoader(
    test_dataset,
    batch_size=config.batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

In [63]:
class ResNet50(torch.nn.Module):
    def __init__(self, num_classes, model_weights_path):
        super(ResNet50, self).__init__()
        self.resnet50 = torchvision.models.resnet50()
        num_features = self.resnet50.fc.out_features
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(num_features, num_classes),
            torch.nn.Sigmoid(),
        )
        # Load the state dictionary
        state_dict = torch.load(model_weights_path)
        # Assign the loaded state dictionary to the model
        self.load_state_dict(state_dict)

    def forward(self, x):
        logits = self.resnet50(x)
        return self.fc(logits)

# Example usage:
num_classes = 1  # Number of output classes
model_weights_path = 'best_resnet_augment_new_data.pt'
model = ResNet50(num_classes, model_weights_path)

In [68]:
import torch
import torch.nn as nn

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        BCE_loss = nn.BCEWithLogitsLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduction == 'mean':
            return torch.mean(F_loss)
        elif self.reduction == 'sum':
            return torch.sum(F_loss)
        else:
            return F_loss

# Create the Focal Loss criterion
# criterion = FocalLoss(alpha=1, gamma=2)
criterion = torch.nn.BCELoss()

accuracy = Accuracy(task="binary")
f1_score = F1Score(task="binary")
auroc = AUROC(task="binary")
precision = Precision(task="binary")
recall = Recall(task="binary")


In [69]:
# -----------------
# ACCELERATOR
# -----------------
model, test_dataloader = accelerator.prepare(
    model, test_dataloader
)
accuracy, f1_score, auroc, precision, recall = accelerator.prepare(
    accuracy, f1_score, auroc, precision, recall
)

In [70]:
epoch_loss = 0
sum_metrics = defaultdict(float)

model.eval()

with torch.no_grad():
    for x, y, _ in test_dataloader:
        pred = model(x)
        loss = criterion(pred, y)
        evals = {
            "acc": accuracy(pred, y),
            "f1": f1_score(pred, y),
            "auroc": auroc(pred, y),
            "precision": precision(pred, y),
            "recall": recall(pred, y),
        }
        for k, v in evals.items():
            sum_metrics[k] += v

        epoch_loss += loss.item()

epoch_loss /= len(test_dataloader)
avg_metrics = {k: v / len(test_dataloader) for k, v in sum_metrics.items()}
print("Test: ", avg_metrics)

Test:  {'acc': tensor(0.7176, device='cuda:0'), 'f1': tensor(0.5287, device='cuda:0'), 'auroc': tensor(0.8307, device='cuda:0'), 'precision': tensor(0.9152, device='cuda:0'), 'recall': tensor(0.3868, device='cuda:0')}


In [None]:
len(predictions)

10188

In [None]:
predictions = np.where(np.array(predictions) >= 0.5, 1, 0)

In [None]:
import pandas as pd
df = pd.read_csv("dataset/train_buff_pos_subdivided.csv")

In [None]:
df

Unnamed: 0.1,Unnamed: 0,filename,lat_1,lon_1,lat_2,lon_2,activity,altitude,lat_mean,lon_mean,year,month,day,group
0,3807,16965_1.tif,14.114066,36.939437,14.111717,36.941785,1.0,783,14.112892,36.940611,2021.0,4.0,3.0,-1
1,3807,16965_2.tif,14.114066,36.939437,14.111717,36.941785,1.0,783,14.112892,36.940611,2021.0,4.0,3.0,-1
2,3807,16965_3.tif,14.114066,36.939437,14.111717,36.941785,1.0,783,14.112892,36.940611,2021.0,4.0,3.0,-1
3,3807,16965_4.tif,14.114066,36.939437,14.111717,36.941785,1.0,783,14.112892,36.940611,2021.0,4.0,3.0,-1
4,4361,22851_1.tif,12.960310,39.512394,12.957991,39.514714,1.0,2709,12.959150,39.513554,2022.0,12.0,8.0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10183,2253,8998_4.tif,14.110046,37.358114,14.107704,37.360456,1.0,1276,14.108875,37.359285,2020.0,11.0,19.0,-1
10184,2892,12217_1.tif,13.858246,36.872531,13.855908,36.874870,1.0,792,13.857077,36.873701,2020.0,12.0,2.0,4
10185,2892,12217_2.tif,13.858246,36.872531,13.855908,36.874870,1.0,792,13.857077,36.873701,2020.0,12.0,2.0,4
10186,2892,12217_3.tif,13.858246,36.872531,13.855908,36.874870,1.0,792,13.857077,36.873701,2020.0,12.0,2.0,4


In [None]:
df["activity"] = predictions
df.to_csv("dataset/train_mod_pos_subdivided.csv")

In [None]:
test_dataset = SkysatLabelled(
    os.path.join(config.dataset_path, "test_buff_pos_subdivided.csv"),
    os.path.join(config.dataset_path, "patch_images"),
    transform
)

In [None]:
test_dataloader = DataLoader(
    test_dataset,
    batch_size=config.batch_size,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [None]:
model.eval()
predictions=[]
with torch.no_grad():
  for x, y, _ in test_dataloader:
    pred = model(x)
    predictions.extend(np.array(pred.view(-1).cpu()))

  self.pid = os.fork()
  self.pid = os.fork()


In [None]:
predictions = np.where(np.array(predictions) >= 0.5, 1, 0)

In [None]:
df = pd.read_csv("dataset/test_buff_pos_subdivided.csv")
df

Unnamed: 0.1,Unnamed: 0,filename,lat_1,lon_1,lat_2,lon_2,activity,altitude,lat_mean,lon_mean,year,month,day,group
0,1175,3219_1.tif,14.205127,36.610659,14.202785,36.613002,1.0,618,14.203956,36.611830,2020.0,11.0,13.0,19
1,1175,3219_2.tif,14.205127,36.610659,14.202785,36.613002,1.0,618,14.203956,36.611830,2020.0,11.0,13.0,19
2,1175,3219_3.tif,14.205127,36.610659,14.202785,36.613002,1.0,618,14.203956,36.611830,2020.0,11.0,13.0,19
3,1175,3219_4.tif,14.205127,36.610659,14.202785,36.613002,1.0,618,14.203956,36.611830,2020.0,11.0,13.0,19
4,4946,38463_1.tif,12.483937,37.404269,12.481618,37.406588,1.0,1981,12.482778,37.405429,2020.0,12.0,20.0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2403,6747,157330_4.tif,11.971195,38.980651,11.968880,38.983003,1.0,1973,11.970037,38.981827,2022.0,11.0,5.0,-1
2404,4433,23133_1.tif,13.064686,39.526311,13.062366,39.528630,1.0,2331,13.063526,39.527471,2022.0,12.0,8.0,-1
2405,4433,23133_2.tif,13.064686,39.526311,13.062366,39.528630,1.0,2331,13.063526,39.527471,2022.0,12.0,8.0,-1
2406,4433,23133_3.tif,13.064686,39.526311,13.062366,39.528630,1.0,2331,13.063526,39.527471,2022.0,12.0,8.0,-1


In [None]:
df["activity"] = predictions
df.to_csv("dataset/test_mod_pos_subdivided.csv")