# Qualitative analyses
In this notebook, I include all of my qualitative analyses/results.

## Setup
To pull from the GitHub repository in Colab:
```
%cd drive/MyDrive/Conservation\ Research/Code/counting-cranes
!git add .
!git stash
!git pull
```
----

In [1]:
#Mounting Google Drive...
#  later cells read the requirements file and config.json from paths under /content/drive, so this has to run first
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#Will have to restart runtime after running this cell!
!pip install -r "/content/drive/MyDrive/Conservation Research/Code/counting-cranes/requirements.txt"

Collecting torch==1.8.1
  Downloading torch-1.8.1-cp37-cp37m-manylinux1_x86_64.whl (804.1 MB)
[K     |████████████████████████████████| 804.1 MB 2.7 kB/s 
[?25hCollecting torchvision==0.9.1
  Downloading torchvision-0.9.1-cp37-cp37m-manylinux1_x86_64.whl (17.4 MB)
[K     |████████████████████████████████| 17.4 MB 118 kB/s 
[?25hCollecting torchtext==0.9.1
  Downloading torchtext-0.9.1-cp37-cp37m-manylinux1_x86_64.whl (7.1 MB)
[K     |████████████████████████████████| 7.1 MB 15.4 MB/s 
[?25hCollecting Pillow==8.1.0
  Downloading Pillow-8.1.0-cp37-cp37m-manylinux1_x86_64.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 25.3 MB/s 
[?25hCollecting jupyterlab==1.0.2
  Downloading jupyterlab-1.0.2-py2.py3-none-any.whl (17.4 MB)
[K     |████████████████████████████████| 17.4 MB 146 kB/s 
[?25hCollecting albumentations==1.0.0
  Downloading albumentations-1.0.0-py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 6.1 MB/s 
[?25hCollecting numpy==1.20

In [9]:
import json

#Reading the project config
#  the with-block closes the file handle as soon as the JSON has been parsed
with open('/content/drive/MyDrive/Conservation Research/Code/counting-cranes/config.json', 'r') as config_file:
  config = json.load(config_file)

#Pulling out the Colab-specific filepaths...
DATA_FP = config['data_filepath_colab']
CODE_FP = config['code_filepath_colab']
MODEL_SAVE_FP = config['model_saves_filepath_colab']

#  ...the shared random seed...
SEED = config['random_seed']

#  ...and the per-model hyperparameter dictionaries
ASPDNET_HYPERPARAMETERS = config['ASPDNet_params']
FASTER_RCNN_HYPERPARAMETERS = config['faster_rcnn_params']

#  converted to a tuple before being handed to BirdDataset (presumably a JSON array in the config -- TODO confirm)
tile_size = tuple(config['tile_size'])

In [3]:
import sys
import os

#Making the project's code directories importable
#  guarded with a membership check so that re-running this cell does not keep appending duplicate entries to sys.path
for _subdir in ((), ('density_estimation', ), ('object_detection', ), ('density_estimation', 'ASPDNet')):
  _code_dir = os.path.join(CODE_FP, *_subdir) #an empty tuple leaves CODE_FP itself
  if _code_dir not in sys.path:
    sys.path.append(_code_dir)

#Project-local modules (found via the sys.path entries added above)
#  NOTE(review): the star imports hide which module provides names used later (e.g. BirdDataset, get_transforms, get_faster_rcnn);
#  consider switching to explicit imports once the providing modules are confirmed
from bird_dataset import *
from ASPDNet_model import *
from ASPDNet.model import ASPDNet
from faster_rcnn_model import *

import torch
from torch.utils.data import DataLoader
from pytorch_lightning import seed_everything

## General stuff

In [4]:
#Setting our random seed for all operations (PyTorch, numpy, python.random)
#  SEED comes from the 'random_seed' entry of config.json
#  the trailing semicolon keeps the notebook from echoing the call's return value
seed_everything(SEED);

Global seed set to 1693


In [10]:
#Building one evaluation dataset per model, then the matching test-split dataloaders
#  both datasets use eval-mode transforms (train = False) and the 'w_o_overlap' tiling method

#  density estimation (ASPDNet): point annotations
eval_transforms_aspdnet = get_transforms('density_estimation', train = False)
bird_dataset_eval_aspdnet = BirdDataset(
  root_dir = DATA_FP,
  transforms = eval_transforms_aspdnet,
  tiling_method = 'w_o_overlap',
  annotation_mode = 'points',
  tile_size = tile_size,
  sigma = 3,
)

#  object detection (Faster R-CNN): annotation_mode is left at BirdDataset's default
eval_transforms_frcnn = get_transforms('object_detection', train = False)
bird_dataset_eval_frcnn = BirdDataset(
  root_dir = DATA_FP,
  transforms = eval_transforms_frcnn,
  tiling_method = 'w_o_overlap',
  tile_size = tile_size,
)

#Recovering the test set: everything from position 28 onward in a random permutation of the dataset indices
#  NOTE(review): this only reproduces the split used at training time if the RNG state matches (seed + order of random calls) -- confirm
#  NOTE(review): the same indices are applied to both datasets, which assumes they list their items in the same order -- confirm
indices = torch.randperm(len(bird_dataset_eval_aspdnet)).tolist() #TODO: switch w/the custom permutation once you have the dataset all in order!
test_indices = indices[28 : ]
dataset_test_aspdnet, dataset_test_frcnn = (
  torch.utils.data.Subset(eval_dataset, test_indices)
  for eval_dataset in (bird_dataset_eval_aspdnet, bird_dataset_eval_frcnn)
)

#Wrapping the test subsets in dataloaders (no shuffling; each model gets its own batch size and collate function)
dataloader_test_aspdnet = DataLoader(
  dataset_test_aspdnet,
  batch_size = ASPDNET_HYPERPARAMETERS['batch_size'],
  shuffle = False,
  collate_fn = collate_tiles_density,
)
dataloader_test_frcnn = DataLoader(
  dataset_test_frcnn,
  batch_size = FASTER_RCNN_HYPERPARAMETERS['batch_size'],
  shuffle = False,
  collate_fn = collate_tiles_object_detection,
)

In [13]:
#Loading both models
#  falls back to the CPU when no GPU is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#  ASPDnet...
#  the bare network is built first and handed to the Lightning wrapper, which restores itself from the checkpoint
save_name = 'ASPDNet_no_neg_densities_200_epochs_7.7.2021.ckpt'
aspdnet = ASPDNet().to(device)
pl_model_aspdnet = ASPDNetLightning.load_from_checkpoint(os.path.join(MODEL_SAVE_FP, 'ASPDNet', save_name), model = aspdnet)

#  Faster R-CNN...
#  here the weights are loaded straight into the network, which is then wrapped
#  map_location = device remaps the saved tensors onto the current device, so weights saved from a GPU run still load on a CPU-only runtime
save_name = 'model1_6.23.2021.pth'
frcnn = get_faster_rcnn(backbone = 'ResNet50', num_classes = 2, **FASTER_RCNN_HYPERPARAMETERS['constructor_hyperparams']).to(device) 
frcnn.load_state_dict(torch.load(os.path.join(MODEL_SAVE_FP, 'faster_rcnn', '40_epoch_runs', save_name), map_location = device))
pl_model_frcnn = FasterRCNNLightning(model = frcnn)

## Best/worst tiles
Seeing where each model struggles the most.

**NOTE: we'll split into negative (use MPE) and non-negative (use MAE) tiles.**

In [None]:
#Predict on all test tiles w/both models + save the predicted counts
#  - maybe save each tile w/its true/pred counts in a dict
#  - OR we could save the parent img num + tile num and re-extract...

In [None]:
#Predicting on tiles for Faster R-CNN
#  When we save the tile results, we need to be able to pull it out of the corresponding BirdDataset subset again...
#  tile_results maps (batch index i, position j within that batch) -> (pred count, true count)
#  NOTE(review): i only identifies a parent image if every batch holds exactly one image's tiles -- confirm against batch_size/collate_tiles_object_detection
frcnn.eval() #frcnn is the network wrapped by pl_model_frcnn; make sure it is not left in training mode for inference

tile_results = {}
with torch.no_grad(): #evaluation only, so there is no need to track gradients while predicting
  for i, (tiles, targets, img_fps, annot_fps) in enumerate(dataloader_test_frcnn): #TODO: will probably have to run this w/GPUs enabled!
    tiles = [t.to(device) for t in tiles]

    pred_counts = pl_model_frcnn.predict_counts(tiles)
    true_counts = [len(t['boxes']) for t in targets] #the number of annotated boxes is taken as the tile's true count

    for j, (pred, truth) in enumerate(zip(pred_counts, true_counts)):
      tile_results[(i, j)] = (pred, truth) #saving pred count, true count for each tile

In [None]:
#Calculate MAE/MPE for tiles + pull out the tiles w/the highest/lowest errors
#  AS WE HAVE THINGS SET UP: this would be the ith parent image in the corresponding BirdDataset subset and the jth tile in the tiling w/o overlap set