# Quantitative analyses
In this notebook, I include all of my quantitative analyses and results.

## Setup
To pull from the GitHub repository in Colab:
```
%cd drive/MyDrive/Conservation\ Research/Code/counting-cranes
!git add .
!git stash
!git pull
```
----

In [1]:
#Mounting Google Drive so that files under /content/drive (e.g. the repo's requirements and config) can be read
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
#Will have to restart runtime after running this cell!
!pip install -r "/content/drive/MyDrive/Conservation Research/Code/counting-cranes/requirements.txt"

Collecting torch==1.8.1
  Downloading torch-1.8.1-cp37-cp37m-manylinux1_x86_64.whl (804.1 MB)
[K     |████████████████████████████████| 804.1 MB 2.8 kB/s 
[?25hCollecting torchvision==0.9.1
  Downloading torchvision-0.9.1-cp37-cp37m-manylinux1_x86_64.whl (17.4 MB)
[K     |████████████████████████████████| 17.4 MB 118 kB/s 
[?25hCollecting torchtext==0.9.1
  Downloading torchtext-0.9.1-cp37-cp37m-manylinux1_x86_64.whl (7.1 MB)
[K     |████████████████████████████████| 7.1 MB 29.4 MB/s 
[?25hCollecting Pillow==8.1.0
  Downloading Pillow-8.1.0-cp37-cp37m-manylinux1_x86_64.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 45.7 MB/s 
[?25hCollecting jupyterlab==1.0.2
  Downloading jupyterlab-1.0.2-py2.py3-none-any.whl (17.4 MB)
[K     |████████████████████████████████| 17.4 MB 162 kB/s 
[?25hCollecting albumentations==1.0.0
  Downloading albumentations-1.0.0-py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 7.4 MB/s 
[?25hCollecting numpy==1.20

In [41]:
#Loading the project config and unpacking the settings used throughout this notebook
import json

CONFIG_FP = '/content/drive/MyDrive/Conservation Research/Code/counting-cranes/config.json'

#  the context manager closes the file handle as soon as the JSON has been parsed
with open(CONFIG_FP, 'r') as config_file:
  config = json.load(config_file)

#  filepaths
DATA_FP = config['data_filepath_colab']
CODE_FP = config['code_filepath_colab']
MODEL_SAVE_FP = config['model_saves_filepath_colab']

#  reproducibility settings (FIXED_PERM is sliced below to recover the test set)
SEEDS = config['random_seeds']
FIXED_PERM = config['fixed_data_permutation']

#  model hyperparameters
ASPDNET_HYPERPARAMETERS = config['ASPDNet_params']
FASTER_RCNN_HYPERPARAMETERS = config['faster_rcnn_params']
tile_size = tuple(config['tile_size'])

In [42]:
import sys
import os
sys.path.append(CODE_FP) 
sys.path.append(os.path.join(CODE_FP, 'density_estimation'))
sys.path.append(os.path.join(CODE_FP, 'object_detection'))
sys.path.append(os.path.join(CODE_FP, 'density_estimation', 'ASPDNet'))

from bird_dataset import *
from utils import *
from ASPDNet_model import *
from ASPDNet.model import ASPDNet
from faster_rcnn_model import *

#  explicit third-party imports come after the star imports so these names always win
import numpy as np
import torch
from torch.utils.data import DataLoader
from pytorch_lightning import seed_everything
import pandas as pd
from itertools import chain

## General stuff

In [43]:
#Establishing the datasets/dataloaders
#  both evaluation datasets share the same root directory, tiling method, and tile size
shared_dataset_kwargs = dict(root_dir = DATA_FP,
                             tiling_method = 'w_o_overlap',
                             tile_size = tile_size)
bird_dataset_eval_aspdnet = BirdDataset(transforms = get_transforms('density_estimation', train = False),
                                        annotation_mode = 'points',
                                        sigma = 3,
                                        **shared_dataset_kwargs)
bird_dataset_eval_frcnn = BirdDataset(transforms = get_transforms('object_detection', train = False),
                                      annotation_mode = 'bboxes',
                                      **shared_dataset_kwargs)

#  recovering the test set (everything from position 28 onward in the fixed permutation)
indices = FIXED_PERM
test_split = indices[28 : ]
print(f'Indices used: {test_split}')
dataset_test_aspdnet = torch.utils.data.Subset(bird_dataset_eval_aspdnet, test_split)
dataset_test_frcnn = torch.utils.data.Subset(bird_dataset_eval_frcnn, test_split)

#  wrapping datasets in dataloaders (no shuffling, so batches always arrive in the same order)
dataloader_test_aspdnet = DataLoader(dataset_test_aspdnet,
                                     batch_size = ASPDNET_HYPERPARAMETERS['batch_size'],
                                     shuffle = False,
                                     collate_fn = collate_tiles_density)
dataloader_test_frcnn = DataLoader(dataset_test_frcnn,
                                   batch_size = FASTER_RCNN_HYPERPARAMETERS['batch_size'],
                                   shuffle = False,
                                   collate_fn = collate_tiles_object_detection)

Indices used: [21, 25, 39, 13, 30, 32, 10, 28, 4, 9, 2, 1]


In [44]:
#Loading both models (each is put into eval mode for inference)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#  ASPDNet...
save_name = 'ASPDNet_no_neg_densities_200_epochs_7.7.2021.ckpt' #TODO: switch w/custom perm model save!
aspdnet = ASPDNet(allow_neg_densities = False).to(device)
pl_model_aspdnet = ASPDNetLightning.load_from_checkpoint(os.path.join(MODEL_SAVE_FP, 'ASPDNet', save_name), model = aspdnet)
pl_model_aspdnet.model.eval()

#  Faster R-CNN... 
save_name = 'faster_rcnn_custom_permutation_7.30.2021.pth'
frcnn = get_faster_rcnn(backbone = 'ResNet50', num_classes = 2, **FASTER_RCNN_HYPERPARAMETERS['constructor_hyperparams']).to(device) 
#  map_location remaps the saved tensors onto the active device, so weights saved from a GPU still load on a CPU-only runtime
frcnn_state_dict = torch.load(os.path.join(MODEL_SAVE_FP, 'faster_rcnn', 'final_models', save_name), map_location = device)
frcnn.load_state_dict(frcnn_state_dict)
pl_model_frcnn = FasterRCNNLightning(model = frcnn)
pl_model_frcnn.model.eval();

## **Performance at different AGLs**
How does performance change at different flight heights?

In [71]:
#Prepping the metadata stuff... to keep track of pred/true counts and AGL
image_metadata = pd.read_csv(os.path.join(DATA_FP, 'dataset_metadata.csv'), index_col = 0) #pulling in the metadata file

#  grabbing the names of the test images
test_indices = FIXED_PERM[28 : ]
test_image_names = [bird_dataset_eval_frcnn.image_fps[i] for i in test_indices]

#  getting metadata for just the test images
#    names are upper-cased on both sides, so the match ignores case
test_names_upper = {fp.upper() for fp in test_image_names}
is_test_image = image_metadata['Image_ID'].str.upper().isin(test_names_upper)
#    .copy() gives an independent frame, so columns added in later cells never write into a slice of image_metadata
test_metadata = image_metadata[is_test_image].copy()

### _Faster R-CNN_ 

In [46]:
#Predicting on all imagery and recording true/pred count
#  img_performances maps image name -> (predicted count, true count)
img_performances = {}

with torch.no_grad():
  for tiles, targets, image_names, _ in dataloader_test_frcnn:
    tiles = [t.to(device) for t in tiles]
    preds = pl_model_frcnn(tiles)

    #  counting boxes across every tile in the batch
    pred_count = sum(len(pred_dict['boxes']) for pred_dict in preds)
    true_count = sum(len(t['boxes']) for t in targets)

    #  dropping everything after the last underscore of the first tile's name, then adding '.TIF'
    #  (assumes every tile in a batch belongs to the same parent image - TODO confirm)
    parent_stem = image_names[0].rpartition('_')[0]
    img_name = parent_stem + '.TIF'
    img_performances[img_name] = (pred_count, true_count)

### _ASPDNet_

In [65]:
#Predicting on all imagery and recording true/pred count
#  img_performances maps image name -> (predicted count, true count)
img_performances = {}

#  the Faster R-CNN loader is walked in lockstep only to supply the image names
#  (assumes both loaders yield the same images in the same order - TODO confirm)
paired_batches = zip(dataloader_test_aspdnet, dataloader_test_frcnn)

with torch.no_grad():
  for (tiles, densities, counts), (_, _, image_names, _) in paired_batches:
    tiles = tiles.to(device)
    preds = pl_model_aspdnet(tiles)

    #  the predicted count is the sum over the predicted density output
    pred_count = float(preds.sum())
    true_count = sum(counts)

    #  dropping everything after the last underscore of the first tile's name, then adding '.TIF'
    parent_stem = image_names[0].rpartition('_')[0]
    img_name = parent_stem + '.TIF'
    img_performances[img_name] = (pred_count, true_count)

In [72]:
#Adding these values to the metadata dataframe
test_metadata = test_metadata.sort_values('Image_ID', axis = 0)

#  NOTE: predictions and metadata rows are paired purely by position after sorting both by name,
#  which assumes the sorted dictionary keys line up one-to-one with the sorted Image_IDs
performance_tuples = [img_performances[k] for k in sorted(img_performances.keys())]
if len(performance_tuples) != len(test_metadata):
  raise ValueError(f'Have {len(performance_tuples)} predictions but {len(test_metadata)} metadata rows; cannot pair them up by position')
test_metadata['pred_counts'] = [p[0] for p in performance_tuples]
test_metadata['true_counts'] = [p[1] for p in performance_tuples]

#  errors = 'ignore' keeps this cell re-runnable: on a second run 'num_bboxes' is already gone
test_metadata = test_metadata.drop('num_bboxes', axis = 1, errors = 'ignore')
test_metadata

Unnamed: 0,Image_ID,AGL_feet,pred_counts,true_counts
38,20180320_212958_600_9152.tif,5000,2043.822266,1928
36,20180320_213531_728_9420.tif,5000,1670.977295,2298
30,20180321_220440_395_1832.tif,4000,1764.5271,2882
35,20180321_223216_097_2818.tif,4000,1960.76416,1758
29,FLIR2_20210321_201851_358_2510.TIF,3500,5103.20752,4283
2,FLIR2_20210321_205850_355_4974.TIF,2500,1742.001221,1742
10,FLIR2_20210321_223145_103_6726.TIF,2000,1854.508423,1862
14,FLIR2_20210321_223624_804_6818.TIF,2000,1940.706543,2158
17,FLIR2_20210321_224952_670_7004.TIF,3500,179.527466,116
19,FLIR2_20210321_225423_648_7070.TIF,3000,180.429092,190


In [73]:
#CHECK: we should be able to recover MPE/RMSE/MAE metrics here - DELETE EVENTUALLY!
pc = test_metadata['pred_counts']
tc = test_metadata['true_counts']

#  per-image signed error, computed once and reused by the metrics below
signed_error = pc - tc

ae = np.absolute(signed_error)  #absolute error
pe = 100 * ae / tc              #absolute percentage error
se = signed_error ** 2          #squared error

pe.mean()

15.940415397293298

## **Performance in 2018 vs 2021 imagery**
How does performance on the 2021 imagery match up against 2018 imagery performance?

*(Here, I'm just adding to the existing metadata/predictions dataframe!)*

In [75]:
def sort_2018_v_2021(image_fp):
  """Return the year assigned to an image filename: 2021 if it starts with 'FLIR2', otherwise 2018."""
  if image_fp.startswith('FLIR2'):
    return 2021
  return 2018

#  tagging each test image with a year based on its filename
image_years = test_metadata['Image_ID'].apply(sort_2018_v_2021)
test_metadata['year_taken'] = image_years
test_metadata

Unnamed: 0,Image_ID,AGL_feet,pred_counts,true_counts,year_taken
38,20180320_212958_600_9152.tif,5000,2043.822266,1928,2018
36,20180320_213531_728_9420.tif,5000,1670.977295,2298,2018
30,20180321_220440_395_1832.tif,4000,1764.5271,2882,2018
35,20180321_223216_097_2818.tif,4000,1960.76416,1758,2018
29,FLIR2_20210321_201851_358_2510.TIF,3500,5103.20752,4283,2021
2,FLIR2_20210321_205850_355_4974.TIF,2500,1742.001221,1742,2021
10,FLIR2_20210321_223145_103_6726.TIF,2000,1854.508423,1862,2021
14,FLIR2_20210321_223624_804_6818.TIF,2000,1940.706543,2158,2021
17,FLIR2_20210321_224952_670_7004.TIF,3500,179.527466,116,2021
19,FLIR2_20210321_225423_648_7070.TIF,3000,180.429092,190,2021
