<a href="https://colab.research.google.com/github/YolandaMDavis/NSSADNN_IQA/blob/wildtrack-iqa/wildtrack_multitask_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Only needed to copy data to local drive can be skipped if zip file is already available in working folder
import shutil
from zipfile import ZipFile

# mount google drive
from google.colab import drive
drive.mount('/content/drive') # for google colab. adjust accordingly
PARENT_DIR = '/content/drive/MyDrive/Wildtrack Group/IQA' 

# copy and extract tar file
shutil.copy(PARENT_DIR + '/data/WildTrack_Raw.zip', 'WildTrack_Raw.zip')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


'WildTrack_Raw.zip'

In [2]:
# Clone repo and copy in images. change working directory to repo's wildtrack branch

!git clone https://github.com/YolandaMDavis/NSSADNN_IQA.git
!mv WildTrack_Raw.zip NSSADNN_IQA/.
%cd "NSSADNN_IQA"
!git checkout wildtrack-iqa

with ZipFile('WildTrack_Raw.zip', 'r') as zipObj:
   # Extract all the contents of zip file in current directory
   zipObj.extractall()


Cloning into 'NSSADNN_IQA'...
remote: Enumerating objects: 80, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (70/70), done.[K
remote: Total 80 (delta 37), reused 32 (delta 9), pack-reused 0[K
Unpacking objects: 100% (80/80), done.
/content/NSSADNN_IQA
Branch 'wildtrack-iqa' set up to track remote branch 'wildtrack-iqa' from 'origin'.
Switched to a new branch 'wildtrack-iqa'


In [1]:
# director variables
import os
root_dir = '/content/NSSADNN_IQA'
data_dir = root_dir + '/RAW'
image_reference_file_suffix = '_image_references.csv'
%cd "/content/NSSADNN_IQA"

/content/NSSADNN_IQA


In [2]:
import os
import csv
import random
import torch
import yaml

from torch.utils.data import Dataset



def generate_data_files(sample_percentage=1):

    image_reference_list = []

    subdirectories = list(os.walk(data_dir, topdown=False))[:-1]
    for subdir in subdirectories:
        image_location = subdir[0]
        images = subdir[2]
        species_rating = image_location.rsplit('/', 1)[-1].replace('_', ' ')
        score = int(species_rating.rsplit(' ', 1)[-1])
        species_class = species_rating.rsplit(' ', 1)[:-1][0]
        if len(species_class.rsplit(' ', 1)) > 1:
            species = species_class.rsplit(' ')[0]
            animal_class = ' '.join(species_class.rsplit(' ')[1:])
        else:
            animal_class = 'Unknown'
            species = species_class

        for image in images:
            image_reference = (image_location, species, animal_class, image, score)
            image_reference_list.append(image_reference)

    # shuffle then split
    seed = 1234
    random.Random(seed).shuffle(image_reference_list)
    training = image_reference_list[:int(len(image_reference_list) * 0.6 * sample_percentage)]
    validation = image_reference_list[-int(len(image_reference_list) * 0.2 * sample_percentage):]
    testing = image_reference_list[-int(len(image_reference_list) * 0.2 * sample_percentage):]

    # generated reference splits
    for dataset in [('training', training), ('validation', validation), ('testing', testing)]:
        ref_file_name = root_dir + '/' + dataset[0] + image_reference_file_suffix
        with open(ref_file_name, 'w', newline='') as csvfile:
            image_ref_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
            image_ref_writer.writerows(dataset[1])


In [3]:
# take a percentage of the full data set as a training/test/validation sample
sample_size=.1
generate_data_files(sample_size)

In [4]:
import numpy as np
from scipy import stats
import torch.nn as nn
import random
from network import NSSADNN
from WildTrackDataset import WildTrackDataset

In [5]:
save_model = root_dir + "/model.pth"

seed = random.randint(10000000, 99999999)

torch.manual_seed(seed)
np.random.seed(seed)
print("seed:", seed)

config = {}
config["patch_size"] = 32
config["stride"] = 16

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

#device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataset = WildTrackDataset(root_dir + '/' + 'training' + image_reference_file_suffix, config, "train")
val_dataset = WildTrackDataset(root_dir + '/' + 'validation' + image_reference_file_suffix, config, "validation")
test_dataset = WildTrackDataset(root_dir + '/' + 'testing' + image_reference_file_suffix, config, "testing")

seed: 68121562
Processing file number:0
Processing file number:1
Processing file number:2
Processing file number:3
Processing file number:4
Processing file number:5
Processing file number:6
Processing file number:7
Processing file number:8
Processing file number:9
Processing file number:10
Processing file number:11
Processing file number:12
Processing file number:13
Processing file number:14
Processing file number:15
Processing file number:16
Processing file number:17
Processing file number:18
Processing file number:19
Processing file number:20
Processing file number:21
Processing file number:22
Processing file number:23
Processing file number:24
Processing file number:25
Processing file number:26
Processing file number:27
Processing file number:28
Processing file number:29
Processing file number:30
Processing file number:31
Processing file number:32
Processing file number:33
Processing file number:34
Processing file number:35
Processing file number:36
Processing file number:37
Process

In [6]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
batch_size = 16
epochs = 10000
lr = 0.000001

train_loader = torch.utils.data.DataLoader(train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            pin_memory=True,
                                            num_workers=0,  drop_last=True )

val_loader = torch.utils.data.DataLoader(val_dataset)
valnum = len(val_dataset.label)


test_loader = torch.utils.data.DataLoader(test_dataset)
testnum = len(test_dataset.label)

model = NSSADNN().to(device)
classify_criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
best_SROCC = -1

# training 
for epoch in range(epochs):
      # train
      model.train()
      LOSS_all = 0
      for i, (label, features) in enumerate(train_loader):
          label = label.type(torch.FloatTensor) 
          label = label.to(device).reshape(-1,1)
          features = features.to(device).float()
          optimizer.zero_grad()
          results = model(features)         
          loss = classify_criterion(results, label)
          loss.backward()
          optimizer.step()
          LOSS_all += float(loss.item())
          
      train_loss_all = LOSS_all / (i + 1)
      
      print("Epoc {},Quality Loss={:.3f}".format(epoch,train_loss_all))
      
      # val
      y_pred = np.zeros(valnum)
      y_val = np.zeros(valnum)
      model.eval()
      L = 0
      with torch.no_grad():
          for i, (label, features) in enumerate(val_loader):
              label = label.type(torch.FloatTensor) 
              label = label.to(device).reshape(-1,1)
              y_val[i] = label.item()
              features = features.to(device).float()
              results = model(features)
              y_pred[i] = (results > .5).float() * 1 
              loss = classify_criterion(results, label)
              L = L + loss.item()
      
          val_loss = L / (i + 1)
          val_SROCC = stats.spearmanr(y_pred, y_val)[0]
          val_PLCC = stats.pearsonr(y_pred, y_val)[0]
          val_KROCC = stats.stats.kendalltau(y_pred, y_val)[0]
          val_RMSE = np.sqrt(((y_pred - y_val) ** 2).mean())

           
      print("Epoch {} Valid Results: loss={:.3f} SROCC={:.3f} PLCC={:.3f} KROCC={:.3f} RMSE={:.3f}".format(epoch,
                                                                                                            val_loss,
                                                                                                            val_SROCC,
                                                                                                            val_PLCC,
                                                                                                            val_KROCC,
                                                                                                            val_RMSE))

      if val_SROCC > best_SROCC and epoch > 100:
          print("Update Epoch {} best valid SROCC".format(epoch))
          print("Valid Results: loss={:.3f} SROCC={:.3f} PLCC={:.3f} KROCC={:.3f} RMSE={:.3f}".format(val_loss,
                                                                                                      val_SROCC,
                                                                                                      val_PLCC,
                                                                                                      val_KROCC,
                                                                                                      val_RMSE))

          torch.save(model.state_dict(), save_model)
          best_SROCC = val_SROCC

      #scheduler.step()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoc 7500,Quality Loss=0.596
Epoch 7500 Valid Results: loss=0.665 SROCC=0.425 PLCC=0.425 KROCC=0.425 RMSE=0.559
Epoc 7501,Quality Loss=0.583
Epoch 7501 Valid Results: loss=0.702 SROCC=0.213 PLCC=0.213 KROCC=0.213 RMSE=0.654
Epoc 7502,Quality Loss=0.615
Epoch 7502 Valid Results: loss=0.661 SROCC=0.394 PLCC=0.394 KROCC=0.394 RMSE=0.577
Epoc 7503,Quality Loss=0.624
Epoch 7503 Valid Results: loss=0.663 SROCC=0.366 PLCC=0.366 KROCC=0.366 RMSE=0.586
Epoc 7504,Quality Loss=0.579
Epoch 7504 Valid Results: loss=0.694 SROCC=0.318 PLCC=0.318 KROCC=0.318 RMSE=0.612
Epoc 7505,Quality Loss=0.630
Epoch 7505 Valid Results: loss=0.683 SROCC=0.406 PLCC=0.406 KROCC=0.406 RMSE=0.595
Epoc 7506,Quality Loss=0.623
Epoch 7506 Valid Results: loss=0.695 SROCC=0.365 PLCC=0.365 KROCC=0.365 RMSE=0.629
Epoc 7507,Quality Loss=0.622
Epoch 7507 Valid Results: loss=0.703 SROCC=0.237 PLCC=0.237 KROCC=0.237 RMSE=0.654
Epoc 7508,Quality Loss=0.614
Epoch 7508

In [7]:
# final test
torch.save(model.state_dict(), save_model)

model.load_state_dict(torch.load(save_model))
model.eval()
with torch.no_grad():
    y_pred = np.zeros(testnum)
    y_test = np.zeros(testnum)
    L = 0
    for i, ( label, features) in enumerate(test_loader):
        y_test[i] = label.item()
        label = label.to(device)
        features = features.to(device).float()
        results = model(features)
        y_pred[i] = (results > .5).float() * 1 
        loss = classify_criterion(results, label)
        L = L + loss.item()
test_loss = L / (i + 1)
SROCC = stats.spearmanr(y_pred, y_test)[0]
PLCC = stats.pearsonr(y_pred, y_test)[0]
KROCC = stats.stats.kendalltau(y_pred, y_test)[0]
RMSE = np.sqrt(((y_pred - y_test) ** 2).mean())

print("Final test Results: loss={:.3f} SROCC={:.3f} PLCC={:.3f} KROCC={:.3f} RMSE={:.3f}".format(test_loss,
                                                                                                  SROCC,
                                                                                                  PLCC,
                                                                                                  KROCC,
                                                                                                  RMSE))

Final test Results: loss=0.708 SROCC=0.213 PLCC=0.213 KROCC=0.213 RMSE=0.654


In [8]:
y_test

array([1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1.,
       1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1.,
       1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 0.,
       0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0.,
       0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0.,
       0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1.])

In [9]:
y_pred

array([1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 1.,
       0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0.,
       1., 0., 1., 0., 1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0., 1.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1.])

In [10]:
from sklearn.metrics import precision_score, recall_score, balanced_accuracy_score, roc_auc_score

print(balanced_accuracy_score(y_test,y_pred))
print(precision_score(y_test,y_pred))
print(recall_score(y_test,y_pred))
print(roc_auc_score(y_test, y_pred))

0.6039136302294197
0.7352941176470589
0.43859649122807015
0.6039136302294197
