## Model Evaluation

In [1]:
import os
os.chdir('..')
%pwd

'/home/dain5832/cmu/deeplearning/counterfactual_fairness_emotional_recognition'

In [2]:
import pickle
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from train import test
from datasets import IEMOCAPEval

  from .autonotebook import tqdm as notebook_tqdm


### Setting up test dataset

In [3]:
# Use the GPU when one is visible, otherwise fall back to the CPU instead of
# crashing inside `.cuda()`.
# NOTE(review): `test` from `train` may itself assume CUDA tensors - confirm
# before relying on the CPU fallback.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

test_dataset = IEMOCAPEval(partition='test')
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,  # keep a fixed sample order for evaluation
    num_workers=1,
    pin_memory=device.type == "cuda",  # pinned host memory only helps GPU copies
)

# Despite its name, this is the path of a single checkpoint *file*.
# NOTE(review): the sweep cell further down reads from
# "checkpoint/20220430_055023/" while this path uses "20220430_052023" -
# confirm that both run ids are intended.
CHECKPOINT_MODEL_DIR = "checkpoint/20220430_052023/model_fairness_dataaug_ratio0.5_trial0_0.5730.pth"

# SECURITY: torch.load unpickles the file and can execute arbitrary code, so
# only load checkpoints from a trusted source. `map_location` places the
# weights directly on the selected device regardless of where they were saved.
# NOTE: recent PyTorch releases default to `weights_only=True`, which rejects
# fully pickled models such as this one; pass `weights_only=False` there.
model = torch.load(CHECKPOINT_MODEL_DIR, map_location=device).to(device)
criterion = nn.CrossEntropyLoss()

### Evaluate model on test set

In [7]:
# Run the evaluation; the metrics are printed as the cell output.
# Keep the returned FairnessEvaluation object so it can be inspected in later
# cells, instead of discarding it and echoing only its bare repr.
fairness_eval = test(model, test_loader, test_dataset, criterion, return_fairness_eval=True)



*****************************************************************
Evaluation on Test Set:
Test cost: 1.28
Test accuracy: 0.467
Test UAR: 0.5044
Test Confusion Matrix:["ang","sad","hap","neu"]
[[21  0  4  4]
 [ 9 69 10 22]
 [19 10 17 25]
 [47 52 26 93]]
Fairness Scores (in terms of equal opportunities):
["ang","sad","hap","neu"]
[0.97402597 0.73446848 0.23076923 0.61393805]
*****************************************************************


<utils.fairness_eval.FairnessEvaluation at 0x7f7a8db76b50>

## Evaluate every checkpoint (all ratios and trials) in one run

In [8]:
from glob import glob

# Use the GPU when one is visible, otherwise fall back to the CPU instead of
# crashing inside `.cuda()`.
# NOTE(review): `test` from `train` may itself assume CUDA tensors - confirm
# before relying on the CPU fallback.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuilt here so this cell can be run on its own.
test_dataset = IEMOCAPEval(partition='test')
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=1,
    pin_memory=device.type == "cuda",  # pinned host memory only helps GPU copies
)

# The loss does not depend on the checkpoint, so build it once outside the loop.
criterion = nn.CrossEntropyLoss()

# One checkpoint file is expected per (ratio, trial); the trailing wildcard
# covers the remainder of the file name after the trial index.
CHECKPOINT_PATTERN = "checkpoint/20220430_055023/model_fairness_dataaug_ratio{}_trial{}*.pth"

for ratio in ['0.5', '0.6', '0.7', '0.8', '0.9', '1.0']:
    for trial in range(5):
        print("############", ratio, trial, '##################')
        pattern = CHECKPOINT_PATTERN.format(ratio, trial)
        # glob returns matches in an unspecified order; sort so that the same
        # file is picked on every run if several happen to match.
        matches = sorted(glob(pattern))
        if not matches:
            # Fail with the offending pattern rather than a bare IndexError.
            raise FileNotFoundError("no checkpoint matches {!r}".format(pattern))
        # SECURITY: torch.load unpickles the file and can execute arbitrary
        # code, so only load checkpoints from a trusted source.
        model = torch.load(matches[0], map_location=device).to(device)
        test(model, test_loader, test_dataset, criterion, return_fairness_eval=True)

############ 0.5 0 ##################
*****************************************************************
Evaluation on Test Set:
Test cost: 1.36
Test accuracy: 0.355
Test UAR: 0.5271
Test Confusion Matrix:["ang","sad","hap","neu"]
[[25  1  2  1]
 [16 63 26  5]
 [21  7 40  3]
 [43 59 92 24]]
Fairness Scores (in terms of equal opportunities):
["ang","sad","hap","neu"]
[0.84183673 0.66182874 0.74175824 0.64571429]
*****************************************************************
############ 0.5 1 ##################
*****************************************************************
Evaluation on Test Set:
Test cost: 1.24
Test accuracy: 0.495
Test UAR: 0.5131
Test Confusion Matrix:["ang","sad","hap","neu"]
[[17  1  9  2]
 [ 7 70 15 18]
 [ 8  7 27 29]
 [11 64 45 98]]
Fairness Scores (in terms of equal opportunities):
["ang","sad","hap","neu"]
[0.58441558 0.635506   0.39335664 0.58849558]
*****************************************************************
############ 0.5 2 ##################
**

  self.equal_opportunities = np.array([np.min((m[i, i] / f[i, i], f[i, i] / m[i, i])) for i in range(len(m))])


*****************************************************************
Evaluation on Test Set:
Test cost: 1.26
Test accuracy: 0.477
Test UAR: 0.5172
Test Confusion Matrix:["ang","sad","hap","neu"]
[[ 26   2   0   1]
 [ 17  60   7  26]
 [ 27   7   9  28]
 [ 51  47  11 109]]
Fairness Scores (in terms of equal opportunities):
["ang","sad","hap","neu"]
[0.78571429 0.76819407 0.         0.84769446]
*****************************************************************
############ 0.6 1 ##################


  self.equal_opportunities = np.array([np.min((m[i, i] / f[i, i], f[i, i] / m[i, i])) for i in range(len(m))])


*****************************************************************
Evaluation on Test Set:
Test cost: 1.59
Test accuracy: 0.0678
Test UAR: 0.25
Test Confusion Matrix:["ang","sad","hap","neu"]
[[ 29   0   0   0]
 [110   0   0   0]
 [ 71   0   0   0]
 [217   1   0   0]]
Fairness Scores (in terms of equal opportunities):
["ang","sad","hap","neu"]
[ 1. nan nan nan]
*****************************************************************
############ 0.6 2 ##################
*****************************************************************
Evaluation on Test Set:
Test cost: 1.33
Test accuracy: 0.374
Test UAR: 0.5087
Test Confusion Matrix:["ang","sad","hap","neu"]
[[ 23   3   3   0]
 [  3  75  22  10]
 [ 10  18  29  14]
 [ 17 123  45  33]]
Fairness Scores (in terms of equal opportunities):
["ang","sad","hap","neu"]
[0.68877551 0.7577187  0.45150502 0.98727876]
*****************************************************************
############ 0.6 3 ##################
*************************************