Aims:

- Part of evaluation loop
- RMSE/accuracy/confusion matrix
- Both for each individual image and across all images
- Compare the ground-truth array against the corresponding array from the data source being evaluated (e.g. stereo), whatever that source is

In [None]:
import pandas as pd
import os
import numpy as np
import sklearn
from sklearn.metrics import confusion_matrix



In [3]:
# Root directory that holds the input CSV files read by the cells below.
PATH = '/gws/nopw/j04/iecdt/JERMIT_the_frog'

In [4]:
# Load the ground-truth table from the data directory and echo it.
ground_truth_csv = PATH + '/hydrometeors_time_aligned_classes.csv'
ground_truth = pd.read_csv(ground_truth_csv)
print(ground_truth)

          height  1691388010  1691388070  1691388130  1691388190  1691388250  \
0      114.54125           0           0           0           0           0   
1      144.51833           0           0           0           0           0   
2      174.49533           0           0           0           0           0   
3      204.47241           0           0           0           0           0   
4      234.44948           0           0           0           0           0   
..           ...         ...         ...         ...         ...         ...   
454  13724.13100           0           0           0           0           0   
455  13754.10800           0           0           0           0           0   
456  13784.08500           0           0           0           0           0   
457  13814.06200           0           0           0           0           0   
458  13844.03900           0           0           0           0           0   

     1691388310  1691388370  1691388430

In [5]:
# Read in the stereo data that is evaluated against the ground truth.
# NOTE: placeholder -- the ground-truth file is currently loaded as the stereo
# input as a proof of concept, so the two tables are identical by construction.
# TODO: point this at the real stereo output file.
stereo = pd.read_csv(PATH + '/hydrometeors_time_aligned_classes.csv')
# Echo the frame that was just loaded.
print(stereo)

          height  1691388010  1691388070  1691388130  1691388190  1691388250  \
0      114.54125           0           0           0           0           0   
1      144.51833           0           0           0           0           0   
2      174.49533           0           0           0           0           0   
3      204.47241           0           0           0           0           0   
4      234.44948           0           0           0           0           0   
..           ...         ...         ...         ...         ...         ...   
454  13724.13100           0           0           0           0           0   
455  13754.10800           0           0           0           0           0   
456  13784.08500           0           0           0           0           0   
457  13814.06200           0           0           0           0           0   
458  13844.03900           0           0           0           0           0   

     1691388310  1691388370  1691388430

In [23]:
# Count how many cells of the stereo table hold the value 1 and the value 0,
# excluding the first two columns.
# These are plain per-value totals for `stereo` alone: nothing here is compared
# against `ground_truth`, so they are not true-positive / true-negative counts.
# NOTE(review): iloc[:, 2:] drops the first two columns by position. The printed
# frame shows `height` as the first column, so this looks like it also drops the
# first timestamp column -- confirm that is intended.
stereo_classes = stereo.iloc[:, 2:]
Ones = (stereo_classes == 1).sum().sum()
Zeros = (stereo_classes == 0).sum().sum()

print('Stereo cells equal to 1:', Ones)
print('Stereo cells equal to 0:', Zeros)


True Positives: 1495483
True Negatives: 4819898


In [6]:
# Cell-by-cell agreement between ground truth and stereo, skipping the first
# two columns by position. Stereo columns are selected by the ground-truth
# column labels, so the tables are matched by column name, not column order.
value_columns = ground_truth.columns[2:]
truth_cells = ground_truth[value_columns].to_numpy()
stereo_cells = stereo[value_columns].to_numpy()

# Rows are compared by position, so both tables must have the same shape;
# fail loudly instead of silently comparing misaligned data.
if truth_cells.shape != stereo_cells.shape:
    raise ValueError(
        f'ground_truth and stereo shapes differ: {truth_cells.shape} vs {stereo_cells.shape}'
    )

# A single vectorised comparison covers every cell at once.
matches = truth_cells == stereo_cells
correct = int(matches.sum())
incorrect = int(matches.size - correct)


In [7]:
# Report the cell-level tallies and the fraction of cells that agree.
total_cells = correct + incorrect
print('Correct:', correct)
print('Incorrect:', incorrect)
print('Accuracy:', correct / total_cells)

Correct: 6315381
Incorrect: 0
Accuracy: 1.0


In [8]:
# calculate rmse
# Root-mean-square error between ground-truth and stereo values.
# Only the data columns are used (the same iloc[:, 2:] slice as the other
# metric cells), so the `height` coordinate column is not averaged in.
squared_error = (ground_truth.iloc[:, 2:] - stereo.iloc[:, 2:]).to_numpy(dtype=float) ** 2
# Reduce on a plain ndarray so the result is always one scalar; np.mean applied
# to a DataFrame gives a scalar or a per-column Series depending on the pandas
# version. nanmean keeps the pandas default of skipping missing values.
rmse = float(np.sqrt(np.nanmean(squared_error)))
print('RMSE:', rmse)


RMSE: 0.0


In [9]:
# calculate mae
# Mean absolute error between ground-truth and stereo values.
# Only the data columns are used (the same iloc[:, 2:] slice as the other
# metric cells), so the `height` coordinate column is not averaged in.
abs_error = np.abs((ground_truth.iloc[:, 2:] - stereo.iloc[:, 2:]).to_numpy(dtype=float))
# Reduce on a plain ndarray so the result is always one scalar; np.mean applied
# to a DataFrame gives a scalar or a per-column Series depending on the pandas
# version. nanmean keeps the pandas default of skipping missing values.
mae = float(np.nanmean(abs_error))
print('MAE:', mae)

MAE: 0.0


In [None]:
# calculate confusion matrix
# Reduce both tables to binary labels (any value > 0 becomes 1, everything else 0),
# excluding the first two columns
ground_truth_binary = (ground_truth.iloc[:, 2:] > 0).astype(int)
stereo_binary = (stereo.iloc[:, 2:] > 0).astype(int)

# Flatten to 1D arrays so that every table cell is one sample
ground_truth_flat = ground_truth_binary.to_numpy().flatten()
stereo_flat = stereo_binary.to_numpy().flatten()

# Pin the label order so the result is always a 2x2 matrix laid out as
# [[TN, FP], [FN, TP]] (rows = ground truth, columns = stereo), even when one
# of the classes happens to be absent from the data.
conf_matrix = confusion_matrix(ground_truth_flat, stereo_flat, labels=[0, 1])
print(conf_matrix)


[[4819898       0]
 [      0 1495483]]


In [13]:
# Column-level agreement: a column counts as correct only when the whole
# ground-truth Series equals the stereo Series with the same label.
column_matches = [
    ground_truth[name].equals(stereo[name])
    for name in ground_truth.columns[2:]
]
correct_columns = sum(column_matches)
incorrect_columns = len(column_matches) - correct_columns


In [14]:
# Report the column-level tallies and the fraction of columns that agree.
total_columns = correct_columns + incorrect_columns
print('Correct:', correct_columns)
print('Incorrect:', incorrect_columns)
print('Accuracy:', correct_columns / total_columns)

Correct: 13759
Incorrect: 0
Accuracy: 1.0


In [None]:
# Calculate confusion matrix for entire columns
ground_truth_columns = ground_truth.iloc[:, 2:].apply(lambda col: ''.join(col.astype(str)), axis=0)
stereo_columns = stereo.iloc[:, 2:].apply(lambda col: ''.join(col.astype(str)), axis=0)

# Convert to binary classification (0 or 1) based on column equality
ground_truth_binary_columns = (ground_truth_columns == stereo_columns).astype(int)

# Calculate confusion matrix
conf_matrix_columns = confusion_matrix([1] * len(ground_truth_binary_columns), ground_truth_binary_columns, labels=[0, 1])
print(conf_matrix_columns)


[[    0     0]
 [    0 13759]]
