In [1]:
import datetime
import os
import sys
import glob
import pprint

import numpy as np

from osgeo import gdal
gdal.AllRegister()
import pandas as pd


In [2]:
def get_file(path, year):
    """
    Return the GeoTIFF in ``path`` whose file name contains ``year``.

    :param path: Directory to search for ``*.tif`` files
    :type path: str
    :param year: Year to look for in each file's base name; converted with
        ``str()`` so an int such as ``1992`` works as well as ``"1992"``
    :type year: str or int
    :return: Full path of the matching file; when several file names match,
        the last one in sorted order
    :rtype: str
    :raises SystemExit: (status 1) when no file name in ``path`` contains
        ``year``; the available files are printed first
    """

    # Substring matching below needs a string; accept ints transparently
    year = str(year)

    filelist = sorted(glob.glob(os.path.join(path, "*.tif")))

    # Match against the base name only so digits in the directory are ignored
    templist = [item for item in filelist if year in os.path.basename(item)]

    if not templist:

        print("\nCould not locate a file for year {} in the given path {}\n".format(year, path))

        print("Available files in path are:\n")

        pprint.pprint(filelist)

        # Non-zero status: a missing input file is an error, not a clean exit
        sys.exit(1)

    # A single match is returned as-is; with several, the last sorted name wins
    return templist[-1]

In [3]:
def readData(refdir, preddir, y):
    """
    Load the reference and prediction rasters for one year.

    :param refdir: Directory searched (via ``get_file``) for the reference raster
    :type refdir: str
    :param preddir: Directory searched (via ``get_file``) for the prediction raster
    :type preddir: str
    :param y: Year passed to ``get_file`` to select one file from each directory
    :return: ``(refdata, preddata, classes, reffile, predfile)`` -- the two
        raster arrays, the sorted list of unique values found in either
        array, and the paths of the two files that were read
    :rtype: tuple
    :raises IOError: if GDAL cannot open one of the selected files
    """

    def _load(rasterfile):
        # gdal.Open hands back None instead of raising when it cannot open a
        # file (unless GDAL exceptions have been enabled), so check explicitly
        # rather than failing later with an opaque AttributeError.
        dataset = gdal.Open(rasterfile, gdal.GA_ReadOnly)

        if dataset is None:
            raise IOError("GDAL could not open raster file: {}".format(rasterfile))

        return dataset.ReadAsArray()

    reffile = get_file(refdir, y)

    predfile = get_file(preddir, y)

    print("The reference file is:\n\t{}\n".format(reffile))

    print("The prediction file is:\n\t{}\n".format(predfile))

    # Load raster data into arrays
    refdata = _load(reffile)

    preddata = _load(predfile)

    # Union of the unique values present in either raster, in ascending order
    ref_values = set(np.unique(refdata).tolist())

    pred_values = set(np.unique(preddata).tolist())

    classes = sorted(ref_values | pred_values)

    return refdata, preddata, classes, reffile, predfile

In [4]:
def compute_confusion_matrix(truth, predicted, classes):
    """
    Cross-tabulate two class arrays into a labelled confusion matrix.

    Rows correspond to values in ``truth`` and columns to values in
    ``predicted``.  The returned array carries a header row and a header
    column holding the class values, plus a trailing totals row and column
    whose header cell is the sentinel ``99999999`` (standing in for "Total",
    because an integer array cannot hold a string).  The top-left cell is 0.

    Progress is written to stdout while the matrix is being filled.

    :param truth: Array of reference class values
    :type truth: numpy.ndarray
    :param predicted: Array of predicted class values, compared element-wise
        with ``truth``
    :type predicted: numpy.ndarray
    :param classes: Class values to tabulate, in row/column order; expected
        to contain no duplicates.  The list is not modified.
    :type classes: list
    :return: Integer array of shape ``(len(classes) + 2, len(classes) + 2)``
    :rtype: numpy.ndarray
    """

    n_classes = len(classes)

    total = float(n_classes ** 2)

    # 64-bit counts so the row/column totals of very large rasters cannot wrap
    confusion_matrix = np.zeros((n_classes, n_classes), np.int64)

    print("generating %s by %s confusion matrix" % (n_classes, n_classes))

    counter = 1.0

    for col, c in enumerate(classes):  # iterate through columns

        # Invariant for the whole column, so compute it once
        predicted_is_c = predicted == c

        for row, r in enumerate(classes):  # iterate through rows

            current = counter / total * 100.0  # as percent

            # Diagonal and off-diagonal cells are all the same count:
            # elements that are r in truth and c in predicted
            confusion_matrix[row, col] = np.sum(np.logical_and(truth == r, predicted_is_c))

            # show the percent complete
            sys.stdout.write("\r%s%% Done " % str(current)[:5])

            # needed to display the current percent complete
            sys.stdout.flush()

            counter += 1.0

        sys.stdout.flush()

    # add row totals in a new column at the end
    x_sum = confusion_matrix.sum(axis=1).reshape(n_classes, 1)

    confusion_matrix = np.append(arr=confusion_matrix, values=x_sum, axis=1)

    # add column totals in a new row at the end
    y_sum = confusion_matrix.sum(axis=0).reshape(1, n_classes + 1)

    confusion_matrix = np.append(arr=confusion_matrix, values=y_sum, axis=0)

    # insert a blank row and column at the top/left to contain class values
    confusion_matrix = np.insert(arr=confusion_matrix, obj=0, axis=0, values=0)

    confusion_matrix = np.insert(arr=confusion_matrix, obj=0, axis=1, values=0)

    # Header labels: 0 for the corner cell, then the classes, then the
    # totals sentinel.  Built as a new list so the caller's list is untouched.
    labels = [0] + list(classes) + [99999999]

    # insert the class names into the blank columns/rows of the matrix
    for position, label in enumerate(labels):
        confusion_matrix[position, 0] = label

        confusion_matrix[0, position] = label

    return confusion_matrix

In [5]:
def write_to_csv(matrix, outdir, name):
    """
    Write a labelled confusion matrix to ``<outdir>/<name>.csv``.

    Values are written as space-separated integers, one matrix row per line
    (the layout ``np.savetxt(..., fmt='%d')`` produces).  The sentinel label
    ``99999999`` is written as ``Total`` where it appears in the header row
    or the header column; counts inside the table are never rewritten, even
    if one happens to equal the sentinel.

    The text is assembled in memory, so no temporary file is created and no
    other file in ``outdir`` is touched.  An existing output file of the same
    name is overwritten.

    :param matrix: 2-D array whose first row and first column hold the labels
    :param outdir: Existing directory that receives the file
    :type outdir: str
    :param name: Output file name without the ``.csv`` extension
    :type name: str
    :return: None
    """

    lookfor = '99999999'

    table = np.asarray(matrix)

    # np.savetxt writes a 1-D array as a single column; keep that layout
    if table.ndim == 1:
        table = table.reshape(-1, 1)

    lines = []

    for row_number, row in enumerate(table):

        cells = ['%d' % value for value in row]

        if row_number == 0:
            # header row: any cell may be the totals label
            cells = ['Total' if cell == lookfor else cell for cell in cells]

        elif cells and cells[0] == lookfor:
            # data row: only the leading label cell may be the totals label
            cells[0] = 'Total'

        lines.append(' '.join(cells) + '\n')

    with open(os.path.join(outdir, '%s.csv' % name), 'w') as out:

        out.write(''.join(lines))

    return None


In [6]:
# Inputs for this run: the directory holding the reference rasters, the
# directory holding the prediction rasters, and the year used to select one
# file from each directory.
reference = r"Z:\ancillary\NLCD\Original\h27v08"
prediction = r"Z:\eval_materials\sites.prep\h27v08_v2017.08.18\CoverMaps\CoverPrim_color"
year = "1992"


# Read both rasters; Classes is the sorted list of values found in either one.
refData, predData, Classes, ref_file, pred_file = readData(reference, prediction, year)

# Cross-tabulate the reference data against the prediction data.
cnf_mat = compute_confusion_matrix(refData, predData, Classes)

The reference file is:
	Z:\ancillary\NLCD\Original\h27v08\nlcd_1992_30meter_whole.tif

The prediction file is:
	Z:\eval_materials\sites.prep\h27v08_v2017.08.18\CoverMaps\CoverPrim_color\CoverPrim_1992.tif

generating 23 by 23 confusion matrix
100.0% Done 

In [7]:
# Independent copy of the full matrix; the trimming cells below start from it.
holder = np.copy(cnf_mat)

In [8]:
# Drop every row whose cells after the label column are all zero.
# The mask is computed on cnf_mat, of which holder is an unmodified copy, so
# row positions in the mask and in holder line up exactly.
empty_rows = np.all(cnf_mat[:, 1:] == 0, axis=1)

# Boolean indexing returns a new array; holder itself is left untouched.
cnf_mat1 = holder[~empty_rows]

print(cnf_mat1)

[[       0        0        1        2        3        4        5        6
         8        9       11       21       22       23       32       33
        41       42       43       81       82       85       91       92
  99999999]
 [      11       10    15837    34914        7    48088   236807      341
        19     2298        0        0        0        0        0        0
         0        0        0        0        0        0        0        0
    338321]
 [      21        0   412687    74254        6    30232      745        7
         7     1162        0        0        0        0        0        0
         0        0        0        0        0        0        0        0
    519100]
 [      22        0    52842     1661        0      272      158        1
         0      230        0        0        0        0        0        0
         0        0        0        0        0        0        0        0
     55164]
 [      23       86   185308    51838       47    15638     4760

In [9]:
# Drop every column that is all zero below the header row.
# Only rows were removed to build cnf_mat1, so column positions in cnf_mat
# and cnf_mat1 are identical and the mask can be applied directly.
empty_cols = np.all(cnf_mat[1:, :] == 0, axis=0)

# Boolean indexing returns a new array; cnf_mat1 itself is left untouched.
cnf_mat2 = cnf_mat1[:, ~empty_cols]

print(cnf_mat2)
            

[[       0        0        1        2        3        4        5        6
         8        9 99999999]
 [      11       10    15837    34914        7    48088   236807      341
        19     2298   338321]
 [      21        0   412687    74254        6    30232      745        7
         7     1162   519100]
 [      22        0    52842     1661        0      272      158        1
         0      230    55164]
 [      23       86   185308    51838       47    15638     4760       15
        89     1683   259464]
 [      32        0    18266     9841     2346     8420      181        0
        20      427    39501]
 [      33        6    17208    25496      547    23496       64        0
       782      766    68365]
 [      41        0   175692   978206     1836  9191066    10283      488
      2906     2560 10363037]
 [      42        0    48727    86096       10   604698     5003       29
       307      743   745613]
 [      43        1    92515   284665       51   885592     2783

In [10]:
# Frame of the untrimmed matrix; the class labels are still ordinary data in
# the first row and first column.
df_test1 = pd.DataFrame(holder)

In [11]:
# Frame of the row-trimmed matrix; labels are still stored as data.
df_test2 = pd.DataFrame(cnf_mat1)

In [32]:
# Frame of the fully trimmed matrix: the header column becomes the index and
# the header row becomes the column labels, leaving only counts and totals
# as data.
df_test3 = pd.DataFrame(cnf_mat2[1:,1:], index=cnf_mat2[1:,0], columns=cnf_mat2[0,1:])

In [13]:
# Display the frame built from the untrimmed matrix.
df_test1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0,0,1,2,3,4,5,6,8,9,...,33,41,42,43,81,82,85,91,92,99999999
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Display the frame built from the row-trimmed matrix.
df_test2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0,0,1,2,3,4,5,6,8,9,...,33,41,42,43,81,82,85,91,92,99999999
1,11,10,15837,34914,7,48088,236807,341,19,2298,...,0,0,0,0,0,0,0,0,0,338321
2,21,0,412687,74254,6,30232,745,7,7,1162,...,0,0,0,0,0,0,0,0,0,519100
3,22,0,52842,1661,0,272,158,1,0,230,...,0,0,0,0,0,0,0,0,0,55164
4,23,86,185308,51838,47,15638,4760,15,89,1683,...,0,0,0,0,0,0,0,0,0,259464
5,32,0,18266,9841,2346,8420,181,0,20,427,...,0,0,0,0,0,0,0,0,0,39501
6,33,6,17208,25496,547,23496,64,0,782,766,...,0,0,0,0,0,0,0,0,0,68365
7,41,0,175692,978206,1836,9191066,10283,488,2906,2560,...,0,0,0,0,0,0,0,0,0,10363037
8,42,0,48727,86096,10,604698,5003,29,307,743,...,0,0,0,0,0,0,0,0,0,745613
9,43,1,92515,284665,51,885592,2783,81,166,596,...,0,0,0,0,0,0,0,0,0,1266450


In [22]:
# Display the labelled frame built from the row- and column-trimmed matrix.
df_test3

Unnamed: 0,0,1,2,3,4,5,6,8,9,99999999
11,10,15837,34914,7,48088,236807,341,19,2298,338321
21,0,412687,74254,6,30232,745,7,7,1162,519100
22,0,52842,1661,0,272,158,1,0,230,55164
23,86,185308,51838,47,15638,4760,15,89,1683,259464
32,0,18266,9841,2346,8420,181,0,20,427,39501
33,6,17208,25496,547,23496,64,0,782,766,68365
41,0,175692,978206,1836,9191066,10283,488,2906,2560,10363037
42,0,48727,86096,10,604698,5003,29,307,743,745613
43,1,92515,284665,51,885592,2783,81,166,596,1266450
81,50,701217,7558053,410,1032079,2468,138,679,13913,9309007


In [33]:
# Show the column labels as they are before relabelling.
print(df_test3.columns.tolist())

# Replace the numeric totals sentinel (99999999) with a readable label on
# both axes.  rename() leaves labels it cannot find untouched, so re-running
# this cell is a harmless no-op and no try/except is needed to guard it.
df_test3 = df_test3.rename(index={99999999: "Total"}, columns={99999999: "Total"})

[0, 1, 2, 3, 4, 5, 6, 8, 9, 99999999]


In [35]:
# Display the frame again after the sentinel was relabelled as "Total".
df_test3

Unnamed: 0,0,1,2,3,4,5,6,8,9,Total
11,10,15837,34914,7,48088,236807,341,19,2298,338321
21,0,412687,74254,6,30232,745,7,7,1162,519100
22,0,52842,1661,0,272,158,1,0,230,55164
23,86,185308,51838,47,15638,4760,15,89,1683,259464
32,0,18266,9841,2346,8420,181,0,20,427,39501
33,6,17208,25496,547,23496,64,0,782,766,68365
41,0,175692,978206,1836,9191066,10283,488,2906,2560,10363037
42,0,48727,86096,10,604698,5003,29,307,743,745613
43,1,92515,284665,51,885592,2783,81,166,596,1266450
81,50,701217,7558053,410,1032079,2468,138,679,13913,9309007
