# Accuracy Assessment Stats and Confusion Matrices for Sargassum Classification

In [1]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

## Load the accuracy assessment (AA) shapefiles into a GeoDataFrame

In [24]:
# Collect the per-date accuracy assessment (AA) shapefiles, sorted by file name
# (the file names embed the image date, so this is also chronological order).
source_dir = r'/Users/arbailey/Google Drive/My Drive/sargassum/aa'
files = sorted(
    file for file in os.listdir(source_dir)
    if file.startswith('aaPoints') and file.endswith('validated.shp')
)
print(files)

# Fail with a clear message instead of a NameError further down when nothing matches.
if not files:
    raise FileNotFoundError('No aaPoints*validated.shp files found in ' + source_dir)

# Read every shapefile and combine them with a single concat call.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop copies
# the accumulated frame on every iteration. Each file's own row index is kept
# (no ignore_index), matching the previous behaviour.
sargassum_aa_gdf = pd.concat(
    [gpd.read_file(os.path.join(source_dir, file)) for file in files]
)

# Drop points with any null attribute before casting the label columns,
# since a column containing NaN cannot be converted to int.
sargassum_aa_gdf = sargassum_aa_gdf.dropna()
print(sargassum_aa_gdf.describe())
sargassum_aa_gdf = sargassum_aa_gdf.astype({"validclass": int, "validpa": int})
sargassum_aa_gdf


['aaPoints_20190226_validated.shp', 'aaPoints_20190402_validated.shp', 'aaPoints_20190507_validated.shp', 'aaPoints_20190626_validated.shp', 'aaPoints_20190914_validated.shp', 'aaPoints_20191118_validated.shp', 'aaPoints_20191203_validated.shp']
         sargassum   validclass      validpa
count  1303.000000  1303.000000  1303.000000
mean      0.462778     2.878741     0.249424
std       0.498804     1.712617     0.432846
min       0.000000     1.000000     0.000000
25%       0.000000     2.000000     0.000000
50%       0.000000     2.000000     0.000000
75%       1.000000     5.000000     0.000000
max       1.000000     6.000000     1.000000


Unnamed: 0,sargassum,validclass,classdesc,validpa,imagedate,geometry
0,0,2,other veg,0,2019-02-26,POINT (519205.000 2354975.000)
1,0,2,other veg,0,2019-02-26,POINT (499725.000 2289545.000)
2,0,2,other veg,0,2019-02-26,POINT (466695.000 2383555.000)
3,0,3,beach,0,2019-02-26,POINT (520135.000 2343475.000)
4,0,2,other veg,0,2019-02-26,POINT (450165.000 2213945.000)
...,...,...,...,...,...,...
195,1,3,beach,0,2019-12-03,POINT (520655.000 2348245.000)
196,1,3,beach,0,2019-12-03,POINT (491095.000 2388295.000)
197,1,2,other veg,0,2019-12-03,POINT (463175.000 2382065.000)
198,1,3,beach,0,2019-12-03,POINT (492425.000 2388045.000)


## Error Matrices and Accuracy Stats

### All Dates

In [25]:
# Error matrix and accuracy statistics over all image dates combined.
# Reference labels come from 'validpa' and predicted labels from 'sargassum'
# (first and second arguments of the sklearn metrics, i.e. y_true and y_pred).
y_true = sargassum_aa_gdf['validpa']
y_pred = sargassum_aa_gdf['sargassum']

# labels=[0, 1] pins the matrix to 2x2 with a fixed row/column order, so it
# always lines up with the classification report printed below it.
print(confusion_matrix(y_true, y_pred, labels=[0, 1]))
print(classification_report(y_true, y_pred, labels=[0, 1]))
print(accuracy_score(y_true, y_pred))

[[697 281]
 [  3 322]]
              precision    recall  f1-score   support

           0       1.00      0.71      0.83       978
           1       0.53      0.99      0.69       325

    accuracy                           0.78      1303
   macro avg       0.76      0.85      0.76      1303
weighted avg       0.88      0.78      0.80      1303

0.7820414428242517


In [33]:

# Error matrix and accuracy statistics for each image date separately.
image_dates = sargassum_aa_gdf['imagedate'].unique()
print(image_dates)

for date2test in image_dates:
    # Restrict the assessment points to the current image date.
    sargassum_aa_subset_gdf = sargassum_aa_gdf[sargassum_aa_gdf['imagedate'] == date2test]
    y_true = sargassum_aa_subset_gdf['validpa']
    y_pred = sargassum_aa_subset_gdf['sargassum']

    print('\nDate: ' + date2test)
    # labels=[0, 1] keeps the matrix 2x2 even when a date's points all fall in
    # one class; without it the matrix would shrink to 1x1 and no longer match
    # the classification report below.
    print(confusion_matrix(y_true, y_pred, labels=[0, 1]))
    print(classification_report(y_true, y_pred, labels=[0, 1]))
    print(accuracy_score(y_true, y_pred))

['2019-02-26' '2019-04-02' '2019-05-07' '2019-06-26' '2019-09-14'
 '2019-11-18' '2019-12-03']

Date: 2019-02-26
[[99 19]
 [ 1 54]]
              precision    recall  f1-score   support

           0       0.99      0.84      0.91       118
           1       0.74      0.98      0.84        55

    accuracy                           0.88       173
   macro avg       0.86      0.91      0.88       173
weighted avg       0.91      0.88      0.89       173

0.884393063583815

Date: 2019-04-02
[[100  29]
 [  0  54]]
              precision    recall  f1-score   support

           0       1.00      0.78      0.87       129
           1       0.65      1.00      0.79        54

    accuracy                           0.84       183
   macro avg       0.83      0.89      0.83       183
weighted avg       0.90      0.84      0.85       183

0.8415300546448088

Date: 2019-05-07
[[100   2]
 [  0  97]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.

In [35]:
# Count the assessment points for every combination of the 'sargassum' label
# and the validation class / description / presence-absence columns.
group_keys = ['sargassum', 'validclass', 'classdesc', 'validpa']
sargassum_aa_gdf.groupby(group_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,imagedate,geometry
sargassum,validclass,classdesc,validpa,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1,sargassum,1,3,3
0,2,other veg,0,317,317
0,3,beach,0,123,123
0,4,buildings,0,26,26
0,4,pavement / roads,0,1,1
0,5,clouds,0,103,103
0,6,water,0,127,127
1,1,sargassum,1,322,322
1,2,other veg,0,47,47
1,3,beach,0,131,131
