# Accuracy Assessment Stats and Confusion Matrices for Sargassum Classification

In [2]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

## Load the accuracy assessment (AA) shapefiles into a GeoDataFrame

In [5]:
# Folder that holds the accuracy assessment (AA) shapefiles
source_dir = r'/Users/arbailey/Google Drive/My Drive/sargassum/aa'

# Optional: rebuild the merged GeoDataFrame from the individual per-date files.
# Left commented out because the merged shapefile read below already exists.
# (pd.concat is used here; DataFrame.append is not available in pandas >= 2.0.)
# files = sorted(
#     f for f in os.listdir(source_dir)
#     if f.startswith('aaPoints') and f.endswith('validated.shp')
# )
# print(files)
# sargassum_aa_gdf = pd.concat(
#     [gpd.read_file(os.path.join(source_dir, f)) for f in files]
# )

# Read the already-merged 2019 file and discard every row that has a null in any column
file = 'aaPoints_validated_2019.shp'
sargassum_aa_gdf = gpd.read_file(os.path.join(source_dir, file)).dropna()

# Summary statistics are printed before the integer cast below
print(sargassum_aa_gdf.describe())

# Cast the validclass and validpa columns to int, then display the frame
sargassum_aa_gdf = sargassum_aa_gdf.astype({"validclass": int, "validpa": int})
sargassum_aa_gdf


         sargassum   validclass      validpa         aa_id
count  1303.000000  1303.000000  1303.000000  1.303000e+03
mean      0.462778     2.878741     0.249424  4.640827e+10
std       0.498804     1.712617     0.432846  6.387323e+10
min       0.000000     1.000000     0.000000  2.019023e+10
25%       0.000000     2.000000     0.000000  2.019040e+10
50%       0.000000     2.000000     0.000000  2.019063e+10
75%       1.000000     5.000000     0.000000  2.019112e+10
max       1.000000     6.000000     1.000000  2.019012e+11


Unnamed: 0,sargassum,validclass,classdesc,validpa,imagedate,aa_id,geometry
0,0,2,other veg,0,2019-02-26,20190226001,POINT (519205.000 2354975.000)
1,0,2,other veg,0,2019-02-26,20190226002,POINT (499725.000 2289545.000)
2,0,2,other veg,0,2019-02-26,20190226003,POINT (466695.000 2383555.000)
3,0,3,beach,0,2019-02-26,20190226004,POINT (520135.000 2343475.000)
4,0,2,other veg,0,2019-02-26,20190226005,POINT (450165.000 2213945.000)
...,...,...,...,...,...,...,...
1395,1,3,beach,0,2019-12-03,201901203196,POINT (520655.000 2348245.000)
1396,1,3,beach,0,2019-12-03,201901203197,POINT (491095.000 2388295.000)
1397,1,2,other veg,0,2019-12-03,201901203198,POINT (463175.000 2382065.000)
1398,1,3,beach,0,2019-12-03,201901203199,POINT (492425.000 2388045.000)


In [10]:
# Patch table from the original run
orig_patch_file = 'aaPatches_2019.shp'
orig_patch_gdf = gpd.read_file(os.path.join(source_dir, orig_patch_file))

# Patch tables from the updated training sites: SR variant, then TOA variant
sr_patch_file = 'aaPatches_2019sr.shp'
sr_patch_gdf = gpd.read_file(os.path.join(source_dir, sr_patch_file))
toa_patch_file = 'aaPatches_2019toa.shp'
toa_patch_gdf = gpd.read_file(os.path.join(source_dir, toa_patch_file))

# Preview the first rows of each table, one after another
print(orig_patch_gdf.head(), sr_patch_gdf.head(), toa_patch_gdf.head(), sep='\n')

            patch        aa_id                    geometry
0  45664092306850  20190226103  POINT (-86.99497 20.27700)
1  21461951603665  20190226105  POINT (-87.53518 19.39259)
2  51492362924636  20190226116  POINT (-86.86517 20.53765)
3  54541789694105  20190226117  POINT (-86.79547 21.50330)
4  10767483047360  20190226131  POINT (-87.76379 18.42146)
   sr_sarg        sr_patch        aa_id                    geometry
0        1  21461951603665  20190226105  POINT (-87.53518 19.39259)
1        1  10767483047360  20190226131  POINT (-87.76379 18.42146)
2        1  22664542446226  20190226170  POINT (-87.50565 19.42132)
3        1  10531259846258  20190226172  POINT (-87.76921 18.40517)
4        1  10707353505326  20190226183  POINT (-87.76697 18.41141)
        toa_patch  toa_sarg        aa_id                    geometry
0  21414706963412         1  20190226105  POINT (-87.53518 19.39259)
1  22664542446226         1  20190226170  POINT (-87.50565 19.42132)
2  10535554813545         1  20

In [17]:
# Attach the patch tables to the validated AA points, one left join per table.
dfs2join = [orig_patch_gdf, sr_patch_gdf, toa_patch_gdf]

combined_aa_df = sargassum_aa_gdf
for patch_gdf in dfs2join:
    # Each patch table's own geometry column is dropped before joining on aa_id,
    # so the only geometry in the result is the one from the AA points.
    combined_aa_df = pd.merge(
        combined_aa_df,
        patch_gdf.drop(columns=['geometry']),
        on="aa_id",
        how="left",
    )

combined_aa_df

Unnamed: 0,sargassum,validclass,classdesc,validpa,imagedate,aa_id,geometry,patch,sr_sarg,sr_patch,toa_patch,toa_sarg
0,0,2,other veg,0,2019-02-26,20190226001,POINT (519205.000 2354975.000),,,,,
1,0,2,other veg,0,2019-02-26,20190226002,POINT (499725.000 2289545.000),,,,,
2,0,2,other veg,0,2019-02-26,20190226003,POINT (466695.000 2383555.000),,,,,
3,0,3,beach,0,2019-02-26,20190226004,POINT (520135.000 2343475.000),,,,,
4,0,2,other veg,0,2019-02-26,20190226005,POINT (450165.000 2213945.000),,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1,3,beach,0,2019-12-03,201901203196,POINT (520655.000 2348245.000),5.430127e+13,,,,
1299,1,3,beach,0,2019-12-03,201901203197,POINT (491095.000 2388295.000),4.161394e+13,,,,
1300,1,2,other veg,0,2019-12-03,201901203198,POINT (463175.000 2382065.000),2.964816e+13,1.0,2.967822e+13,,
1301,1,3,beach,0,2019-12-03,201901203199,POINT (492425.000 2388045.000),4.215940e+13,,,,


## Error Matrices and Accuracy Stats

### All Dates - Original Training Sites

In [18]:
# Error matrix and accuracy statistics over all image dates.
# validpa is passed as the reference (y_true) and sargassum as the prediction (y_pred),
# so matrix rows follow validpa and columns follow sargassum.
# labels=[0, 1] is pinned on the matrix as well as the report so that both always
# describe the same two classes in the same order, even if one class is absent.
print(confusion_matrix(sargassum_aa_gdf['validpa'], sargassum_aa_gdf['sargassum'], labels=[0, 1]))
print(classification_report(sargassum_aa_gdf['validpa'], sargassum_aa_gdf['sargassum'], labels=[0, 1]))
print(accuracy_score(sargassum_aa_gdf['validpa'], sargassum_aa_gdf['sargassum']))

[[697 281]
 [  3 322]]
              precision    recall  f1-score   support

           0       1.00      0.71      0.83       978
           1       0.53      0.99      0.69       325

    accuracy                           0.78      1303
   macro avg       0.76      0.85      0.76      1303
weighted avg       0.88      0.78      0.80      1303

0.7820414428242517


In [19]:

# Error matrix and accuracy statistics for each image date separately.
image_dates = sargassum_aa_gdf['imagedate'].unique()
print(image_dates)

for date2test in image_dates:
    # Keep only the AA points belonging to the current image date
    sargassum_aa_subset_gdf = sargassum_aa_gdf[sargassum_aa_gdf['imagedate'] == date2test]
    # f-string formatting works whether imagedate values are strings or date-like objects
    print(f'\nDate: {date2test}')
    # labels=[0, 1] keeps the matrix 2x2 (rows: validpa, columns: sargassum) even when a
    # single date contains only one of the two classes, matching the report below.
    print(confusion_matrix(sargassum_aa_subset_gdf['validpa'], sargassum_aa_subset_gdf['sargassum'], labels=[0, 1]))
    print(classification_report(sargassum_aa_subset_gdf['validpa'], sargassum_aa_subset_gdf['sargassum'], labels=[0, 1]))
    print(accuracy_score(sargassum_aa_subset_gdf['validpa'], sargassum_aa_subset_gdf['sargassum']))

['2019-02-26' '2019-04-02' '2019-05-07' '2019-06-26' '2019-09-14'
 '2019-11-18' '2019-12-03']

Date: 2019-02-26
[[99 19]
 [ 1 54]]
              precision    recall  f1-score   support

           0       0.99      0.84      0.91       118
           1       0.74      0.98      0.84        55

    accuracy                           0.88       173
   macro avg       0.86      0.91      0.88       173
weighted avg       0.91      0.88      0.89       173

0.884393063583815

Date: 2019-04-02
[[100  29]
 [  0  54]]
              precision    recall  f1-score   support

           0       1.00      0.78      0.87       129
           1       0.65      1.00      0.79        54

    accuracy                           0.84       183
   macro avg       0.83      0.89      0.83       183
weighted avg       0.90      0.84      0.85       183

0.8415300546448088

Date: 2019-05-07
[[100   2]
 [  0  97]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.

In [20]:
# Count the AA points in every combination of sargassum, validclass, classdesc and validpa.
# Alternative aggregation: .agg(['count'])
(
    sargassum_aa_gdf
    .groupby(by=['sargassum', 'validclass', 'classdesc', 'validpa'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,imagedate,aa_id,geometry
sargassum,validclass,classdesc,validpa,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,sargassum,1,3,3,3
0,2,other veg,0,317,317,317
0,3,beach,0,123,123,123
0,4,buildings,0,26,26,26
0,4,pavement / roads,0,1,1,1
0,5,clouds,0,103,103,103
0,6,water,0,127,127,127
1,1,sargassum,1,322,322,322
1,2,other veg,0,47,47,47
1,3,beach,0,131,131,131
