# Precision Analysis

This file computes the true positive and false positive in Matlab algoritm and AIS data by comparing the coordinates of both data in a specific row.

In [1]:
# Import libraries
import pandas as pd

# Importing csv files 
dep_csv = 'Matlab_algo.csv'
ref_csv = 'Modified_AIS_VT.csv'
outfile = 'Precision-Analysis.csv'
dep = pd.read_csv(dep_csv)
ref = pd.read_csv(ref_csv)

print (dep)
print (ref)

           Description        Lon        Lat
0     27.2924, -97.623 -97.623039  27.292366
1     26.977, -97.5029 -97.502899  26.976959
2    26.5498, -97.3903 -97.390266  26.549812
3    27.8271, -97.1471 -97.147095  27.827120
4    27.7422, -97.0928 -97.092804  27.742237
..                 ...        ...        ...
342   27.6189, -82.655 -82.654991  27.618927
343  27.9184, -82.4442 -82.444214  27.918373
344   26.915, -82.3561 -82.356125  26.914997
345  26.5505, -82.1994 -82.199425  26.550524
346   26.494, -82.1879 -82.187874  26.494030

[347 rows x 3 columns]
          MMSI         BaseDateTime       LAT       LON          VesselName  \
0    367682480  2022-05-30T00:12:00  29.31612 -89.08567          ERIC HUYNH   
1    367471780  2022-05-30T00:12:00  29.34629 -89.11212      PRINCESS KELLY   
2    367792030  2022-05-30T00:12:00  30.37990 -88.56173           SEA HORSE   
3    368188790  2022-05-30T00:12:01  29.68500 -90.09985     SOUTHERN NIGHTS   
4    367778650  2022-05-30T00:12:02  29.3

# Sorting the csv files

In [2]:
# Sorting the csv files based on latitude and longitude
dep.sort_values('Lat', ascending = True)
ref.sort_values ('LAT', ascending = True)

print (dep)
print (ref)

           Description        Lon        Lat
0     27.2924, -97.623 -97.623039  27.292366
1     26.977, -97.5029 -97.502899  26.976959
2    26.5498, -97.3903 -97.390266  26.549812
3    27.8271, -97.1471 -97.147095  27.827120
4    27.7422, -97.0928 -97.092804  27.742237
..                 ...        ...        ...
342   27.6189, -82.655 -82.654991  27.618927
343  27.9184, -82.4442 -82.444214  27.918373
344   26.915, -82.3561 -82.356125  26.914997
345  26.5505, -82.1994 -82.199425  26.550524
346   26.494, -82.1879 -82.187874  26.494030

[347 rows x 3 columns]
          MMSI         BaseDateTime       LAT       LON          VesselName  \
0    367682480  2022-05-30T00:12:00  29.31612 -89.08567          ERIC HUYNH   
1    367471780  2022-05-30T00:12:00  29.34629 -89.11212      PRINCESS KELLY   
2    367792030  2022-05-30T00:12:00  30.37990 -88.56173           SEA HORSE   
3    368188790  2022-05-30T00:12:01  29.68500 -90.09985     SOUTHERN NIGHTS   
4    367778650  2022-05-30T00:12:02  29.3

# Creating new csv to store the analyzation

In [3]:
dep_cols = list(dep.columns.values)
ref_cols = list (ref.columns.values)
# # ais_new = ais[cols[0:4] + cols [7:10] +[cols [-1]]]
# # pa = matlab['Lat', 'Lon'] + ais['BaseDateTime','LAT', 'LON', 'VesselName', 'Vessel']
# # pa = matlab['Lat', 'Lon']
# pa =  ais[['BaseDateTime','LAT', 'LON', 'VesselName', 'VesselType']]
# pa ['Lat', 'Lon'] = matlab[['Lat', 'Lon']]

pa = pd.concat([dep, ref], axis=1) 

print (pa)

           Description        Lon        Lat       MMSI         BaseDateTime  \
0     27.2924, -97.623 -97.623039  27.292366  367682480  2022-05-30T00:12:00   
1     26.977, -97.5029 -97.502899  26.976959  367471780  2022-05-30T00:12:00   
2    26.5498, -97.3903 -97.390266  26.549812  367792030  2022-05-30T00:12:00   
3    27.8271, -97.1471 -97.147095  27.827120  368188790  2022-05-30T00:12:01   
4    27.7422, -97.0928 -97.092804  27.742237  367778650  2022-05-30T00:12:02   
..                 ...        ...        ...        ...                  ...   
868                NaN        NaN        NaN  367735690  2022-05-30T00:18:57   
869                NaN        NaN        NaN  368088160  2022-05-30T00:18:57   
870                NaN        NaN        NaN  367718320  2022-05-30T00:18:58   
871                NaN        NaN        NaN  367454140  2022-05-30T00:18:58   
872                NaN        NaN        NaN  366986160  2022-05-30T00:18:59   

          LAT       LON          Vessel

# Comparing the latitudes and longitudes

In [4]:
import pandas as pd
import numpy as np

# pa['Lat_match'] = np.where(pa['Lat'] == pa['LAT'], 'True', 'False')
# pa['Lon_match'] = np.where(pa['Lon'] == pa['LON'], 'True', 'False')


# Assigning Tolerance
tolerance = 0.01

# Latitudes and Longitudes
pa ['LATminT'] = pa['LAT'] - tolerance
pa ['LATmaxT'] = pa['LAT'] + tolerance
pa ['LONminT'] = pa['LON'] - tolerance
pa ['LONmaxT'] = pa['LON'] + tolerance

# New columns 
pa['LATminT_match'] = np.where(pa['Lat'] > pa ['LATminT'], pa['Lat'], 10000)
pa['LATmaxT_match'] = np.where(pa['LATminT_match'] < pa ['LATmaxT'], 'True', 'False')
pa['LONminT_match'] = np.where(pa['Lon'] > pa ['LONminT'], pa['Lon'], 10000)
pa['LONmaxT_match'] = np.where(pa['LONminT_match'] < pa ['LONmaxT'], 'True', 'False')

print (pa)

           Description        Lon        Lat       MMSI         BaseDateTime  \
0     27.2924, -97.623 -97.623039  27.292366  367682480  2022-05-30T00:12:00   
1     26.977, -97.5029 -97.502899  26.976959  367471780  2022-05-30T00:12:00   
2    26.5498, -97.3903 -97.390266  26.549812  367792030  2022-05-30T00:12:00   
3    27.8271, -97.1471 -97.147095  27.827120  368188790  2022-05-30T00:12:01   
4    27.7422, -97.0928 -97.092804  27.742237  367778650  2022-05-30T00:12:02   
..                 ...        ...        ...        ...                  ...   
868                NaN        NaN        NaN  367735690  2022-05-30T00:18:57   
869                NaN        NaN        NaN  368088160  2022-05-30T00:18:57   
870                NaN        NaN        NaN  367718320  2022-05-30T00:18:58   
871                NaN        NaN        NaN  367454140  2022-05-30T00:18:58   
872                NaN        NaN        NaN  366986160  2022-05-30T00:18:59   

          LAT       LON          Vessel

# Displaying True Positive (Matched coordinates)

In [5]:
# Checking only Latitude
pa_LATmatch = pa.loc [pa['LATmaxT_match'] == 'True']

# Checking only longitude
pa_Lonmatch = pa.loc [pa['LONmaxT_match'] == 'True']

#  Checking both latitude and Longitude
pa_Coormatch = pa.loc [(pa['LATmaxT_match'] == 'True') & (pa['LONmaxT_match'] == 'True')]

# Printing the results
print ("Latitude TP:", len(pa_LATmatch))
print ("Longitude TP", len(pa_Lonmatch))
print ("Both latitude and longitude TP:", len(pa_Coormatch))


# # Checking only Latitude
# pa_LATmatch = pa.loc [pa['Lat_match'] == 'True']

# # Checking only longitude
# pa_Lonmatch = pa.loc [pa['Lon_match'] == 'True']

# #  Checking both latitude and Longitude
# pa_Coormatch = pa.loc [(pa['Lat_match'] == 'True') & (pa['Lon_match'] == 'True')]

# # Printing the results
# print ("Latitude TP:", len(pa_LATmatch))
# print ("Longitude TP", len(pa_Lonmatch))
# print ("Both latitude and longitude TP:", len(pa_Coormatch))



Latitude TP: 6
Longitude TP 0
Both latitude and longitude TP: 0


# Checking for False Positive (unmatched)

In [6]:
# Checking only Latitude
pa_LATmatch = pa.loc [pa['LATmaxT_match'] == 'False']

# Checking only longitude
pa_Lonmatch = pa.loc [pa['LONmaxT_match'] == 'False']

#  Checking both latitude and Longitude
pa_Coormatch = pa.loc [(pa['LATmaxT_match'] == 'False') & (pa['LONmaxT_match'] == 'False')]

# Printing the results
print ("Latitude FP:", len(pa_LATmatch))
print ("Longitude FP", len(pa_Lonmatch))
print ("Both latitude and longitude FP:", len(pa_Coormatch))


# # Checking only Latitude
# pa_LATmatch = pa.loc [pa['Lat_match'] == 'False']

# # Checking only longitude
# pa_Lonmatch = pa.loc [pa['Lon_match'] == 'False']

# #  Checking both latitude and Longitude
# pa_Coormatch = pa.loc [(pa['Lat_match'] == 'False') & (pa['Lon_match'] == 'False')]

# # Printing the results
# print ("Latitude TP:", len(pa_LATmatch))
# print ("Longitude TP", len(pa_Lonmatch))
# print ("Both latitude and longitude TP:", len(pa_Coormatch))

Latitude FP: 867
Longitude FP 873
Both latitude and longitude FP: 867


# Saving the new csv

In [7]:
# Don't include the indeces
newfile = 'PrecisionAnalysis.csv'
pa.to_csv(newfile, index=False)

print(pa)

           Description        Lon        Lat       MMSI         BaseDateTime  \
0     27.2924, -97.623 -97.623039  27.292366  367682480  2022-05-30T00:12:00   
1     26.977, -97.5029 -97.502899  26.976959  367471780  2022-05-30T00:12:00   
2    26.5498, -97.3903 -97.390266  26.549812  367792030  2022-05-30T00:12:00   
3    27.8271, -97.1471 -97.147095  27.827120  368188790  2022-05-30T00:12:01   
4    27.7422, -97.0928 -97.092804  27.742237  367778650  2022-05-30T00:12:02   
..                 ...        ...        ...        ...                  ...   
868                NaN        NaN        NaN  367735690  2022-05-30T00:18:57   
869                NaN        NaN        NaN  368088160  2022-05-30T00:18:57   
870                NaN        NaN        NaN  367718320  2022-05-30T00:18:58   
871                NaN        NaN        NaN  367454140  2022-05-30T00:18:58   
872                NaN        NaN        NaN  366986160  2022-05-30T00:18:59   

          LAT       LON          Vessel