<h1> Ship Type Prediction </h1>

<p align='justify'>

This Jupyter notebook contains a classification example built
with the help of the Scikit-Learn library. In this notebook,
the following steps are performed:
</p>

<ol align='justify'>
    <li> The preprocessing, i.e. feature generation, filtering and
         interpolation of the data, is carried out using the
         NumMobility Library.
    </li>
    <li> Further, several models like RandomForestClassifier, KMeans
         Classifier etc. are trained using the Scikit-Learn library
         based on the cleaned dataset.
    </li>
    <li>
        Finally, on the interpolated dataset, the ship types are
        predicted and the accuracy of the predictions is checked.
    </li>
</ol>

In [1]:
# Import the dataset.
# The raw CSV is read with pandas and wrapped in a NumPandasTraj; the keyword
# arguments name the CSV columns that hold latitude, longitude, timestamp
# and the trajectory identifier (here the 'Name' column).

import pandas as pd
from Nummobility.core.TrajectoryDF import NumPandasTraj

pdf = pd.read_csv('./data/ships.csv')
np_ships = NumPandasTraj(data_set=pdf,
                         latitude='lat',
                         longitude='lon',
                         datetime='Timestamp',
                         traj_id='Name')
# Preview the first rows; the resulting frame is indexed by (traj_id, DateTime).
# NOTE(review): in the preview 'lat' is ~11.9 and 'lon' is ~57.7 -- verify
# that the two columns are not swapped in the CSV.
np_ships.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,MMSI,NavStatus,SOG,COG,ShipType
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AB RAMANTENN,2017-05-07 00:13:05,11.905735,57.681092,265902200,Moored,0.1,170.7,Undefined
AB RAMANTENN,2017-05-07 00:25:04,11.90574,57.68107,265902200,Moored,0.1,170.7,Undefined
AB RAMANTENN,2017-05-07 00:31:05,11.905792,57.68106,265902200,Moored,0.1,177.4,Undefined
AB RAMANTENN,2017-05-07 01:01:05,11.90565,57.681127,265902200,Moored,0.0,175.6,Undefined
AB RAMANTENN,2017-05-07 01:07:05,11.9057,57.681107,265902200,Moored,0.1,180.8,Undefined


In [2]:
%%time

# Using Nummobility, add the distance between each point and the previous
# one as a new column (Distance_prev_to_curr). This cell only generates the
# feature; Filters is imported here for the filtering done in later cells.
from Nummobility.features.spatial_features import SpatialFeatures
from Nummobility.preprocessing.filters import Filters

dist_ships = SpatialFeatures.create_distance_between_consecutive_column(np_ships)
# The first previewed row has an empty distance, presumably because it has
# no preceding point.
dist_ships.head()

CPU times: user 311 ms, sys: 12 ms, total: 323 ms
Wall time: 322 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,MMSI,NavStatus,SOG,COG,ShipType,Distance_prev_to_curr
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AB RAMANTENN,2017-05-07 00:13:05,11.905735,57.681092,265902200,Moored,0.1,170.7,Undefined,
AB RAMANTENN,2017-05-07 00:25:04,11.90574,57.68107,265902200,Moored,0.1,170.7,Undefined,2.457384
AB RAMANTENN,2017-05-07 00:31:05,11.905792,57.68106,265902200,Moored,0.1,177.4,Undefined,5.883613
AB RAMANTENN,2017-05-07 01:01:05,11.90565,57.681127,265902200,Moored,0.0,175.6,Undefined,17.391237
AB RAMANTENN,2017-05-07 01:07:05,11.9057,57.681107,265902200,Moored,0.1,180.8,Undefined,5.970428


In [3]:
%%time

# Run the Hampel outlier filter on the distance column, then print the row
# counts before and after so the number of removed rows is visible.
filt_ships = Filters.hampel_outlier_detection(dist_ships,
                                              column_name='Distance_prev_to_curr')
print(f"Length of original DF: {len(dist_ships)}")
print(f"Length of Filtered DF: {len(filt_ships)}")

Length of original DF: 84702
Length of Filtered DF: 61394
CPU times: user 207 ms, sys: 108 ms, total: 315 ms
Wall time: 7.77 s




In [4]:

# Remove duplicate records from the Hampel-filtered frame.
# Note: "original DF" in the printout is this step's input (filt_ships),
# not the raw dataset.
dr_filt_ships = Filters.remove_duplicates(filt_ships)
print(f"Length of original DF: {len(filt_ships)}")
print(f"Length of Filtered DF: {len(dr_filt_ships)}")

Length of original DF: 61394
Length of Filtered DF: 61102


In [5]:
# Drop trajectories that have too few points. No threshold is passed, so
# the library default applies -- TODO confirm its value.
# Note: "original DF" in the printout is this step's input (dr_filt_ships).
fp_filt_ships = Filters.remove_trajectories_with_less_points(dr_filt_ships)
print(f"Length of original DF: {len(dr_filt_ships)}")
print(f"Length of Filtered DF: {len(fp_filt_ships)}")

Length of original DF: 61102
Length of Filtered DF: 61097


In [6]:
# Strip leading/trailing whitespace from the ShipType labels (modifies the
# column in place), then show how many rows each ship type has.
fp_filt_ships["ShipType"] = fp_filt_ships["ShipType"].str.strip()
fp_filt_ships["ShipType"].value_counts()

Tanker             16667
Passenger          14694
HSC                 7572
Pilot               7185
SAR                 4633
Tug                 2883
Cargo               2702
Pleasure            1049
Undefined            913
Sailing              872
Law enforcement      653
Spare 2              446
Diving               441
Fishing              234
Other                116
Dredging              37
Name: ShipType, dtype: int64

In [7]:
# Encode the ShipType string labels as integer class ids.
# Each known label has a fixed id; the table is the single place to edit
# when a label is added or renumbered.
SHIP_TYPE_CODES = {
    'Tanker': 0,
    'Passenger': 1,
    'HSC': 2,
    'Pilot': 3,
    'SAR': 4,
    'Tug': 5,
    'Cargo': 6,
    'Pleasure': 7,
    'Undefined': 8,
    'Sailing': 9,
    'Law enforcement': 10,
    'Spare 2': 11,
    'Diving': 12,
    'Fishing': 13,
    'Other': 14,
    'Dredging': 15,
}
# Any label that is missing from the table (including NaN) gets this id.
FALLBACK_SHIP_TYPE_CODE = 15

# Series.map yields NaN for labels absent from the table; those are replaced
# by the fallback id and the result is cast to int so the column stays
# integer-typed. This is one vectorized pass instead of a Python loop with
# repeated scalar .iloc lookups.
# Gotcha: the column is overwritten in place, so re-running this cell on the
# already-encoded integers maps every row to the fallback id.
int_test = (
    fp_filt_ships['ShipType']
    .map(SHIP_TYPE_CODES)
    .fillna(FALLBACK_SHIP_TYPE_CODE)
    .astype(int)
    .tolist()
)
fp_filt_ships['ShipType'] = int_test
fp_filt_ships.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,index,lat,lon,MMSI,NavStatus,SOG,COG,ShipType,Distance_prev_to_curr
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AB RAMANTENN,2017-05-07 00:13:05,0,11.905735,57.681092,265902200,Moored,0.1,170.7,8,
AB RAMANTENN,2017-05-07 00:25:04,1,11.90574,57.68107,265902200,Moored,0.1,170.7,8,2.457384
AB RAMANTENN,2017-05-07 00:31:05,2,11.905792,57.68106,265902200,Moored,0.1,177.4,8,5.883613
AB RAMANTENN,2017-05-07 01:07:05,3,11.9057,57.681107,265902200,Moored,0.1,180.8,8,5.970428
AB RAMANTENN,2017-05-07 01:31:04,4,11.905708,57.681045,265902200,Moored,0.1,173.2,8,6.804183


In [8]:
# De-duplicate on the (traj_id, DateTime) key and rebuild the trajectory
# frame. reset_index() turns the two index levels into ordinary columns so
# they can be used as the drop_duplicates subset.
fp_filt_ships = (
    fp_filt_ships
    .reset_index()
    .drop_duplicates(subset=['traj_id', 'DateTime'])
    # The helper 'index' column only exists the first time this cell runs;
    # errors='ignore' keeps the cell re-runnable instead of raising KeyError.
    .drop(columns=['index'], errors='ignore')
)
# Re-wrap the plain frame so that (traj_id, DateTime) become the index again.
fp_filt_ships = NumPandasTraj(data_set=fp_filt_ships,
                              latitude='lat',
                              longitude='lon',
                              datetime='DateTime',
                              traj_id='traj_id')
fp_filt_ships.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,MMSI,NavStatus,SOG,COG,ShipType,Distance_prev_to_curr
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AB RAMANTENN,2017-05-07 00:13:05,11.905735,57.681092,265902200,Moored,0.1,170.7,8,
AB RAMANTENN,2017-05-07 00:25:04,11.90574,57.68107,265902200,Moored,0.1,170.7,8,2.457384
AB RAMANTENN,2017-05-07 00:31:05,11.905792,57.68106,265902200,Moored,0.1,177.4,8,5.883613
AB RAMANTENN,2017-05-07 01:07:05,11.9057,57.681107,265902200,Moored,0.1,180.8,8,5.970428
AB RAMANTENN,2017-05-07 01:31:04,11.905708,57.681045,265902200,Moored,0.1,173.2,8,6.804183


In [9]:
# Seconds elapsed between consecutive points of the SAME trajectory.
# The diff is taken per traj_id so that the jump from the last point of one
# ship to the first point of the next is not counted as a time gap; the
# first point of every trajectory therefore has no value (NaN).
a = (
    fp_filt_ships
    .reset_index()
    .groupby('traj_id')['DateTime']
    .diff()
    .dt.total_seconds()
)
# How many gaps exceed 30 seconds (NaN compares as False).
(a > 30).value_counts()

False    47968
True     12980
Name: DateTime, dtype: int64

In [10]:
from Nummobility.preprocessing.interpolation import Interpolation as ip

# Interpolate the ship positions with cubic interpolation.
# time_jump=30 is presumably a threshold/step in seconds (the preview of the
# result shows points inserted 30 s after an existing one) -- confirm
# against the library documentation.
ip_ships = ip.interpolate_position(fp_filt_ships,
                                   time_jump=30,
                                   ip_type='cubic')

# Compare row counts before and after interpolation.
print(f"Length of original DF: {len(fp_filt_ships)}")
print(f"Length of interpolated DF: {len(ip_ships)}")

Length of original DF: 60948
Length of interpolated DF: 72701


In [11]:
# Shape check: per the recorded output, the interpolated frame has only two
# columns.
ip_ships.shape

(72701, 2)

In [12]:
# Preview the interpolated frame; the recorded output shows only lat and lon.
# NOTE(review): ShipType and the other attributes are not present here --
# presumably they have to be re-attached before training; confirm.
ip_ships.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1
AB RAMANTENN,2017-05-07 00:13:05,11.905735,57.681092
AB RAMANTENN,2017-05-07 00:13:35,11.905729,57.681092
AB RAMANTENN,2017-05-07 00:25:04,11.90574,57.68107
AB RAMANTENN,2017-05-07 00:25:34,11.905744,57.681069
AB RAMANTENN,2017-05-07 00:31:05,11.905792,57.68106
