## Naive Bayes - Classifier

### AIS Data – Activity Classification – Demo Data Extracted from Main Data

In [1]:
#### INPUTS (X): VesselName, MMSI, VesselType, Length, Width, SOG
##### MMSI --> Maritime Mobile Service Identity --> nine-digit ID
##### SOG --> Speed Over Ground
##### OUTPUT (y): Status (integer codes assigned by LabelEncoder in alphabetical order)
#####             0 - 'at anchor'
#####             1 - 'engaged in fishing'
#####             2 - 'undefined'
#####             3 - 'under way using engine'

In [2]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the AIS demo extract from the working directory and preview its first rows
dataset = pd.read_csv(filepath_or_buffer='AIS_2017_01_Zone01.csv')
dataset.head(n=5)

Unnamed: 0,VesselName,MMSI,VesselType,Length,Width,SOG,Status
0,ALEUTIAN NO 1,366988820,1001,37.22,9.73,9.2,engaged in fishing
1,ALEUTIAN NO 1,366988820,1001,37.22,9.73,9.6,engaged in fishing
2,ALEUTIAN NO 1,366988820,1001,37.22,9.73,9.3,engaged in fishing
3,ALEUTIAN NO 1,366988820,1001,37.22,9.73,9.5,engaged in fishing
4,ALEUTIAN NO 1,366988820,1001,37.22,9.73,9.6,engaged in fishing


In [4]:
dataset.tail()

Unnamed: 0,VesselName,MMSI,VesselType,Length,Width,SOG,Status
9952,WESTWOOD PACIFIC,211517000,1004,183.2,29.94,13.0,under way using engine
9953,WESTWOOD PACIFIC,211517000,1004,183.2,29.94,13.1,under way using engine
9954,WESTWOOD PACIFIC,211517000,1004,183.2,29.94,13.1,under way using engine
9955,WESTWOOD PACIFIC,211517000,1004,183.2,29.94,13.1,under way using engine
9956,WESTWOOD PACIFIC,211517000,1004,183.2,29.94,13.1,under way using engine


In [5]:
# Feature matrix: every column except the first and the last, selected by position.
# Target vector: the last column (the Status strings).
# NOTE(review): the positional slice appears to keep MMSI, which the notebook header
# describes as a nine-digit ID -- confirm that an identifier is wanted as a feature.
feature_columns = slice(1, -1)
X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, -1].values
print(y[:3])

['engaged in fishing' 'engaged in fishing' 'engaged in fishing']


In [6]:
## Categories
# LabelEncoder numbers the class names in sorted order, which gives:
#   0 'at anchor', 1 'engaged in fishing', 2 'undefined', 3 'under way using engine'
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
# Encode straight from the raw Status column (the same values `y` was built from)
# instead of from `y` itself. Re-running this cell then never fits the encoder on
# already-encoded integers, which would replace the class names stored in
# labelencoder_y.classes_ with the codes 0..3.
y = labelencoder_y.fit_transform(dataset.iloc[:, -1].values)
print(y[0:3])

[1 1 1]


In [7]:
# Hold out 25% of the rows as a test set; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [8]:
# Standardise the features. The scaler is fitted on the training rows only and the
# same fitted transform is then applied to both the training and the test rows.
# NOTE: X_train and X_test are overwritten here, so running this cell a second time
# refits the scaler on data that has already been scaled.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [9]:
# Train a Gaussian Naive Bayes model on the training set.
# fit() returns the estimator itself, so it can be bound directly; the bare name on
# the last line keeps the estimator repr as the cell output.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB().fit(X_train, y_train)
classifier

GaussianNB()

In [10]:
# Predict an encoded Status label for every row of the held-out test set
y_pred = classifier.predict(X=X_test)

In [11]:
# Confusion matrix on the test set: rows are the true classes, columns the predicted ones
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[ 247    0    0    0]
 [   0  189    0    1]
 [   0    0  634    1]
 [   0    0    0 1418]]


In [12]:
# 10-fold cross-validation of the classifier on the training set; the cell output is
# the mean score across the folds.
# NOTE(review): X_train was already standardised with statistics from the whole
# training set before the folds are formed -- confirm that is acceptable here.
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)
accuracies.mean()

0.998928690633849

In [13]:
# Spread (standard deviation) of the per-fold cross-validation scores
np.std(accuracies)

0.0011675356289698835