In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
% matplotlib inline
plt.style.use('ggplot')

import seaborn as sns

import cartopy.crs as ccrs
import cartopy.feature as cfeat
import cartopy.io.img_tiles as cimgt

import AISMapPlot as aismap

In [13]:
# Load the learner table of every sector from its pickle file. The list is
# ordered like `sectors`, so ais_learners[k] belongs to sector k.
sectors = list(range(6))
ais_learners = [
    pd.read_pickle('ROT-HAM_learners_60min_s%d.pkl' % sector_id)
    for sector_id in sectors
]

In [14]:
# Preview the first rows of the first sector's learner table.
ais_learners[0].head()

Unnamed: 0,TripID,time,Length,Breadth,Draught,Latitude,Longitude,SOG,COG,TH,...,hour_of_day,weekday,day_of_month,month,sector,mins_to_sector_leave,sector_leave_lat,sector_leave_lon,sector_leave_sog,sector_leave_cog
0,1000226,2016-06-15 14:00:00,369.0,51.0,13.8,51.97,4.033333,5.2,112.633333,112.0,...,14.0,2.0,15.0,6.0,0,300.0,52.8,4.2,12.8,34.3
1,1000226,2016-06-15 15:00:00,369.0,51.0,13.8,51.97383,4.046596,9.325532,196.780851,196.914894,...,15.0,2.0,15.0,6.0,0,274.595745,52.8,4.2,12.8,34.3
2,1000226,2016-06-15 16:00:00,369.0,51.0,13.8,52.13425,3.8885,12.945,256.5325,345.275,...,16.0,2.0,15.0,6.0,0,202.65,52.8,4.2,12.8,34.3
3,1000226,2016-06-15 17:00:00,369.0,51.0,13.8,52.328793,3.868621,13.075862,153.406897,152.448276,...,17.0,2.0,15.0,6.0,0,149.017241,52.8,4.2,12.8,34.3
4,1000226,2016-06-15 18:00:00,369.0,51.0,13.8,52.53093,3.936512,12.574419,24.613953,23.418605,...,18.0,2.0,15.0,6.0,0,90.046512,52.8,4.2,12.8,34.3


In [18]:
# Per-sector feature frames (Latitude / Longitude columns) and target frames
# (sector_leave_lat / sector_leave_lon columns), kept in lists parallel to
# `sectors`.
X = [ais_learners[s][['Latitude', 'Longitude']] for s in sectors]
y = [ais_learners[s][['sector_leave_lat', 'sector_leave_lon']] for s in sectors]

In [22]:
from sklearn.model_selection import GroupKFold

# For every sector, materialise the 10 GroupKFold (train, test) index pairs.
# Rows are grouped by TripID, so all rows of a trip land in the same fold.
cv_splits = [
    list(
        GroupKFold(n_splits = 10).split(
            X[s], y[s], groups = ais_learners[s]['TripID']
        )
    )
    for s in sectors
]

In [65]:
# Simple hold-out split: use only the first cross-validation fold of each
# sector and keep its (train, test) positional indices.
first_folds = [cv_splits[s][0] for s in sectors]
train_indices = [train_idx for train_idx, _ in first_folds]
test_indices = [test_idx for _, test_idx in first_folds]

# Slice the per-sector features by position into train and test parts.
X_train = [X[s].iloc[train_indices[s]] for s in sectors]
X_test = [X[s].iloc[test_indices[s]] for s in sectors]

# Slice the per-sector targets with the same indices.
y_train = [y[s].iloc[train_indices[s]] for s in sectors]
y_test = [y[s].iloc[test_indices[s]] for s in sectors]

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV

# Template estimator; GridSearchCV clones it for every fit, so it can be
# shared by all sectors.
lin = LinearRegression()

# NOTE(review): `normalize` was removed from LinearRegression in
# scikit-learn 1.2 - on newer releases this grid needs another parameter
# (e.g. fit_intercept); confirm the installed version.
lin_params = dict(normalize = [True, False])

# X, y and cv_splits are lists with one entry per sector, so the search has to
# be run once per sector: a single fit(X, y) would receive a list of frames
# instead of one frame, and a list of split-lists instead of (train, test)
# pairs. lin_grid[s] is the fitted search of sector s.
lin_grid = []
for s in sectors:
    sector_grid = GridSearchCV(lin, lin_params, cv = cv_splits[s], return_train_score = True, scoring = 'neg_mean_absolute_error')
    sector_grid.fit(X[s], y[s])
    lin_grid.append(sector_grid)

# Stack the per-sector result tables; the outer index level is the sector.
pd.concat([pd.DataFrame(grid.cv_results_) for grid in lin_grid], keys = sectors, names = ['sector', 'candidate'])

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsRegressor

# Template estimator; the search clones it for every candidate.
knn = KNeighborsRegressor()

knn_params = dict(n_neighbors = range(1, 50), weights = ['uniform', 'distance'])

# X, y and cv_splits hold one entry per sector, so one randomized search is run
# per sector (fitting on the lists directly is not valid input).
# No random_state is passed, so the 10 sampled candidates can differ between
# sectors and between runs.
knn_rand = []
for s in sectors:
    sector_search = RandomizedSearchCV(knn, knn_params, cv = cv_splits[s], return_train_score = True, scoring = 'neg_mean_absolute_error', n_iter = 10)
    sector_search.fit(X[s], y[s])
    knn_rand.append(sector_search)

# Stack the per-sector result tables; the outer index level is the sector.
pd.concat([pd.DataFrame(search.cv_results_) for search in knn_rand], keys = sectors, names = ['sector', 'candidate'])

In [None]:
# X_train, y_train and X_test are lists with one frame per sector, so a
# separate KNN model is fitted and evaluated for every sector.
# knn_best[s] is the fitted model of sector s and knn_predicts[s] its
# predictions for the rows of X_test[s].
knn_best = []
knn_predicts = []
for s in sectors:
    sector_knn = KNeighborsRegressor(n_neighbors = 41, weights = 'distance')
    sector_knn.fit(X_train[s], y_train[s])
    knn_best.append(sector_knn)
    knn_predicts.append(sector_knn.predict(X_test[s]))

In [None]:
# Signed prediction error: prediction minus ground truth.
# NOTE(review): y_test is built in the train/test split cell as a plain Python
# list holding one DataFrame per sector, so this `-` is not an element-wise
# frame subtraction - presumably it has to be done sector by sector; confirm.
knn_predicts_diffs = knn_predicts - y_test

In [None]:
# Put ground truth, KNN prediction and signed error side by side, add the
# absolute error, order the rows by the true value and summarise the error.
# NOTE(review): y_test is a per-sector list of DataFrames with two target
# columns (sector_leave_lat, sector_leave_lon), whereas the 'correct' / 'diff'
# columns and sort_values('correct') read as if there were one scalar target
# per row - confirm how the two-column target should be laid out here.
knn_predicts_frame = pd.DataFrame({'correct': y_test, 'knn_predicted': knn_predicts, 'diff': knn_predicts_diffs})
knn_predicts_frame['abs_error'] = knn_predicts_frame['diff'].transform(lambda x: abs(x))
knn_predicts_frame = knn_predicts_frame.sort_values('correct')
knn_predicts_frame['abs_error'].describe()

In [None]:
# Signed KNN error plotted against the true value (rows are sorted by it).
fig, ax = plt.subplots(figsize = (15, 5))
ax.plot(knn_predicts_frame['correct'], knn_predicts_frame['diff'])

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

# Template estimator; the search clones it for every candidate.
rfo = RandomForestRegressor()
rfo_params = dict(n_estimators = range(1, 10), max_depth = range(5,15))

# X, y and cv_splits hold one entry per sector, so one randomized search is run
# per sector (fitting on the lists directly is not valid input).
# No random_state is set on the forest or the search, so results vary between
# runs.
rfo_rand = []
for s in sectors:
    sector_search = RandomizedSearchCV(rfo, rfo_params, cv = cv_splits[s], return_train_score = True, scoring = 'neg_mean_absolute_error', n_iter = 10)
    sector_search.fit(X[s], y[s])
    rfo_rand.append(sector_search)

# Stack the per-sector result tables; the outer index level is the sector.
pd.concat([pd.DataFrame(search.cv_results_) for search in rfo_rand], keys = sectors, names = ['sector', 'candidate'])

In [None]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR

# One support-vector model per sector; clf[s] is the fitted model of sector s.
# - SVR only accepts a 1-D target, while the targets here have two columns
#   (sector_leave_lat, sector_leave_lon), so it is wrapped in
#   MultiOutputRegressor, which fits one SVR per target column.
# - The models are trained on the training split only (as in the KNN and MLP
#   cells); fitting on the full X / y would let the test rows leak into
#   training.
clf = []
for s in sectors:
    sector_svr = MultiOutputRegressor(SVR(C=1.0, epsilon=0.2))
    sector_svr.fit(X_train[s], y_train[s])
    clf.append(sector_svr)

In [None]:
# Predict on the held-out rows and take the signed error (prediction minus
# ground truth).
# NOTE(review): X_test and y_test are built in the train/test split cell as
# Python lists with one DataFrame per sector, so both the predict call and the
# subtraction presumably have to be done sector by sector; confirm.
clf_predicts= clf.predict(X_test)
clf_predicts_diffs = clf_predicts - y_test

In [None]:
# Put ground truth, SVR prediction and signed error side by side, add the
# absolute error, order the rows by the true value and summarise the error.
# NOTE(review): y_test is a per-sector list of DataFrames with two target
# columns (sector_leave_lat, sector_leave_lon), whereas the 'correct' / 'diff'
# columns and sort_values('correct') read as if there were one scalar target
# per row - confirm how the two-column target should be laid out here.
clf_predicts_frame = pd.DataFrame({'correct': y_test, 'clf_predicted': clf_predicts, 'diff': clf_predicts_diffs})
clf_predicts_frame['abs_error'] = clf_predicts_frame['diff'].transform(lambda x: abs(x))
clf_predicts_frame = clf_predicts_frame.sort_values('correct')
clf_predicts_frame['abs_error'].describe()

In [None]:
# Signed SVR error plotted against the true value (rows are sorted by it).
fig, ax = plt.subplots(figsize = (15, 5))
ax.plot(clf_predicts_frame['correct'], clf_predicts_frame['diff'])

In [None]:
# need to test different parameters

from sklearn.neural_network import MLPRegressor

# X_train and y_train are lists with one frame per sector, so one network is
# trained per sector; mlp[s] is the fitted model of sector s.
# hidden_layer_sizes is written as a one-element tuple: `(7)` is just the
# integer 7, `(7,)` states the single hidden layer of 7 units explicitly.
# No random_state is set, so the fitted weights vary between runs.
mlp = []
for s in sectors:
    sector_mlp = MLPRegressor(hidden_layer_sizes=(7,), max_iter=1000)
    sector_mlp.fit(X_train[s], y_train[s])
    mlp.append(sector_mlp)


In [None]:
# Predict on the held-out rows, take the signed error (prediction minus ground
# truth), then tabulate truth / prediction / error, add the absolute error,
# order the rows by the true value and summarise the error.
# NOTE(review): X_test and y_test are built in the train/test split cell as
# Python lists with one DataFrame per sector, and each target frame has two
# columns (sector_leave_lat, sector_leave_lon). The predict call, the
# subtraction and the scalar 'correct' / 'diff' columns below read as if there
# were a single frame with one target - confirm and rework per sector.
ann_predicts= mlp.predict(X_test)
ann_predicts_diffs = ann_predicts - y_test
ann_predicts_frame = pd.DataFrame({'correct': y_test, 'ann_predicted': ann_predicts, 'diff': ann_predicts_diffs})
ann_predicts_frame['abs_error'] = ann_predicts_frame['diff'].transform(lambda x: abs(x))
ann_predicts_frame = ann_predicts_frame.sort_values('correct')
ann_predicts_frame['abs_error'].describe()

In [None]:
# Signed MLP error plotted against the true value (rows are sorted by it).
fig, ax = plt.subplots(figsize = (15, 5))
ax.plot(ann_predicts_frame['correct'], ann_predicts_frame['diff'])