In [171]:
import pandas as pd
from matplotlib import pyplot
import numpy as np
import seaborn as sns
import math
import datetime
import pickle
import joblib

model_rfr = joblib.load('savemodel_rfr.sav')

dfmain = pd.read_csv("AIS_weather_H_ok2_copy.csv",parse_dates=["Time"])
dfmain = dfmain[dfmain['LAT'] > 55.04 ]

dfpre = pd.read_csv("AIS_weather_h_rename_copy.csv",parse_dates=["Time"])
dfpre = dfpre[dfpre['LAT'] > 55.04 ]

In [172]:
dfpre = dfpre.drop(dfpre.columns[[0,1,2,5,6,7,8,9,10,11,12,13,14,15,16,17,18,21,22,23,26]],axis=1)
dfpre = dfpre.rename({'Eastward wind': 'eastwind', 'Northward wind': 'northwind',
                           'Eastward current': 'eastcurrent', 'Northward current': 'northcurrent',
                           'SOG':'sog','COG':'cog','Heading':'heading'},axis=1) 
dfpre.head(n=5)

Unnamed: 0,eastwind,northwind,eastcurrent,northcurrent,sog,cog
0,2.267688,6.741482,0.11369,0.044994,0.0,358.9
1,1.782681,6.410275,0.11369,0.044994,0.0,358.9
2,1.154659,6.385135,0.083984,0.054,0.0,358.9
3,0.348801,6.094659,0.083984,0.054,0.0,358.9
4,-0.593128,6.153186,0.083984,0.054,0.0,358.9


In [173]:
dfpre["True North Wind Direction"] = np.rad2deg(np.arctan(dfpre["eastwind"]/dfpre["northwind"]))

In [174]:
dfpre["True North Current Direction"] = np.rad2deg(np.arctan(dfpre["eastcurrent"]/dfpre["northcurrent"]))

In [175]:
dfpre = dfpre.drop(['eastwind','northwind','eastcurrent','northcurrent','sog','cog'],axis=1)

In [176]:
df = pd.concat([dfmain,dfpre],axis=1)
df.head()

Unnamed: 0.1,Unnamed: 0,Time,LON,LAT,Temperature above oceans,Air density above oceans,Max wave height,Swell period,Wind wave period,Wave period,...,Heading,Wind Speed,Current Speed,True Wind Direction,True Current Direction,True Swell Direction,True Wind Wave Direction,True Wave Direction,True North Wind Direction,True North Current Direction
0,0,2021-01-01 00:00:00,14.689663,55.098452,277.311035,1.259323,1.893614,4.818881,3.818459,4.154329,...,174.0,7.112664,0.12227,155.40818,105.591753,22.85405,22.749664,22.610062,18.59182,68.408246
1,1,2021-01-01 01:00:00,14.689659,55.098451,277.328369,1.259985,1.752303,4.7558,3.470216,4.13058,...,174.0,6.653538,0.12227,158.458891,105.591753,26.122772,19.152008,22.209274,15.541109,68.408246
2,2,2021-01-01 02:00:00,14.689648,55.098468,277.390625,1.260244,1.619775,4.645083,3.259682,4.083755,...,174.0,6.488697,0.099846,163.749663,116.740448,28.498383,12.424912,20.857971,10.250337,57.259552
3,3,2021-01-01 03:00:00,14.689646,55.098471,277.411377,1.260148,1.518736,4.588384,3.177906,4.010502,...,174.0,6.104632,0.099846,170.724504,116.740448,29.956985,6.725922,18.732071,3.275496,57.259552
4,4,2021-01-01 04:00:00,14.689644,55.098462,277.430664,1.260634,1.455034,4.528841,3.281654,3.913362,...,174.0,6.181707,0.099846,179.50594,116.740448,34.486298,3.761627,15.73143,-5.50594,57.259552


In [177]:
# SOG threshold according to Abebe
df = df[df['SOG'] > 5 ]

In [178]:
# Exclude data for the month of June to check the model's forecasting performance 
dfdate6 = df[df['Time'].dt.strftime('%Y-%m') == '2021-06']
df = df[df['Time'].dt.strftime('%Y-%m') != '2021-06']

In [179]:
dfdate6 = dfdate6.drop(['Unnamed: 0','Time','LON','LAT','Air density above oceans',
'Surface pressure','Width','Length'],axis=1)
dfdate6.head(n=5)

Unnamed: 0,Temperature above oceans,Max wave height,Swell period,Wind wave period,Wave period,Sea surface temperature,Combined wind waves and swell height,Swell height,Wind wave height,Draught,...,Heading,Wind Speed,Current Speed,True Wind Direction,True Current Direction,True Swell Direction,True Wind Wave Direction,True Wave Direction,True North Wind Direction,True North Current Direction
119,275.823975,3.35808,6.738089,4.925452,5.291555,278.661621,1.759765,0.743164,1.58374,5.281861,...,100.782313,10.248966,0.144914,100.989288,164.871808,15.554049,66.843493,57.268527,21.771601,85.654121
120,275.924011,3.57812,7.185143,5.342444,5.697814,278.936035,1.950242,0.841925,1.757568,5.289403,...,96.934816,11.364678,0.24622,108.737155,153.650411,11.071672,60.455881,51.313932,25.671971,70.585227
121,276.285034,4.184914,7.260825,5.460486,6.126474,279.209961,2.189788,0.933936,1.910706,5.288446,...,97.525758,12.666946,0.284315,114.910729,153.031589,16.281358,57.086511,46.894106,32.436488,70.557348
122,277.189484,4.90868,6.808745,6.225745,6.284671,279.216553,2.56523,0.723303,2.452637,5.290781,...,106.941521,13.386999,0.300428,123.568984,153.643227,69.175152,55.507286,56.527939,50.510505,80.584748
126,277.408752,4.621826,6.813305,5.970984,6.291378,279.319824,2.414834,1.022894,2.130981,4.842367,...,227.680837,12.053293,0.262897,26.476633,41.766758,158.632374,159.050718,167.192888,74.157471,89.447596


In [180]:
dfdate6= dfdate6.rename({'Max wave height': 'waveheight', 'Draught': 'draught',
                           'SOG': 'sog', 'Wind Speed': 'windspeed', 
                           'True Wind Direction': 'truewinddir','Temperature above oceans' : 'oceantemperature',
                           'COG': 'cog', 'Current Speed' : 'curspeed','True Wave Direction' : 'truewavedir',
                            'Swell period': 'swellperiod','Wind wave period': 'windwaveperiod','Sea surface temperature': 'surftemp',
                            'Combined wind waves and swell height': 'windwaveswellheight','Swell height': 'swellheight','Wind wave height': 'windwaveheight',
                            'Heading': 'heading','True Current Direction': 'truecurrentdir','True Swell Direction': 'trueswelldir',
                            'True Wind Wave Direction': 'truewindwavedir','Wave period': 'waveperiod',
                            'True North Wind Direction' : 'truenorthwinddir' , 'True North Current Direction' : 'truenorthcurrentdir'
                           }, axis=1) 

In [181]:
import numpy as np
from sklearn.impute import KNNImputer

imputer = KNNImputer(n_neighbors=50)


In [182]:
imputer.fit(dfdate6)

In [183]:
xdate = imputer.transform(dfdate6)

In [184]:
dfdate6tr = pd.DataFrame(xdate, columns=dfdate6.columns, index=dfdate6.index)

In [185]:
x_date = dfdate6tr.drop(['sog','truenorthcurrentdir','truenorthwinddir'],axis=1)
y_date = dfdate6tr.sog

In [186]:
def predict_y(x_test,model_type):
    y_predicted = model_type.predict(x_test)
    return y_predicted

def display_scores(x_test,y_test):
    from sklearn.metrics import explained_variance_score,mean_absolute_error,mean_squared_error,median_absolute_error
    print("R^2 score (Indicate model fit. Best Score = 1):", model.score(x_test,y_test))
    print("Explained Variance EV (Indicate amount of variance in model. Best Score = 1):", explained_variance_score(y_test,y_predicted))
    print("Mean Absolute Error MAE (Indicate how much error a model makes in its prediction. Best Score = 0):", mean_absolute_error(y_test,y_predicted))
    print("Root Mean Square Error RMSE (Same as MAE, more sensitive to outlier. Best Score = 0):", mean_squared_error(y_test,y_predicted))
    print("Median Absolute Error MAD (Check robustness against outlier. Best Score = 1):", median_absolute_error(y_test,y_predicted))

y_predicted = predict_y(x_date,model_rfr)
display_scores(x_date,y_date)

R^2 score (Indicate model fit. Best Score = 1): 0.8655106540790448
Explained Variance EV (Indicate amount of variance in model. Best Score = 1): 0.8856947207468615
Mean Absolute Error MAE (Indicate how much error a model makes in its prediction. Best Score = 0): 0.8174891710427643
Root Mean Square Error RMSE (Same as MAE, more sensitive to outlier. Best Score = 0): 1.2635660377651237
Median Absolute Error MAD (Check robustness against outlier. Best Score = 1): 0.5814641679499957


In [187]:
ydate_pred = y_predicted
dfprog_sog = pd.Series(y_date,name="sog_act").to_frame()
dfprog_sog["sog_pred"] = ydate_pred
dfprog_sog["gamma"] = dfdate6tr["truenorthcurrentdir"]
dfprog_sog["nwinddir"] = dfdate6tr["truenorthwinddir"]

print(dfprog_sog)

        sog_act   sog_pred      gamma   nwinddir
119   18.998906  18.532150  85.654121  21.771601
120   19.334278  18.507278  70.585227  25.671971
121   18.911047  18.629329  70.557348  32.436488
122   13.854933  13.987405  80.584748  50.510505
126    9.305198  10.829396  89.447596  74.157471
...         ...        ...        ...        ...
7413  19.072622  18.308065 -75.033834 -69.040073
7414  17.696584  17.661892  86.330174 -53.162661
7415  15.963630  16.482205  83.486098 -60.981379
7418   8.988086   8.568891 -15.412087 -39.276623
7419  17.672769  17.378516   1.124001 -36.097446

[322 rows x 4 columns]


In [188]:
dfprog = pd.concat([x_date,dfprog_sog],axis=1)
dfprog.head(n=10)

Unnamed: 0,oceantemperature,waveheight,swellperiod,windwaveperiod,waveperiod,surftemp,windwaveswellheight,swellheight,windwaveheight,draught,...,curspeed,truewinddir,truecurrentdir,trueswelldir,truewindwavedir,truewavedir,sog_act,sog_pred,gamma,nwinddir
119,275.823975,3.35808,6.738089,4.925452,5.291555,278.661621,1.759765,0.743164,1.58374,5.281861,...,0.144914,100.989288,164.871808,15.554049,66.843493,57.268527,18.998906,18.53215,85.654121,21.771601
120,275.924011,3.57812,7.185143,5.342444,5.697814,278.936035,1.950242,0.841925,1.757568,5.289403,...,0.24622,108.737155,153.650411,11.071672,60.455881,51.313932,19.334278,18.507278,70.585227,25.671971
121,276.285034,4.184914,7.260825,5.460486,6.126474,279.209961,2.189788,0.933936,1.910706,5.288446,...,0.284315,114.910729,153.031589,16.281358,57.086511,46.894106,18.911047,18.629329,70.557348,32.436488
122,277.189484,4.90868,6.808745,6.225745,6.284671,279.216553,2.56523,0.723303,2.452637,5.290781,...,0.300428,123.568984,153.643227,69.175152,55.507286,56.527939,13.854933,13.987405,80.584748,50.510505
126,277.408752,4.621826,6.813305,5.970984,6.291378,279.319824,2.414834,1.022894,2.130981,4.842367,...,0.262897,26.476633,41.766758,158.632374,159.050718,167.192888,9.305198,10.829396,89.447596,74.157471
127,277.399658,3.852527,6.907508,5.299354,6.061778,279.230591,2.021695,1.143225,1.607422,4.84153,...,0.18146,49.650197,36.203894,98.603207,126.285782,119.939682,20.935741,17.424789,85.735437,72.289134
128,275.499359,3.304548,6.816074,4.561194,5.927387,286.705762,1.754722,1.136414,1.240662,4.85289,...,0.281402,11.124505,16.745666,170.252415,173.337517,176.253205,12.419853,12.235935,87.870444,60.000273
130,276.370789,2.99914,6.68077,3.821632,5.959699,286.763584,1.585114,1.306714,0.837463,4.923635,...,0.185268,121.750507,148.640604,55.948144,59.314576,54.778275,20.447721,16.448165,-84.704755,68.405148
131,277.268555,3.283719,6.891045,4.686806,5.876345,279.216553,1.731217,1.167188,1.231018,4.925344,...,0.133047,156.811217,154.9016,57.73244,24.484072,41.385363,18.702889,16.465161,82.800757,84.710374
134,277.220703,2.827531,6.624643,4.262856,5.585643,279.216553,1.490863,1.038464,1.032349,5.202804,...,0.139192,1.584058,10.96151,144.13538,178.761731,159.775304,16.155258,16.377091,-78.378834,89.075598


In [189]:
dfprog["vgms"] = dfprog["sog_pred"]/1.9438

In [190]:
rad_gamma = np.deg2rad(dfprog["gamma"])
rad_cog = np.deg2rad(dfprog["cog"])

In [191]:
dfprog["vgx"] = dfprog["vgms"] * np.sin(rad_cog)
dfprog["vcx"] = dfprog["curspeed"] * np.sin(rad_gamma)
dfprog["stw_x"] = (dfprog["vgx"] - dfprog["vcx"])

In [192]:
dfprog["vgy"] = dfprog["vgms"] * np.cos(rad_cog)
dfprog["vcy"] = dfprog["curspeed"] * np.cos(rad_gamma)
dfprog["stw_y"] = (dfprog["vgy"] - dfprog["vcy"])

In [193]:
dfprog["vwms_p"] = np.sqrt(dfprog["stw_x"]**2 + dfprog["stw_y"]**2)
dfprog["stw_pred"] = dfprog["vwms_p"]*1.9438
dfprog = dfprog.drop(['vgms','vgx','vcx','stw_x',
                      'vgy','vcy','stw_y',
                      'vwms_p'],axis=1)
#df_ship.head(n=5)
dfprog.head(n=10)

Unnamed: 0,oceantemperature,waveheight,swellperiod,windwaveperiod,waveperiod,surftemp,windwaveswellheight,swellheight,windwaveheight,draught,...,truewinddir,truecurrentdir,trueswelldir,truewindwavedir,truewavedir,sog_act,sog_pred,gamma,nwinddir,stw_pred
119,275.823975,3.35808,6.738089,4.925452,5.291555,278.661621,1.759765,0.743164,1.58374,5.281861,...,100.989288,164.871808,15.554049,66.843493,57.268527,18.998906,18.53215,85.654121,21.771601,18.263222
120,275.924011,3.57812,7.185143,5.342444,5.697814,278.936035,1.950242,0.841925,1.757568,5.289403,...,108.737155,153.650411,11.071672,60.455881,51.313932,19.334278,18.507278,70.585227,25.671971,18.086519
121,276.285034,4.184914,7.260825,5.460486,6.126474,279.209961,2.189788,0.933936,1.910706,5.288446,...,114.910729,153.031589,16.281358,57.086511,46.894106,18.911047,18.629329,70.557348,32.436488,18.148146
122,277.189484,4.90868,6.808745,6.225745,6.284671,279.216553,2.56523,0.723303,2.452637,5.290781,...,123.568984,153.643227,69.175152,55.507286,56.527939,13.854933,13.987405,80.584748,50.510505,13.409203
126,277.408752,4.621826,6.813305,5.970984,6.291378,279.319824,2.414834,1.022894,2.130981,4.842367,...,26.476633,41.766758,158.632374,159.050718,167.192888,9.305198,10.829396,89.447596,74.157471,10.532619
127,277.399658,3.852527,6.907508,5.299354,6.061778,279.230591,2.021695,1.143225,1.607422,4.84153,...,49.650197,36.203894,98.603207,126.285782,119.939682,20.935741,17.424789,85.735437,72.289134,17.710532
128,275.499359,3.304548,6.816074,4.561194,5.927387,286.705762,1.754722,1.136414,1.240662,4.85289,...,11.124505,16.745666,170.252415,173.337517,176.253205,12.419853,12.235935,87.870444,60.000273,12.570038
130,276.370789,2.99914,6.68077,3.821632,5.959699,286.763584,1.585114,1.306714,0.837463,4.923635,...,121.750507,148.640604,55.948144,59.314576,54.778275,20.447721,16.448165,-84.704755,68.405148,16.747957
131,277.268555,3.283719,6.891045,4.686806,5.876345,279.216553,1.731217,1.167188,1.231018,4.925344,...,156.811217,154.9016,57.73244,24.484072,41.385363,18.702889,16.465161,82.800757,84.710374,16.233139
134,277.220703,2.827531,6.624643,4.262856,5.585643,279.216553,1.490863,1.038464,1.032349,5.202804,...,1.584058,10.96151,144.13538,178.761731,159.775304,16.155258,16.377091,-78.378834,89.075598,16.108414
