### Here I will do some basic visualization of geospatial data. The data comes from a shipping company and includes the GPS tracking of their vessels and the routes that the vessels have to follow during shipping.

In [2]:
import geopandas as gpd
import folium
import matplotlib.pyplot as plt
import pandas as pd
from shapely.geometry import Point,Polygon,LineString
from folium.plugins import FastMarkerCluster
from folium.features import GeoJson, GeoJsonTooltip, GeoJsonPopup
from keplergl import KeplerGl
import branca
import branca.colormap as cm

In [3]:
# Location of the cleaned vessel-tracking CSV that the rest of the notebook works on.
# NOTE(review): this is an absolute, machine-specific path; point it at your own copy to re-run.
CLEAN_RESULT_CSV = '/Users/danielbeltsazar/Work/ZebraX/Coding/Geospatial/clean_result.csv'

df = gpd.read_file(CLEAN_RESULT_CSV)
df.head()

Unnamed: 0,datetime,txid,vessel_name,lat,lon,fleet_work_center,route_name,network_no,position,route_polygon,is_outside_route,deviation_in_meter,geometry
0,2020-08-08 14:15:00.000,T818,TB ENTEBE MEGASTAR 79,-1.7195,116.6212,TBMS79,TMCT-ADANGBAY,174000021674,POINT (116.6212 -1.7195),POLYGON ((116.16248591680016 -1.80794871782329...,1,76.17405892,
1,2020-08-08 16:30:00.000,T818,TB ENTEBE MEGASTAR 79,-1.7194,116.6208,TBMS79,TMCT-ADANGBAY,174000021674,POINT (116.6208 -1.7194),POLYGON ((116.16248591680016 -1.80794871782329...,1,62.87094379,
2,2020-08-08 14:00:00.000,T818,TB ENTEBE MEGASTAR 79,-1.7195,116.6212,TBMS79,TMCT-ADANGBAY,174000021674,POINT (116.6212 -1.7195),POLYGON ((116.16248591680016 -1.80794871782329...,1,76.17405892,
3,2020-08-08 16:15:00.000,T818,TB ENTEBE MEGASTAR 79,-1.7195,116.6208,TBMS79,TMCT-ADANGBAY,174000021674,POINT (116.6208 -1.7195),POLYGON ((116.16248591680016 -1.80794871782329...,1,73.91421159,
4,2020-08-08 13:45:00.000,T818,TB ENTEBE MEGASTAR 79,-1.7208,116.6206,TBMS79,TMCT-ADANGBAY,174000021674,POINT (116.6206 -1.7208),POLYGON ((116.16248591680016 -1.80794871782329...,1,216.34677472,


### We can see that our data contains POINT and POLYGON geometry values. The position column, which contains POINT data, holds the GPS tracking positions of the vessels. The route_polygon column, which contains POLYGON data, holds the routes of the vessels.

### Here we convert the data type of some columns in our data so we can easily process it later if needed

In [4]:
# Target dtype for every column that needs an explicit numeric cast;
# the casts are applied in the order listed.
NUMERIC_COLUMN_DTYPES = {
    'lat': "float",
    'lon': "float",
    'deviation_in_meter': "float",
    'network_no': 'int',
    'is_outside_route': 'int',
}

for column_name, target_dtype in NUMERIC_COLUMN_DTYPES.items():
    df[column_name] = df[column_name].astype(target_dtype)

# Timestamps are parsed separately because they need pandas' datetime parser.
df['datetime'] = pd.to_datetime(df['datetime'])

In [5]:
# Print each column's dtype and non-null count to check the result of the conversions above.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 541639 entries, 0 to 541638
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype         
---  ------              --------------   -----         
 0   datetime            541639 non-null  datetime64[ns]
 1   txid                541639 non-null  object        
 2   vessel_name         541639 non-null  object        
 3   lat                 541639 non-null  float64       
 4   lon                 541639 non-null  float64       
 5   fleet_work_center   541639 non-null  object        
 6   route_name          541639 non-null  object        
 7   network_no          541639 non-null  int64         
 8   position            541639 non-null  object        
 9   route_polygon       541639 non-null  object        
 10  is_outside_route    541639 non-null  int64         
 11  deviation_in_meter  541639 non-null  float64       
 12  geometry            0 non-null       geometry      
dtypes: datetime64[ns](1),

### Our data contains 5 shipping routes

In [6]:
# Number of tracking records per route name.
df['route_name'].value_counts()

TMCT-ADANGBAY         488552
ASAM-ASAM-REMBANG      24112
ASAM-ASAM-PAITON       18923
TARAHAN-LABUAN          5406
TARAHAN-PLTULONTAR      4646
Name: route_name, dtype: int64

## Plotting Function For Each Route

### Function

In [7]:
def plot_map(df, routename, max_deviation=15000):
    """Build an interactive folium map of one route and its GPS tracking points.

    Every row of ``df`` whose ``route_name`` equals ``routename`` is drawn as a
    small circle marker at its ``position``. Markers are coloured on a
    yellow-to-red scale by ``deviation_in_meter``. The route polygon(s) of the
    selected rows are drawn on top as a GeoJSON layer, and the colour scale is
    added to the map as a legend.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Tracking records. Must provide the columns ``route_name``, ``lat``,
        ``lon`` (numeric), ``deviation_in_meter`` (numeric), ``position``
        (WKT point text) and ``route_polygon`` (WKT polygon text).
        The frame is not modified.
    routename : str
        Value of ``route_name`` to plot.
    max_deviation : float, optional
        Deviation, in metres, mapped to the red end of the colour scale.
        Defaults to 15000, the value that was previously hard-coded.

    Returns
    -------
    folium.Map
        Map centred on the mean latitude/longitude of the selected rows.

    Raises
    ------
    ValueError
        If no row of ``df`` belongs to ``routename``.
    """
    route_rows = df[df['route_name'] == routename]
    if route_rows.empty:
        # Without this guard the map centre would be NaN and the failure would
        # surface much later with an unrelated-looking message.
        raise ValueError("No rows found for route {!r}".format(routename))

    # Parse the WKT text into geometries. The parsed values are kept in local
    # series instead of being written back into a slice of ``df``, which avoids
    # SettingWithCopyWarning and any reliance on index alignment.
    positions = gpd.GeoSeries.from_wkt(route_rows['position'])

    # The route polygon is repeated on every tracking row. Only the distinct
    # polygons are parsed and drawn, so the map does not embed the same shape
    # once per GPS point. The CRS is set explicitly because folium reprojects
    # GeoSeries input to EPSG:4326 and needs a source CRS to do so.
    route_shapes = gpd.GeoSeries.from_wkt(
        route_rows['route_polygon'].drop_duplicates(),
        crs="EPSG:4326",
    )

    colormap = cm.LinearColormap(
        colors=['yellow', 'red'],
        index=[0, max_deviation],
        vmin=0,
        vmax=max_deviation,
    )

    # Centre the map on the average recorded position of the route.
    start_coord = (route_rows['lat'].mean(), route_rows['lon'].mean())
    maps = folium.Map(location=start_coord, zoom_start=8)

    for point, deviation in zip(positions, route_rows['deviation_in_meter']):
        # folium expects [lat, lon]; shapely points store x=lon, y=lat.
        folium.CircleMarker(
            location=[point.y, point.x],
            radius=2,
            color=colormap(deviation),
        ).add_to(maps)

    maps.add_child(colormap)
    folium.GeoJson(data=route_shapes).add_to(maps)
    return maps

In [8]:
# Repeats the earlier per-route record count; its index lists the route names that plot_map can filter on.
df['route_name'].value_counts()

TMCT-ADANGBAY         488552
ASAM-ASAM-REMBANG      24112
ASAM-ASAM-PAITON       18923
TARAHAN-LABUAN          5406
TARAHAN-PLTULONTAR      4646
Name: route_name, dtype: int64

### Then we try to plot one of the routes and the vessels' GPS tracking position along the route

In [9]:
# Build the map for the TARAHAN-PLTULONTAR route; assigning it to a variable keeps the cell from rendering it.
map1 = plot_map(df,'TARAHAN-PLTULONTAR')

In [10]:
# map1  # uncomment to render the interactive map inline (left disabled to keep the notebook small)

### Below I have only inserted a snapshot of the map generated by folium, because the notebook would otherwise be too large for GitHub to render. The actual map is interactive: we can zoom in/out and pan the map.

![Map1.png](attachment:Map1.png)

### We can see that the blue line is the route that the vessels must stick to during shipping. The yellow-to-red circle markers are the actual positions of the vessels from GPS tracking. Yellow means that the deviation (the distance between a vessel's position and the route it should be following) is very small, and red means the deviation is very large. The colour scale runs from 0 meters (yellow) to 15000 meters (red).


### Then we can plot other routes

In [12]:
# Build the map for the TARAHAN-LABUAN route.
map2 = plot_map(df,'TARAHAN-LABUAN')
# map2  # uncomment to render the interactive map inline

![Map3.png](attachment:Map3.png)

### We can see from the map above that some vessels deviate too far from their route. This raises the suspicion that the vessels' crews may be doing some illegal trading at sea during shipping.

In [16]:
# Build the map for the ASAM-ASAM-PAITON route.
map3 = plot_map(df,'ASAM-ASAM-PAITON')
# map3  # uncomment to render the interactive map inline

![Map4.png](attachment:Map4.png)