In [ ]:
# Install dependencies
# !pip install streamlit-folium

In [None]:
# Import statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.io import output_file, show, save
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models import TabPanel, Tabs
import re
import matplotlib.dates as mdates
import folium
from pathlib import Path

In [None]:
# Read in clean data file - from data.world
# The path is relative, so the notebook has to be run from the folder that contains Data_Files/.
clean_data_path = Path('Data_Files/[Clean] IMO Piracy - 2000 to 2022 (PDV 01-2023).csv')
# All incidents, including those without coordinates.
piracy_df_original = pd.read_csv(clean_data_path)

In [None]:
# Drop null lat/long values for mapping data
# dropna returns a new frame; piracy_df_original keeps the rows without coordinates.
piracy_df_map = piracy_df_original.dropna(subset=['Latitude','Longitude'])
# Preview the first 10 mappable incidents.
piracy_df_map.head(10)

### Adding severity column to piracy_df_original and piracy_df_map

In [None]:
# High severity: incidents with crew deaths and/or missing crew members.
# `== True` evaluates to False for missing flag values, so such rows are not selected.
crew_lost_mask_1 = (
    (piracy_df_original['Flag - Crew Deaths'] == True)
    | (piracy_df_original['Flag - Crew Missing'] == True)
)
# assign() returns a new frame, so the label is never written onto a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
severe_df_1 = piracy_df_original[crew_lost_mask_1].assign(severity='high')

In [None]:
# Medium severity: crew assaulted, injured and/or held hostage, with nobody dead or missing.
crew_harmed_mask_1 = (
    (piracy_df_original['Flag - Crew Assaulted'] == True)
    | (piracy_df_original['Flag - Crew Injuries'] == True)
    | (piracy_df_original['Flag - Crew Held Hostage'] == True)
)
crew_dead_or_missing_mask_1 = (
    (piracy_df_original['Flag - Crew Deaths'] == True)
    | (piracy_df_original['Flag - Crew Missing'] == True)
)
# Incidents that also involve deaths or missing crew are labelled 'high'. Excluding them here
# keeps every incident in exactly one severity bucket, so concatenating the buckets afterwards
# does not duplicate rows.
# assign() returns a new frame instead of writing onto a filtered slice.
medium_df_1 = piracy_df_original[crew_harmed_mask_1 & ~crew_dead_or_missing_mask_1].assign(severity='medium')

In [None]:
# Low severity: an incident occurred but none of the five crew-consequence flags is set.
# Each flag is compared with `== False`, so a row with a missing flag value is not selected.
no_crew_impact_mask_1 = (
    (piracy_df_original['Flag - Crew Assaulted'] == False)
    & (piracy_df_original['Flag - Crew Injuries'] == False)
    & (piracy_df_original['Flag - Crew Held Hostage'] == False)
    & (piracy_df_original['Flag - Crew Deaths'] == False)
    & (piracy_df_original['Flag - Crew Missing'] == False)
)
# assign() returns a new frame instead of writing onto a filtered slice.
low_df_1 = piracy_df_original[no_crew_impact_mask_1].assign(severity='low')

In [None]:
# The three severity buckets built from piracy_df_original, in low -> medium -> high order.
dfs1 = [ low_df_1, medium_df_1, severe_df_1]

In [None]:
# Stack the buckets back into one frame with a fresh 0..n-1 index.
# NOTE: this rebinds piracy_df_original, so from here on it only contains rows that fell into
# one of the three buckets, ordered by bucket rather than by the original file order.
piracy_df_original = pd.concat(dfs1).reset_index(drop=True)

In [None]:
# Preview the first 10 rows of the relabelled frame.
piracy_df_original.head(10)

In [None]:
# High severity (mappable incidents only): crew deaths and/or missing crew members.
# .copy() makes the subset an independent frame, so adding the 'severity' column afterwards
# does not trigger SettingWithCopyWarning.
severe_df = piracy_df_map[(piracy_df_map['Flag - Crew Deaths'] == True) | (piracy_df_map['Flag - Crew Missing'] == True)].copy()

In [None]:
# Label every row of the high-severity map subset.
severe_df.loc[: ,'severity'] = 'high'

In [None]:
# Medium severity (mappable incidents only): crew assaulted, injured and/or held hostage,
# with nobody dead or missing.
crew_harmed_mask = (
    (piracy_df_map['Flag - Crew Assaulted'] == True)
    | (piracy_df_map['Flag - Crew Injuries'] == True)
    | (piracy_df_map['Flag - Crew Held Hostage'] == True)
)
crew_dead_or_missing_mask = (
    (piracy_df_map['Flag - Crew Deaths'] == True)
    | (piracy_df_map['Flag - Crew Missing'] == True)
)
# Rows that also involve deaths or missing crew are labelled 'high'. Excluding them keeps each
# incident in one bucket only: no duplicate rows after the concat and no second marker on the map.
# .copy() makes the subset independent so the 'severity' column can be added without a warning.
medium_df = piracy_df_map[crew_harmed_mask & ~crew_dead_or_missing_mask].copy()

In [None]:
# Label every row of the medium-severity map subset.
medium_df.loc[: , 'severity'] = 'medium'

In [None]:
# Low severity (mappable incidents only): none of the five crew-consequence flags is set.
no_crew_impact_mask = (
    (piracy_df_map['Flag - Crew Assaulted'] == False)
    & (piracy_df_map['Flag - Crew Injuries'] == False)
    & (piracy_df_map['Flag - Crew Held Hostage'] == False)
    & (piracy_df_map['Flag - Crew Deaths'] == False)
    & (piracy_df_map['Flag - Crew Missing'] == False)
)
# .copy() makes the subset independent so the 'severity' column can be added without a warning.
low_df = piracy_df_map[no_crew_impact_mask].copy()

In [None]:
# Label every row of the low-severity map subset.
low_df.loc[: , 'severity'] = 'low'

In [None]:
# The three severity buckets built from piracy_df_map, in low -> medium -> high order.
dfs = [ low_df, medium_df, severe_df]

In [None]:
# NOTE: rebinds piracy_df_map; it now carries the 'severity' column and a fresh 0..n-1 index.
piracy_df_map = pd.concat(dfs).reset_index(drop=True) #combine all three map to create the mappable dataframe 

In [None]:
# Preview the first 10 rows of the mappable frame with its severity labels.
piracy_df_map.head(10)

### Folium map of severity levels 

In [None]:
# Create a map using Folium, centred on the mean latitude/longitude of the incidents
map_center = [piracy_df_original['Latitude'].mean(), piracy_df_original['Longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=4)

# One circle marker per incident, coloured by severity. The popup shows vessel type, ship status,
# ship flag and position. Layers are added in high -> medium -> low order.
severity_layers = [(severe_df, 'black'), (medium_df, 'red'), (low_df, 'yellow')]
for severity_frame, marker_color in severity_layers:
    for _, row in severity_frame.iterrows():
        popup_text = f"Vessel Type: {row['Ship Type']}<br>Ship Status: {row['Ship Status']}<br>Ship Origin: {row['Ship Flag']}<br>Latitude:{row['Latitude']}<br>Longitude: {row['Longitude']}"
        folium.CircleMarker(location=[row['Latitude'], row['Longitude']], radius=5, color=marker_color, fill=True, fill_color=marker_color, popup=popup_text).add_to(m)

# Regions of interest, drawn as unfilled black rectangles.
# Every corner is [latitude, longitude]; southern latitudes and western longitudes are negative.
# These are example boxes - the exact areas to investigate still need to be pinned down.

top_left1 = [7, -1]
bottom_right1 = [-2, 10]
Gulf_Guinea = folium.Rectangle(bounds=[top_left1, bottom_right1], color='black', fill=False)
Gulf_Guinea.add_to(m)

top_left2 = [20, 40]
bottom_right2 = [10, 55.17]
Gulf_Aden = folium.Rectangle(bounds=[top_left2, bottom_right2], color='black', fill=False)
Gulf_Aden.add_to(m)

# NOTE(review): this box ends at longitude 110, but the Strait of Malacca data filter further
# down uses max_lon = 150 - confirm which extent is intended.
top_left3 = [10, 93]
bottom_right3 = [-10, 110]
Strait_Malacca = folium.Rectangle(bounds=[top_left3, bottom_right3], color='black', fill=False)
Strait_Malacca.add_to(m)

# Legend: a fixed-position HTML box injected into the page that hosts the map
legend_html = '''
     <div style="position: fixed; bottom: 50px; left: 50px; width: 200px; height: 120px; background-color: white; border:2px solid grey; z-index:9999; font-size:14px;">
     <p><strong>Legend</strong></p>
     <p><i class="fa fa-circle fa-1x" style="color:black"></i> Severe</p>
     <p><i class="fa fa-circle fa-1x" style="color:red"></i> Medium</p>
     <p><i class="fa fa-circle fa-1x" style="color:yellow"></i> Low</p>
     </div>
     '''
m.get_root().html.add_child(folium.Element(legend_html))

# Save the map
# m.save(Path('./Data_Files/piracy_map.html'))

In [None]:
# Show map
m

In [None]:
# Big picture

# Count incidents for every (consequences to crew, raw ship type) pair in piracy_df_original;
# rows are the consequences, columns the ship types, missing pairs are filled with 0.
severity_vessel_counts = piracy_df_original.groupby(['Consequences to Crew', 'Ship Type']).size().unstack(fill_value=0)

# Plot the bar graph. The bars are grouped by 'Consequences to Crew', so the axis and the title
# are labelled with that column rather than with a severity level.
severity_vessel_counts.plot(kind='bar', stacked=True)
plt.xlabel('Consequences to Crew')
plt.ylabel('Count')
plt.title('Piracy Incidents by Consequences to Crew and Vessel Type')
plt.legend(title='Vessel Type')
#plt.show()
# There are too many distinct raw ship types for this chart to be usable for analysis.

In [None]:
# Maps each broad vessel category to the raw 'Ship Type' spellings that belong to it.
# Spelling and capitalisation variants are listed explicitly because lookups are exact matches.
# Every raw value appears in exactly one category, so a forward lookup and an inverted
# {raw type: category} dict always agree.
vessel_type_mapping = {
    'Cargo Ship': [
        'General cargo ship', 'Container ship', 'Container', 'General dry cargo ship',
        'Bulk carrier', 'Bulk Carrier', 'Vehicle carrier', 'Ro-ro-cargo ship', 'Ro-Ro-cargo ship',
        'Refrigerated cargo carrier', 'Livestock carrier', 'Reefer', 'Cable-Layer',
        'Ore/bulk/oil carrier', 'Ore/bulk/oil Carrier', 'Cellular Container ship', 'Cargo ship',
        'Ro-Ro cargo ship', 'Cargo Ship', 'Log carrier', 'Unitised vessel', 'Cement carrier',
        # A comma was missing between the next two entries, which fused them into the single
        # string 'Heavy load carrierBarge carrier' so that neither type was ever matched.
        'Heavy load carrier', 'Barge carrier',
        'Supply ship', 'Offshore tug/Supply ship', 'Wood chip carrier',
        'Bulk and container carrier', 'Multipurpose cargo ship', 'Feeder container',
        'Container Ship', 'Combination carrier', 'Steel-Bulk Carrier', 'Flush-tween/3 decker',
        'Dry bulk carrier', 'Freighter/Log carrier', 'Dry cargo ship', 'Refrigerated cargo ship',
        'Container/General cargo ship', 'Multipurpose tweendecker', 'Reefer/Container ship',
        'Log/Bulk carrier', 'General cargo', 'Car carrier', 'Flat-top cargo barge', 'Ro-ro ship',
        'Offshore Supply ship',
    ],

    'Fishing': ['Fishing vessel', 'Fishing trawler', 'Fishing vessels', 'Fishing boat'],

    'Non Commercial Ship': ['Research ship', 'Rescue/standby ship', 'Factory ship'],

    'Passenger Ship': ['Dhow', 'Ro-ro passenger vessel', 'Ferry', 'Passenger ship', 'Yacht',
                       'Ro-Ro ferry passenger ship'],

    'Service Ship': ['Salvage tug', 'Tug', 'Tug/lighter', 'Towing tug', 'Offshore Support Vessel'],

    'Tanker': [
        'Tanker', 'Tanker ship', 'Chemical tanker', 'Chemical Tanker', 'Oil product tanker',
        'Product tanker', 'Product Tanker', 'LPG tanker', 'Oil tanker', 'Oil Tanker',
        'Gas carrier-LPG', 'Gas carrier - LPG', 'LPG', 'Gas carrier - LNG',
        'Gas carrier - non-specified', 'Crude Oil tanker', 'Ore/Bulk/oil carrier', 'Gas carrier',
        'Liquefied gas carrier', 'Gas carrier/tanker', 'Chemical carrier', 'Gas/Oil tanker',
        'Ore/Bulk/Oil carrier', 'Barge oil', 'Motor tanker', 'Liquefied Gas/Oil tanker',
        'Oil/Chemical tanker', 'Product /Motor tanker', 'Product/Motor tanker',
        'Chemical/oil tanker', 'VLCC', 'Tanker VLCC', 'Tanker (VLCC)', 'Oil Products Tanker',
    ],

    # 'Reefer' used to be listed here as well as under 'Cargo Ship'. The first-match lookup
    # returned 'Cargo Ship' while an inverted dict returned 'Other'; it now lives in
    # 'Cargo Ship' only.
    'Other': [
        'Cutter/dredger', 'Barge', 'Mobile offshore drilling unit', 'Refrigerated ship', 'Boat',
        'Barter Trade Boat', 'Multi purpose ship', 'Multipurpose', 'Special purpose ship',
        'Landing craft', 'Motor yacht', 'Hopper/Dregger', 'Dredger', 'Navy ship',
        'Coast Guard ship', 'Coaster', 'Trading ship', 'Warship', 'Speedboat', 'Catamaran',
        'Offshore Barge carrier',
    ],

    # None stands for a missing ship type.
    'Unknown': ['Not Reported', 'Unknown', 'Panama', None],
}

In [None]:
# Function to map vessel types to categories
def map_vessel_type(vessel_type, mapping=None):
    """Return the broad vessel category for a raw ship-type value.

    Parameters
    ----------
    vessel_type : str or missing value
        Raw ship type. Missing values (None, NaN, pd.NA) are looked up as ``None``, so they
        land in whichever category lists ``None``.
    mapping : dict[str, list], optional
        Category -> list of raw ship types. Defaults to the notebook-level
        ``vessel_type_mapping``.

    Returns
    -------
    str or None
        The first category whose list contains ``vessel_type``; ``None`` when no category
        lists it.
    """
    if mapping is None:
        mapping = vessel_type_mapping
    # Series.map passes missing values as NaN, and `nan in [..., None]` is False, so normalise
    # every kind of missing value to the None sentinel used in the mapping.
    if pd.isna(vessel_type):
        vessel_type = None
    for category, types in mapping.items():
        if vessel_type in types:
            return category
    return None

In [None]:
# Add a 'Vessel Category' column by passing every raw 'Ship Type' value through map_vessel_type
piracy_df_original['Vessel Category'] = piracy_df_original['Ship Type'].map(map_vessel_type)

In [None]:
# Strait of Malacca bounding box (decimal degrees; south and west are negative)
# NOTE(review): the rectangle drawn on the Folium map for this region ends at longitude 110,
# while this filter extends to 150 - confirm which extent is intended.
min_lat = -10
max_lat = 10
min_lon = 93
max_lon = 150.0
# between() is inclusive on both ends, i.e. the same test as `>= min` & `<= max`.
# .copy() makes the subset an independent frame, so the columns that later cells add or
# convert do not trigger SettingWithCopyWarning.
strait_malacca_df = piracy_df_map[
    piracy_df_map['Latitude'].between(min_lat, max_lat)
    & piracy_df_map['Longitude'].between(min_lon, max_lon)
].copy()

In [None]:
# Add the broad vessel category for every Strait of Malacca incident.
strait_malacca_df.loc[ : , 'Vessel Category'] = strait_malacca_df['Ship Type'].map(map_vessel_type)


In [None]:
# Gulf of Aden bounding box (decimal degrees), matching the Gulf_Aden rectangle drawn on the
# Folium map ([20, 40] to [10, 55.17]).
# The longitude range used here before (93 to 110) was the Strait of Malacca range and selected
# incidents in South-East Asia instead of the Gulf of Aden.
# NOTE(review): the set of 'Vessel Category' / 'Weapons Used' / 'severity' values present in
# this subset changes with the corrected box; downstream cells that index specific columns
# should be re-checked.
min_lat = 10
max_lat = 20
min_lon = 40
max_lon = 55.17
# between() is inclusive on both ends. .copy() makes the subset an independent frame so later
# cells can add or convert columns without SettingWithCopyWarning.
gulf_of_eden_df = piracy_df_map[
    piracy_df_map['Latitude'].between(min_lat, max_lat)
    & piracy_df_map['Longitude'].between(min_lon, max_lon)
].copy()

In [None]:
# Add the broad vessel category for every Gulf of Aden incident.
gulf_of_eden_df.loc[ : , 'Vessel Category'] = gulf_of_eden_df['Ship Type'].map(map_vessel_type)

In [None]:
# Gulf of Guinea bounding box (decimal degrees), same corners as the Gulf_Guinea map rectangle
min_lat = -2
max_lat = 7
min_lon = -1
max_lon = 10
# between() is inclusive on both ends. .copy() makes the subset an independent frame so later
# cells can add or convert columns without SettingWithCopyWarning.
gulf_of_guinea_df = piracy_df_map[
    piracy_df_map['Latitude'].between(min_lat, max_lat)
    & piracy_df_map['Longitude'].between(min_lon, max_lon)
].copy()

In [None]:
# Add the broad vessel category for every Gulf of Guinea incident.
gulf_of_guinea_df.loc[ : , 'Vessel Category'] = gulf_of_guinea_df['Ship Type'].map(map_vessel_type)

### All DataFrames created:

In [None]:
###The specific dataframes for our box areas.
#piracy_df_original 
# original dataframe that includes all events regardless of whether lat/longs are available
#piracy_df_map
#contains only data that includes lat/long in order to plot on map
#strait_malacca_df
#gulf_of_eden_df
#gulf_of_guinea_df

### Big Picture 

In [None]:
# Big picture across the globe: incident counts per crew consequence (rows), split by
# vessel category (columns).
C_Crew_vessel_cat = (
    piracy_df_original
    .groupby(['Consequences to Crew', 'Vessel Category'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
crew_vessel_ax = C_Crew_vessel_cat.plot.bar(stacked=True)
crew_vessel_ax.set_xlabel('Consequences to Crew')
crew_vessel_ax.set_ylabel('Count')
crew_vessel_ax.set_title('Piracy Incidents by Consequences to Crew and Vessel Category')
crew_vessel_ax.legend(title='Vessel Category')
plt.show()

In [None]:
# Incident counts per crew consequence (rows), split by weapon type (columns).
C_Crew_wpns_used = (
    piracy_df_original
    .groupby(['Consequences to Crew', 'Weapons Used'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
crew_weapons_ax = C_Crew_wpns_used.plot.bar(stacked=True)
crew_weapons_ax.set_xlabel('Consequences to Crew')
crew_weapons_ax.set_ylabel('Count')
crew_weapons_ax.set_title('Piracy Incidents by Consequences to Crew and Weapons Used')
crew_weapons_ax.legend(title='Weapons used')
plt.show()

In [None]:
# Big picture across the globe.
# NOTE(review): this rebinds severity_vessel_counts (previously grouped by raw 'Ship Type') to
# the per-category counts and draws the same chart as the C_Crew_vessel_cat cell.
severity_vessel_counts = (
    piracy_df_original
    .groupby(['Consequences to Crew', 'Vessel Category'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
global_vessel_ax = severity_vessel_counts.plot.bar(stacked=True)
global_vessel_ax.set_xlabel('Consequences to Crew')
global_vessel_ax.set_ylabel('Count')
global_vessel_ax.set_title('Piracy Incidents by Consequences to Crew and Vessel Category')
global_vessel_ax.legend(title='Vessel Category')
plt.show()

### By vessel category Bokeh

In [None]:
# Strait of Malacca: incident counts per crew consequence (rows), split by vessel category.
SM_vessel_counts = (
    strait_malacca_df
    .groupby(['Consequences to Crew', 'Vessel Category'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
sm_vessel_ax = SM_vessel_counts.plot.bar(stacked=True)
sm_vessel_ax.set_xlabel('Consequences to Crew')
sm_vessel_ax.set_ylabel('Count')
sm_vessel_ax.set_title('Piracy Incidents by Consequences to Crew and Vessel Type in the Strait of Malacca ')
sm_vessel_ax.legend(title='Vessel Category')
plt.show()

In [None]:
# Gulf of Aden: incident counts per crew consequence (rows), split by vessel category.
GE_vessel_counts = (
    gulf_of_eden_df
    .groupby(['Consequences to Crew', 'Vessel Category'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
ge_vessel_ax = GE_vessel_counts.plot.bar(stacked=True)
ge_vessel_ax.set_xlabel('Consequences to Crew')
ge_vessel_ax.set_ylabel('Count')
ge_vessel_ax.set_title('Piracy Incidents by Consequences to Crew and Vessel Type in the Gulf of Aden ')
ge_vessel_ax.legend(title='Vessel Category')
plt.show()

In [None]:
# Gulf of Guinea: incident counts per crew consequence (rows), split by vessel category.
GG_vessel_counts = (
    gulf_of_guinea_df
    .groupby(['Consequences to Crew', 'Vessel Category'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
gg_vessel_ax = GG_vessel_counts.plot.bar(stacked=True)
gg_vessel_ax.set_xlabel('Consequences to Crew')
gg_vessel_ax.set_ylabel('Count')
gg_vessel_ax.set_title('Piracy Incidents by Consequences to Crew and Vessel Type in the Gulf of Guinea')
gg_vessel_ax.legend(title='Vessel Category')
plt.show()

In [None]:
# Bokeh stacked bars: incidents by consequences to crew and vessel category, one tab per region.

# (data-source column, 'Vessel Category' value, bar colour, legend label) - bottom to top of the stack.
# Source column names avoid spaces; the stackers passed to vbar_stack must use exactly these names.
# NOTE(review): 'Non Commercial Ship' and 'Passenger Ship' are not part of the stack, so
# incidents in those categories are not drawn - confirm that this is intended.
VESSEL_STACK = [
    ('Service_ships', 'Service Ship', 'blue', 'Service ships'),
    ('Cargo_ships', 'Cargo Ship', 'green', 'Cargo ships'),
    ('Tankers', 'Tanker', 'red', 'Tankers'),
    ('Unknown', 'Unknown', 'purple', 'Unknown'),
    ('Fishing', 'Fishing', 'orange', 'Fishing'),
    ('All_others', 'Other', 'yellow', 'All others'),
]


def _vessel_stack_figure(counts, region):
    """Build the stacked-bar figure for one region.

    counts : DataFrame indexed by 'Consequences to Crew' with one column per vessel category.
    region : region name used in the figure title.
    Returns (figure, ColumnDataSource).
    """
    # reindex guarantees one column per stacked category: a category with no incidents in this
    # region becomes a column of zeros instead of a KeyError or a stacker without data.
    filled = counts.reindex(columns=[category for _, category, _, _ in VESSEL_STACK], fill_value=0)
    factors = filled.index.tolist()
    data = {'x': factors}
    for column, category, _, _ in VESSEL_STACK:
        data[column] = filled[category].tolist()
    source = ColumnDataSource(data=data)

    fig = figure(x_range=list(factors),
                 title=f"Piracy Incidents by Consequences to Crew and Vessel Category in the {region}",
                 height=800, width=800)
    fig.vbar_stack([column for column, _, _, _ in VESSEL_STACK], x='x', width=0.5,
                   color=[colour for _, _, colour, _ in VESSEL_STACK],
                   source=source,
                   legend_label=[label for _, _, _, label in VESSEL_STACK])
    fig.xaxis.major_label_orientation = "vertical"
    return fig, source


###GG
v1, gulf_of_guinea_source = _vessel_stack_figure(GG_vessel_counts, "Gulf of Guinea")
####GE
v2, gulf_of_eden_source = _vessel_stack_figure(GE_vessel_counts, "Gulf of Aden")
#######SM
v3, strait_malacca_source = _vessel_stack_figure(SM_vessel_counts, "Strait of Malacca")

#MAKE TABS
tab_1 = TabPanel(child=v1, title="Gulf of Guinea")
tab_2 = TabPanel(child=v2, title="Gulf of Aden")
tab_3 = TabPanel(child=v3, title="Strait of Malacca")
tabs_vessel = Tabs(tabs=[tab_1, tab_2, tab_3])
show(tabs_vessel)

# Save to file
# output_file(Path("./Results/vessel_cat_tabs.html"))
#save(tabs_vessel, filename=Path("./Results/vessel_cat_tabs.html"))

### By Weapons Used (Bokeh)

In [None]:
# Strait of Malacca: incident counts per crew consequence (rows), split by weapon type.
SM_wpns = (
    strait_malacca_df
    .groupby(['Consequences to Crew', 'Weapons Used'])
    .size()
    .unstack(fill_value=0)
)
# TODO: add better color scheme

# Stacked bars, one bar per crew consequence
sm_weapons_ax = SM_wpns.plot.bar(stacked=True)
sm_weapons_ax.set_xlabel('Consequences to Crew')
sm_weapons_ax.set_ylabel('Count')
sm_weapons_ax.set_title('Piracy Incidents by Consequences to Crew and Weapons Used in the Strait of Malacca ')
sm_weapons_ax.legend(title='Weapons Used')
plt.show()

In [None]:
# Gulf of Guinea: incident counts per crew consequence (rows), split by weapon type.
GG_wpns = (
    gulf_of_guinea_df
    .groupby(['Consequences to Crew', 'Weapons Used'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
gg_weapons_ax = GG_wpns.plot.bar(stacked=True)
gg_weapons_ax.set_xlabel('Consequences to Crew')
gg_weapons_ax.set_ylabel('Count')
gg_weapons_ax.set_title('Piracy Incidents by Consequences to Crew and Weapons Used in the GG ')
gg_weapons_ax.legend(title='Weapons Used')
plt.show()

In [None]:
# Gulf of Aden: incident counts per crew consequence (rows), split by weapon type.
GE_wpns = (
    gulf_of_eden_df
    .groupby(['Consequences to Crew', 'Weapons Used'])
    .size()
    .unstack(fill_value=0)
)

# Stacked bars, one bar per crew consequence
ge_weapons_ax = GE_wpns.plot.bar(stacked=True)
ge_weapons_ax.set_xlabel('Consequences to Crew')
ge_weapons_ax.set_ylabel('Count')
ge_weapons_ax.set_title('Piracy Incidents by Consequences to Crew and Weapons Used in the GE ')
ge_weapons_ax.legend(title='Weapons Used')
plt.show()

In [None]:
# List the distinct 'Weapons Used' values that occur in the Gulf of Aden subset.
gulf_of_eden_df['Weapons Used'].unique()

In [None]:
# Bokeh stacked bars: incidents by consequences to crew and weapons used, one tab per region.

# (data-source column, 'Weapons Used' value / legend label, bar colour) - bottom to top of the stack.
WEAPON_STACK = [
    ('Other_Weapon', 'Other Weapon', 'blue'),
    ('None_or_Not_Reported', 'None or Not Reported', 'green'),
    ('Knives', 'Knives', 'red'),
    ('Firearms', 'Firearms', 'purple'),
]


def _weapon_stack_figure(counts, region):
    """Build the stacked-bar figure for one region.

    counts : DataFrame indexed by 'Consequences to Crew' with one column per weapon type.
    region : region name used in the figure title.
    Returns (figure, ColumnDataSource).
    """
    # reindex guarantees one column per weapon type: a weapon that never occurs in this region
    # becomes a column of zeros instead of raising KeyError.
    filled = counts.reindex(columns=[weapon for _, weapon, _ in WEAPON_STACK], fill_value=0)
    factors = filled.index.tolist()
    data = {'x': factors}
    for column, weapon, _ in WEAPON_STACK:
        data[column] = filled[weapon].tolist()
    source = ColumnDataSource(data=data)

    fig = figure(x_range=list(factors),
                 title=f"Piracy Incidents by Consequences to Crew and Weapons Used in the {region}",
                 height=800, width=800)
    fig.vbar_stack([column for column, _, _ in WEAPON_STACK], x='x', width=0.5,
                   color=[colour for _, _, colour in WEAPON_STACK],
                   source=source,
                   legend_label=[weapon for _, weapon, _ in WEAPON_STACK])
    fig.xaxis.major_label_orientation = "vertical"
    return fig, source


###GG
a1, gulf_of_guinea_source = _weapon_stack_figure(GG_wpns, "Gulf of Guinea")
####GE
a2, gulf_of_eden_source = _weapon_stack_figure(GE_wpns, "Gulf of Aden")
#######SM
a3, strait_malacca_source = _weapon_stack_figure(SM_wpns, "Strait of Malacca")

#tabs
tab_1_ = TabPanel(child=a1, title="Gulf of Guinea")
tab_2_ = TabPanel(child=a2, title="Gulf of Aden")
tab_3_ = TabPanel(child=a3, title="Strait of Malacca")
tabs_wpns = Tabs(tabs=[tab_1_, tab_2_, tab_3_])
show(tabs_wpns)

# Save to file
# output_file() used to be called here, after show(): it had no effect on the show() above but
# redirected every later show() in the notebook into wpns_tabs.html. It is kept commented out,
# like the save lines of the other tab cells.
# output_file(Path("./Results/wpns_tabs.html"))
# save(tabs_wpns, filename=Path("./Results/wpns_tabs.html"))

### Bokeh Severity by Ship Flag 

In [None]:
# Strait of Malacca: incident counts per ship flag (rows), split by severity level (columns).
strait_malacca_SF_severity = strait_malacca_df.groupby(['Ship Flag', 'severity']).size().unstack(fill_value=0)

# Same colour per severity level as the markers on the Folium map
colors = {'high': 'black', 'medium': 'red', 'low': 'yellow'}

# Plot the bar graph with custom colors, picked per column actually present in the table
ax1 = strait_malacca_SF_severity.plot(kind='bar', figsize = (10,5), stacked=True, color=[colors[col] for col in strait_malacca_SF_severity.columns])

# The bars are grouped by ship flag, so that is what the x-axis shows
plt.xlabel('Ship Flag')
plt.ylabel('Count')
plt.title('Piracy Incidents by Ship Flag and Severity Level in the Strait of Malacca')

# Hand-built legend entries so the levels are listed High -> Medium -> Low instead of in the
# alphabetical column order
high_patch = plt.Line2D([0], [0], color='black', lw=4)
medium_patch = plt.Line2D([0], [0], color='red', lw=4)
low_patch = plt.Line2D([0], [0], color='yellow', lw=4)

plt.legend([high_patch, medium_patch, low_patch], ['High', 'Medium', 'Low'], title='Severity Level')

plt.show()

In [None]:
# Gulf of Aden: incident counts per ship flag (rows), split by severity level (columns).
gulf_of_eden_SF_severity = gulf_of_eden_df.groupby(['Ship Flag', 'severity']).size().unstack(fill_value=0)

# Same colour per severity level as the markers on the Folium map
colors = {'high': 'black', 'medium': 'red', 'low': 'yellow'}

# Plot the bar graph with custom colors, picked per column actually present in the table.
# figsize matches the other two severity-by-flag charts.
ax2 = gulf_of_eden_SF_severity.plot(kind='bar', figsize=(10, 5), stacked=True, color=[colors[col] for col in gulf_of_eden_SF_severity.columns])

# The bars are grouped by ship flag, so that is what the x-axis shows
plt.xlabel('Ship Flag')
plt.ylabel('Count')
plt.title('Piracy Incidents by Ship Flag and Severity Level in the Gulf of Aden')
plt.legend(title='Severity Level')
plt.show()

In [None]:
# Gulf of Guinea: incident counts per ship flag (rows), split by severity level (columns).
gulf_of_guinea_SF_severity = gulf_of_guinea_df.groupby(['Ship Flag', 'severity']).size().unstack(fill_value=0)

# Same colour per severity level as the markers on the Folium map
colors = {'high': 'black', 'medium': 'red', 'low': 'yellow'}

# Plot the bar graph with custom colors, picked per column actually present in the table
ax3 = gulf_of_guinea_SF_severity.plot(kind='bar', figsize = (10,5), stacked=True, color=[colors[col] for col in gulf_of_guinea_SF_severity.columns])

# The bars are grouped by ship flag, so that is what the x-axis shows
plt.xlabel('Ship Flag')
plt.ylabel('Count')
plt.title('Piracy Incidents by Ship Flag and Severity Level in the Gulf of Guinea')
plt.legend(title='Severity Level')
plt.show()

In [None]:
# Bokeh stacked bars: incidents by ship flag and severity level, one tab per region.

# (severity column, bar colour, legend label) - bottom to top of the stack.
SEVERITY_STACK = [
    ('high', 'black', 'High'),
    ('medium', 'red', 'Medium'),
    ('low', 'yellow', 'Low'),
]


def _severity_stack_figure(counts, region):
    """Build the stacked-bar figure for one region.

    counts : DataFrame indexed by 'Ship Flag' with one column per severity level.
    region : region name used in the figure title.
    Returns (figure, ColumnDataSource).
    """
    # reindex guarantees all three severity columns: a level with no incidents in this region
    # becomes a column of zeros, so every stacker passed to vbar_stack exists in the source.
    filled = counts.reindex(columns=[level for level, _, _ in SEVERITY_STACK], fill_value=0)
    factors = filled.index.tolist()
    data = {'x': factors}
    for level, _, _ in SEVERITY_STACK:
        data[level] = filled[level].tolist()
    source = ColumnDataSource(data=data)

    fig = figure(x_range=list(factors),
                 title=f"Piracy Incidents by Ship Flag and Severity Level in the {region}",
                 height=350, width=1000)
    fig.vbar_stack([level for level, _, _ in SEVERITY_STACK], x='x', width=0.5,
                   color=[colour for _, colour, _ in SEVERITY_STACK],
                   source=source,
                   legend_label=[label for _, _, label in SEVERITY_STACK])
    fig.xaxis.major_label_orientation = "vertical"
    return fig, source


p1, gulf_of_guinea_source = _severity_stack_figure(gulf_of_guinea_SF_severity, "Gulf of Guinea")
p2, gulf_of_eden_source = _severity_stack_figure(gulf_of_eden_SF_severity, "Gulf of Aden")
p3, strait_malacca_source = _severity_stack_figure(strait_malacca_SF_severity, "Strait of Malacca")

tab_one = TabPanel(child=p1, title="Gulf of Guinea")
tab_two = TabPanel(child=p2, title="Gulf of Aden")
tab_three = TabPanel(child=p3, title="Strait of Malacca")
tabs_severity = Tabs(tabs=[tab_one, tab_two, tab_three])
show(tabs_severity)

# Save to disk
# output_file(Path("./Results/severity_tabs.html"))
# save(tabs_severity, filename=Path("./Results/severity_tabs.html"))

# Import and Clean Dirty Data Frame
## (after language-processing model)

In [None]:
# read in dirty data as a pandas data frame
# (the output of the language-processing model; path is relative to the notebook's working directory)
model_output_path = Path('./Data_Files/Model_Output.csv')
model_output_df = pd.read_csv(model_output_path)
# Display the loaded frame.
model_output_df

In [None]:
# Add a 'Vessel_Type' column that generalizes 'Ship Type' into the 8 categories of
# vessel_type_mapping.
# Flip {category: [raw types]} into {raw type: category}; when a raw type is listed under
# several categories, the category that comes last in vessel_type_mapping wins.
vessel_type_mapping_inverted = {}
for _category, _raw_types in vessel_type_mapping.items():
    for _raw_type in _raw_types:
        vessel_type_mapping_inverted[_raw_type] = _category

# replace() leaves ship types that are not in the mapping unchanged.
model_output_df['Vessel_Type'] = model_output_df['Ship Type'].replace(vessel_type_mapping_inverted)

In [None]:
# change date into a datetime object
# Plain column assignment replaces the column, so it takes the dtype returned by pd.to_datetime.
model_output_df['Date'] = pd.to_datetime(model_output_df['Date'])

In [None]:
# convert lat/lon to decimal
def dms_to_decimal(loc_string):
    '''Converts a coordinate in degrees/minutes(/seconds) to signed decimal degrees.

    The numeric components are read in order: degrees, then optional minutes, then optional
    seconds. The hemisphere is the last non-blank character; 'S' and 'W' (any case) make the
    result negative, anything else is treated as positive.

    Parameters
    ----------
    loc_string : str
        Coordinate text such as "10 30 N".

    Returns
    -------
    float
        Decimal degrees.

    Raises
    ------
    ValueError
        If the string contains no number at all.
    '''
    text = loc_string.strip()  # trailing blanks would otherwise hide the hemisphere letter
    # Raw string: '\d' inside a normal string literal is an invalid escape sequence.
    matches = re.findall(r'\d+\.?\d*', text)
    if not matches:
        raise ValueError(f"no numeric component found in coordinate {loc_string!r}")

    # float() rather than int() so that decimal degrees such as "12.5 N" are accepted too.
    degrees = float(matches[0])
    minutes = float(matches[1]) if len(matches) > 1 else 0.0
    seconds = float(matches[2]) if len(matches) > 2 else 0.0

    sign = -1 if text[-1:].upper() in ('S', 'W') else 1

    return (degrees + minutes / 60 + seconds / 3600) * sign

# Convert only the non-null coordinate strings; rows dropped by dropna() get NaN in the new
# columns because the assignment aligns on the index.
model_output_df['Lat_Dec'] = model_output_df['Latitude'].dropna().apply(dms_to_decimal)
model_output_df['Lon_Dec'] = model_output_df['Longitude'].dropna().apply(dms_to_decimal)
# Display the frame with the new decimal columns.
model_output_df

In [None]:
# Use the IMO number to look up each ship's flag country
imos_df = pd.read_csv(Path('Data_Files/imo-vessel-codes.csv'))
iso2_df = pd.read_csv(Path('Data_Files/cocom_countries.csv'))

# rename the ISO-2 code column in the iso df so it can be merged with imos_df on 'flag'
iso2_df = iso2_df.rename(columns={'iso_2': 'flag'})

# left merge: keeps every row of imos_df and adds the matching 'country' name
iso_flag_df = imos_df.merge(iso2_df[['flag', 'country']], on='flag', how='left')

# ensures NaN flag values arent changed to 'Namibia'
# NOTE(review): presumably read_csv's default NA handling parses the ISO code 'NA' (Namibia)
# as NaN, which would explain why null flags matched 'Namibia' - confirm. If so, genuinely
# Namibia-flagged vessels also lose their country here.
iso_flag_df.loc[iso_flag_df['flag'].isnull(), 'country'] = None

# rename imo column to match the 'IMO No.' column of model_output_df
iso_flag_df = iso_flag_df.rename(columns={'imo': 'IMO No.'})

# change imo data type to string so both merge keys have the same type
iso_flag_df['IMO No.'] = iso_flag_df['IMO No.'].astype('str')

# change imo data type to string
# NOTE(review): astype('str') renders a float as '1234567.0' and a missing value as 'nan';
# verify that both frames produce identical key strings, otherwise the merge silently misses.
model_output_df['IMO No.'] = model_output_df['IMO No.'].astype('str')

# adds 'country' column to model_output_df
# NOTE(review): a left merge repeats a model_output_df row for every matching row in
# iso_flag_df - confirm that 'IMO No.' is unique in iso_flag_df.
model_output_df = model_output_df.merge(iso_flag_df[['IMO No.', 'country']], on='IMO No.', how='left')

In [None]:
# Drop null values for lat/lon for mapping data
# dropna returns a new frame; model_output_df keeps the rows without decimal coordinates.
map_dirty_pirate_df = model_output_df.dropna(subset=['Lat_Dec', 'Lon_Dec']) #drop lat/long nulls for a map

In [None]:
# Strait of Malacca - from DIRTY data
min_lat = -10
max_lat = 10
min_lon = 93
max_lon = 150.0

# Keep the rows whose decimal coordinates fall inside the box (bounds included).
# map_dirty_pirate_df is used because its coordinate nulls have already been dropped.
malacca_lat_in_box = map_dirty_pirate_df['Lat_Dec'].between(min_lat, max_lat)
malacca_lon_in_box = map_dirty_pirate_df['Lon_Dec'].between(min_lon, max_lon)
dirty_strait_malacca_df = map_dirty_pirate_df[malacca_lat_in_box & malacca_lon_in_box]

In [None]:
# Gulf of Aden - from DIRTY data
# Bounding box in decimal degrees, matching the Gulf_Aden rectangle drawn on the Folium map
# ([20, 40] to [10, 55.17]). The longitude range used here before (93 to 110) was the Strait of
# Malacca range and selected incidents in South-East Asia instead of the Gulf of Aden.
min_lat = 10
max_lat = 20
min_lon = 40
max_lon = 55.17

# Keep the rows whose decimal coordinates fall inside the box (bounds included).
# map_dirty_pirate_df is used because its coordinate nulls have already been dropped.
dirty_gulf_of_eden_df = map_dirty_pirate_df[(map_dirty_pirate_df['Lat_Dec'] >= min_lat) & (map_dirty_pirate_df['Lat_Dec'] <= max_lat) &
                                  (map_dirty_pirate_df['Lon_Dec'] >= min_lon) & (map_dirty_pirate_df['Lon_Dec'] <= max_lon)]


In [None]:
# Gulf of Guinea - from DIRTY data
# Bounding box in decimal degrees (south / west are negative)
min_lat, max_lat = -2, 7
min_lon, max_lon = -1, 10

# Select the incidents inside the box (bounds inclusive). Filter
# map_dirty_pirate_df, which already has rows with missing coordinates removed.
lat_in_box = map_dirty_pirate_df['Lat_Dec'].between(min_lat, max_lat)
lon_in_box = map_dirty_pirate_df['Lon_Dec'].between(min_lon, max_lon)
dirty_gulf_of_guinea_df = map_dirty_pirate_df[lat_in_box & lon_in_box]

In [None]:
# Incidents per ship flag in the Gulf of Guinea (dirty data), ordered by country name
country_counts = dirty_gulf_of_guinea_df['country'].value_counts().sort_index()

# Bar chart of those counts on an explicit figure / axes pair
fig, ax = plt.subplots(figsize=(10, 6))
country_counts.plot(kind='bar', ax=ax)
ax.set_title('Piracy Incidents by Ship Flag - Gulf of Guinea')
ax.set_xlabel('Country')
ax.set_ylabel('Number of Incidents')
plt.setp(ax.get_xticklabels(), rotation=90)  # vertical labels so long country names stay readable
ax.grid(axis='y')
plt.show()

In [None]:
# Convert 'Incident Date' to a real datetime column in the clean regional frames.
# The column is replaced through .assign() rather than `df.loc[:, col] = ...`:
# on pandas >= 2.0 the .loc form writes the values in place and keeps the old
# (object) dtype, which breaks the later .resample(on='Incident Date'); it also
# assigns into a filtered slice. .assign() returns a new frame with a
# datetime64 column and is safe to re-run.
strait_malacca_df = strait_malacca_df.assign(
    **{'Incident Date': pd.to_datetime(strait_malacca_df['Incident Date'])}
)
gulf_of_eden_df = gulf_of_eden_df.assign(
    **{'Incident Date': pd.to_datetime(gulf_of_eden_df['Incident Date'])}
)
gulf_of_guinea_df = gulf_of_guinea_df.assign(
    **{'Incident Date': pd.to_datetime(gulf_of_guinea_df['Incident Date'])}
)

In [None]:
# Grouped bar chart comparing incident counts per ship flag (dirty vs clean)
# Region: Gulf of Guinea, every flag

# Count the occurrence of each country in both data sets
dirty_counts = dirty_gulf_of_guinea_df['country'].value_counts()
clean_counts = gulf_of_guinea_df['Ship Flag'].value_counts()

# Align the two tallies on country name. The outer join does not keep the
# alphabetical order and leaves NaN where a flag occurs in only one data set,
# so sort explicitly and treat a missing flag as zero incidents.
df = (
    pd.concat([dirty_counts.rename('dirty_counts'), clean_counts.rename('clean_counts')], axis=1)
    .fillna(0)
    .sort_index()
)

fig, ax = plt.subplots(figsize=(10, 6))

# Width of each bar; the clean bar sits directly to the right of the dirty bar
width = 0.4
positions = np.arange(len(df))

ax.bar(positions, df['dirty_counts'], color='blue', width=width, label='dirty_gulf_of_guinea_df')
ax.bar(positions + width, df['clean_counts'], color='orange', width=width, label='gulf_of_guinea_df')

# One tick per country, centred between its two bars
ax.set_xlabel('Country', fontweight='bold')
ax.set_ylabel('Number of Incidents')
ax.set_xticks(positions + width / 2)
ax.set_xticklabels(df.index, rotation=90)

ax.set_title('Piracy Incidents by Ship Flag - Gulf of Guinea')
ax.legend()
plt.show()


In [None]:
# Grouped bar chart of the top 10 countries (dirty vs clean) involved in incidents
# Region: Gulf of Guinea

# Full per-flag tallies for each data set
dirty_totals = dirty_gulf_of_guinea_df['country'].value_counts()
clean_totals = gulf_of_guinea_df['Ship Flag'].value_counts()

# Flags ranked in the top 10 of either data set, in alphabetical order
top_flags = dirty_totals.nlargest(10).index.union(clean_totals.nlargest(10).index).sort_values()

# Look every shown flag up in the FULL tallies. Joining two separately truncated
# top-10 lists left a blank bar for a flag that is top 10 in only one data set,
# even when it does have incidents in the other.
dirty_counts = dirty_totals.reindex(top_flags, fill_value=0)
clean_counts = clean_totals.reindex(top_flags, fill_value=0)
df = pd.DataFrame({'dirty_counts': dirty_counts, 'clean_counts': clean_counts})

fig, ax = plt.subplots(figsize=(10, 7))

# Width of each bar; the clean bar sits directly to the right of the dirty bar
width = 0.4
positions = np.arange(len(df))

ax.bar(positions, df['dirty_counts'], color='olivedrab', width=width, label='Dirty Data Set')
ax.bar(positions + width, df['clean_counts'], color='steelblue', width=width, label='Clean Data Set')

# One tick per country, centred between its two bars
ax.set_xlabel('Country', fontsize=16)
ax.set_ylabel('Number of Incidents', fontsize=16)
ax.set_xticks(positions + width / 2)
ax.set_xticklabels(df.index, rotation=45, fontsize=14, ha='right')

ax.set_title('Piracy Incidents by Ship Flag - Gulf of Guinea\nTop 10 Countries', fontsize=18)
ax.legend()
ax.grid(axis='y')
fig.tight_layout()
# plt.savefig(Path('./Results/hist_gog.png'))
plt.show()

In [None]:
# Grouped bar chart of the top 10 countries (dirty vs clean) involved in incidents
# Region: Gulf of Aden

# Full per-flag tallies for each data set
dirty_totals = dirty_gulf_of_eden_df['country'].value_counts()
clean_totals = gulf_of_eden_df['Ship Flag'].value_counts()

# Flags ranked in the top 10 of either data set, in alphabetical order
top_flags = dirty_totals.nlargest(10).index.union(clean_totals.nlargest(10).index).sort_values()

# Look every shown flag up in the FULL tallies. Joining two separately truncated
# top-10 lists left a blank bar for a flag that is top 10 in only one data set,
# even when it does have incidents in the other.
dirty_counts = dirty_totals.reindex(top_flags, fill_value=0)
clean_counts = clean_totals.reindex(top_flags, fill_value=0)
df = pd.DataFrame({'dirty_counts': dirty_counts, 'clean_counts': clean_counts})

fig, ax = plt.subplots(figsize=(10, 7))

# Width of each bar; the clean bar sits directly to the right of the dirty bar
width = 0.4
positions = np.arange(len(df))

ax.bar(positions, df['dirty_counts'], color='olivedrab', width=width, label='Dirty Data Set')
ax.bar(positions + width, df['clean_counts'], color='steelblue', width=width, label='Clean Data Set')

# One tick per country, centred between its two bars
ax.set_xlabel('Country', fontsize=16)
ax.set_ylabel('Number of Incidents', fontsize=16)
ax.set_xticks(positions + width / 2)
ax.set_xticklabels(df.index, rotation=45, fontsize=14, ha='right')

ax.set_title('Piracy Incidents by Ship Flag - Gulf of Aden\nTop 10 Countries', fontsize=18)
ax.legend()
ax.grid(axis='y')
fig.tight_layout()
# plt.savefig(Path('./Results/hist_goa.png'))
plt.show()

In [None]:
# Grouped bar chart of the top 10 countries (dirty vs clean) involved in incidents
# Region: Malacca Strait

# Full per-flag tallies for each data set
dirty_totals = dirty_strait_malacca_df['country'].value_counts()
clean_totals = strait_malacca_df['Ship Flag'].value_counts()

# Flags ranked in the top 10 of either data set, in alphabetical order
top_flags = dirty_totals.nlargest(10).index.union(clean_totals.nlargest(10).index).sort_values()

# Look every shown flag up in the FULL tallies. Joining two separately truncated
# top-10 lists left a blank bar for a flag that is top 10 in only one data set,
# even when it does have incidents in the other.
dirty_counts = dirty_totals.reindex(top_flags, fill_value=0)
clean_counts = clean_totals.reindex(top_flags, fill_value=0)
df = pd.DataFrame({'dirty_counts': dirty_counts, 'clean_counts': clean_counts})

fig, ax = plt.subplots(figsize=(10, 7))

# Width of each bar; the clean bar sits directly to the right of the dirty bar
width = 0.4
positions = np.arange(len(df))

ax.bar(positions, df['dirty_counts'], color='olivedrab', width=width, label='Dirty Data Set')
ax.bar(positions + width, df['clean_counts'], color='steelblue', width=width, label='Clean Data Set')

# One tick per country, centred between its two bars
ax.set_xlabel('Country', fontsize=16)
ax.set_ylabel('Number of Incidents', fontsize=16)
ax.set_xticks(positions + width / 2)
ax.set_xticklabels(df.index, rotation=45, fontsize=14, ha='right')

ax.set_title('Piracy Incidents by Ship Flag - Malacca Strait\nTop 10 Countries', fontsize=18)
ax.legend()
ax.grid(axis='y')
fig.tight_layout()
# plt.savefig(Path('./Results/hist_ms.png'))
plt.show()

In [None]:
# Grouped bar chart of the top 10 countries (dirty vs clean) involved in incidents
# Region: ALL - Worldwide

# Full per-flag tallies for each data set
dirty_totals = model_output_df['country'].value_counts()
clean_totals = piracy_df_original['Ship Flag'].value_counts()

# Flags ranked in the top 10 of either data set, in alphabetical order
top_flags = dirty_totals.nlargest(10).index.union(clean_totals.nlargest(10).index).sort_values()

# Look every shown flag up in the FULL tallies. Joining two separately truncated
# top-10 lists left a blank bar for a flag that is top 10 in only one data set,
# even when it does have incidents in the other.
dirty_counts = dirty_totals.reindex(top_flags, fill_value=0)
clean_counts = clean_totals.reindex(top_flags, fill_value=0)
df = pd.DataFrame({'dirty_counts': dirty_counts, 'clean_counts': clean_counts})

fig, ax = plt.subplots(figsize=(10, 7))

# Width of each bar; the clean bar sits directly to the right of the dirty bar
width = 0.4
positions = np.arange(len(df))

ax.bar(positions, df['dirty_counts'], color='olivedrab', width=width, label='Dirty Data Set')
ax.bar(positions + width, df['clean_counts'], color='steelblue', width=width, label='Clean Data Set')

# One tick per country, centred between its two bars
ax.set_xlabel('Country', fontsize=16)
ax.set_ylabel('Number of Incidents', fontsize=16)
ax.set_xticks(positions + width / 2)
ax.set_xticklabels(df.index, rotation=45, fontsize=14, ha='right')

ax.set_title('Piracy Incidents by Ship Flag\nTop 10 Countries', fontsize=18)
ax.legend()
ax.grid(axis='y')
fig.tight_layout()
# plt.savefig(Path('./Results/hist_all.png'))
plt.show()


In [None]:
# Line graph of dirty-data incidents over time, one line per region

# Number of incidents in each three-month bin, keyed on the 'Date' column
strait_malacca_counts = dirty_strait_malacca_df.resample('3M', on='Date')['country'].count()
gulf_of_eden_counts = dirty_gulf_of_eden_df.resample('3M', on='Date')['country'].count()
gulf_of_guinea_counts = dirty_gulf_of_guinea_df.resample('3M', on='Date')['country'].count()

fig, ax = plt.subplots(figsize=(12, 4))

# (series, line colour, legend label) for each region, drawn in this order
region_lines = (
    (strait_malacca_counts, 'blue', 'Strait of Malacca'),
    (gulf_of_eden_counts, 'orange', 'Gulf of Aden'),
    (gulf_of_guinea_counts, 'green', 'Gulf of Guinea'),
)
for region_counts, line_colour, region_label in region_lines:
    ax.plot(region_counts.index, region_counts, color=line_colour, label=region_label)

ax.set_title('Piracy Incidents Over Time\nDirty Data Set', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Number of Incidents', fontsize=14)

# Labelled tick every third year (4-digit year), unlabelled minor tick every year
ax.xaxis.set_major_locator(mdates.YearLocator(3))
ax.xaxis.set_minor_locator(mdates.YearLocator(1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Fixed axis limits, identical to the clean-data line graph so the two can be compared
ax.set_xlim(pd.Timestamp('1994-07-01'), pd.Timestamp('2024-03-01'))
ax.set_ylim(-1, 55)

ax.legend()
ax.grid()
fig.tight_layout()
# plt.savefig(Path('./Results/dirty_line.png'))
plt.show()


In [None]:
# Line graph of clean-data incidents over time, one line per region

# Number of incidents in each three-month bin, keyed on the 'Incident Date' column
strait_malacca_counts = strait_malacca_df.resample('3M', on='Incident Date')['Ship Flag'].count()
gulf_of_eden_counts = gulf_of_eden_df.resample('3M', on='Incident Date')['Ship Flag'].count()
gulf_of_guinea_counts = gulf_of_guinea_df.resample('3M', on='Incident Date')['Ship Flag'].count()

fig, ax = plt.subplots(figsize=(12, 4))

# (series, line colour, legend label) for each region, drawn in this order
region_lines = (
    (strait_malacca_counts, 'blue', 'Strait of Malacca'),
    (gulf_of_eden_counts, 'orange', 'Gulf of Aden'),
    (gulf_of_guinea_counts, 'green', 'Gulf of Guinea'),
)
for region_counts, line_colour, region_label in region_lines:
    ax.plot(region_counts.index, region_counts, color=line_colour, label=region_label)

ax.set_title('Piracy Incidents Over Time\nClean Data Set', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Number of Incidents', fontsize=14)

# Labelled tick every third year (4-digit year), unlabelled minor tick every year
ax.xaxis.set_major_locator(mdates.YearLocator(3))
ax.xaxis.set_minor_locator(mdates.YearLocator(1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Fixed axis limits, identical to the dirty-data line graph so the two can be compared
ax.set_xlim(pd.Timestamp('1994-07-01'), pd.Timestamp('2024-03-01'))
ax.set_ylim(-1, 55)

ax.legend()
ax.grid()
fig.tight_layout()
# plt.savefig(Path('./Results/clean_line.png'))
plt.show()


In [None]:
# Create a Folium map that displays the dirty data by severity


def _severity_colour(row):
    """Return the marker colour for one incident row, or None if no rule matches.

    Reads the BOARDED, HIJACKED, HOSTAGES_TAKEN and CREW_ASSAULTED columns and
    applies the same rules as the legend:

    - 'black'  (Severe): hostages taken
    - 'red'    (Medium): crew assaulted, no hostages
    - 'yellow' (Low): boarded or hijacked, no assault and no hostages
    - 'green'  (Unsuccessful Attempt): all four flags are 0
    """
    boarded, hijacked = row['BOARDED'], row['HIJACKED']
    hostages, assaulted = row['HOSTAGES_TAKEN'], row['CREW_ASSAULTED']

    if hostages == 1:
        return 'black'
    if hostages == 0 and assaulted == 1:
        return 'red'
    if hostages == 0 and assaulted == 0:
        if boarded == 1 or hijacked == 1:
            return 'yellow'
        if boarded == 0 and hijacked == 0:
            return 'green'
    return None


# Centre the map on the mean incident position
map_center = [map_dirty_pirate_df['Lat_Dec'].mean(), map_dirty_pirate_df['Lon_Dec'].mean()]
m = folium.Map(location=map_center, zoom_start=4)

# One circle per incident; the popup shows vessel type, ship name, flag country and position
for index, row in map_dirty_pirate_df.iterrows():
    marker_colour = _severity_colour(row)
    if marker_colour is None:
        # Rows whose flags match none of the severity rules are left off the map
        continue
    popup_text = f"Vessel Type: {row['Vessel_Type']}<br>Ship Name: {row['Ship Name']}<br>Ship Origin: {row['country']}<br>Latitude:{row['Lat_Dec']}<br>Longitude: {row['Lon_Dec']}"
    # Outline and fill use the same colour so every marker matches its legend
    # entry (the "Unsuccessful Attempt" marker used to be green filled with yellow).
    folium.CircleMarker(location=[row['Lat_Dec'], row['Lon_Dec']], radius=5, color=marker_colour, fill=True, fill_color=marker_colour, popup=popup_text).add_to(m)


# Region outlines. Corners are [latitude, longitude]; south and west are negative.
top_left1 = [7, -1]
bottom_right1 = [-2, 10]
Gulf_Guinea = folium.Rectangle(bounds=[top_left1, bottom_right1], color='black', fill=False)
Gulf_Guinea.add_to(m)

top_left2 = [20, 40]
bottom_right2 = [10, 55.17]
Gulf_Aden = folium.Rectangle(bounds=[top_left2, bottom_right2], color='black', fill=False)
Gulf_Aden.add_to(m)

top_left3 = [10, 93]
bottom_right3 = [-10, 110]
Strait_Malacca = folium.Rectangle(bounds=[top_left3, bottom_right3], color='black', fill=False)
Strait_Malacca.add_to(m)

# Legend: fixed-position HTML box overlaid on the map
legend_html = '''
     <div style="position: fixed; bottom: 50px; left: 50px; width: 175px; height: 150px; background-color: white; border:2px solid grey; z-index:9999; font-size:14px;">
     <p><strong>Legend</strong></p>
     <p><i class="fa fa-circle fa-1x" style="color:black"></i> Severe</p>
     <p><i class="fa fa-circle fa-1x" style="color:red"></i> Medium</p>
     <p><i class="fa fa-circle fa-1x" style="color:yellow"></i> Low</p>
     <p><i class="fa fa-circle fa-1x" style="color:green"></i> Unsuccessful Attempt</p>
     </div>
     '''
m.get_root().html.add_child(folium.Element(legend_html))

# Display the map
m
# m.save(Path('./Results/dirty_piracy_map.html'))