In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import DBSCAN
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
# Read the three source tables from the local data/ directory into DataFrames:
# the attack records, the per-country yearly indicators, and the country code lookup.
pirate_attacks, country_indicators, country_codes = map(
    pd.read_csv,
    (
        'data/pirate_attacks.csv',
        'data/country_indicators.csv',
        'data/country_codes.csv',
    ),
)

In [3]:
# Preview the first three rows of the attacks table.
pirate_attacks.head(n=3)

Unnamed: 0,date,time,longitude,latitude,attack_type,location_description,nearest_country,eez_country,shore_distance,shore_longitude,shore_latitude,attack_description,vessel_name,vessel_type,vessel_status,data_source
0,1993-01-02,,116.9667,19.7,,Hong Kong - Luzon - Hainan,CHN,TWN,357.502373,115.825956,22.746644,,Mv Cosmic Leader,,,mappingpiracy
1,1993-01-04,,116.0,22.35,,Hong Kong - Luzon - Hainan,CHN,CHN,47.431573,115.825956,22.746644,,Mv Tricolor Star III,,,mappingpiracy
2,1993-01-06,,115.25,19.67,,Hong Kong - Luzon - Hainan,CHN,TWN,280.811871,114.302501,22.044867,,Mv Arktis Star,,,mappingpiracy


In [4]:
# Preview the first three rows of the country indicators table.
country_indicators.head(n=3)

Unnamed: 0,country,year,corruption_index,homicide_rate,GDP,total_fisheries_per_ton,total_military,population,unemployment_rate,totalgr,industryofgdp
0,ABW,1993.0,,,14936.827219,260.0,,72504.0,,,
1,ABW,1994.0,,,16241.046521,260.0,,76700.0,,,
2,ABW,1995.0,,7.469748,16439.356361,140.0,,80324.0,,,0.153986


In [5]:
# Preview the first three rows of the country code lookup table.
country_codes.head(n=3)

Unnamed: 0,country,region,country_name
0,ABW,Latin America & Caribbean,Aruba
1,AFG,South Asia,Afghanistan
2,AGO,Sub-Saharan Africa,Angola


In [6]:
# Share of missing values in each column of the attacks table, as a percentage:
# the mean of the boolean null mask is the fraction of nulls per column.
nan_percentage = pirate_attacks.isnull().mean().mul(100)
print("NaN percentage per column:", nan_percentage, sep="\n")

NaN percentage per column:
date                     0.000000
time                    84.702436
longitude                0.000000
latitude                 0.000000
attack_type              1.597657
location_description     0.106510
nearest_country          0.252962
eez_country              3.927573
shore_distance           0.000000
shore_longitude          0.000000
shore_latitude           0.000000
attack_description      84.382905
vessel_name             19.065371
vessel_type             84.382905
vessel_status           12.142191
data_source              0.000000
dtype: float64


In [7]:
# Plot every attack record as a point on a map, positioned by its coordinates.
fig = px.scatter_mapbox(
    pirate_attacks,
    lat='latitude',
    lon='longitude',
    title='Pirate Attacks: Latitude and Longitude',
    zoom=2,
    height=600,
)

# Use the OpenStreetMap base layer and trim the margins, leaving room for the title.
fig.update_layout(
    mapbox_style='open-street-map',
    margin={'l': 0, 'r': 0, 't': 40, 'b': 0},
)

fig.show()

In [8]:
# Parse the attack dates and keep them on the frame as a datetime column.
pirate_attacks['date_datetime'] = pd.to_datetime(pirate_attacks['date'])

# Number of attacks for each (nearest country, calendar year) pair.
yearly_country_counts = (
    pirate_attacks
    .groupby(['nearest_country', pirate_attacks['date_datetime'].dt.year])
    .size()
    .reset_index(name='num_attacks')
)

# Mean yearly count per country, highest first.
# NOTE: a (country, year) pair with no attacks produces no row above, so this
# mean is taken over the years in which the country had at least one attack.
avg_yearly_country_counts = (
    yearly_country_counts
    .groupby('nearest_country')['num_attacks']
    .mean()
    .reset_index()
    .sort_values(by='num_attacks', ascending=False)
)

# Attach readable country names from the country code lookup to the top ten.
df_top_10_countries = avg_yearly_country_counts.head(10).merge(
    country_codes[['country', 'country_name']],
    left_on='nearest_country',
    right_on='country',
    how='left',
)

# Horizontal bar chart of the ten countries, ordered by bar length.
fig = px.bar(
    df_top_10_countries,
    x='num_attacks',
    y='country_name',
    orientation='h',
    title='Top 10 Nearest Countries by Average Number of Attacks per Year',
    labels={'num_attacks': 'Average Number of Attacks per Year', 'country_name': 'Country'},
)
fig.update_layout(height=600, yaxis={'categoryorder': 'total ascending'})

fig.show()

In [9]:
# Rows without coordinates can be neither clustered nor plotted, so drop them.
pirate_attacks = pirate_attacks.dropna(subset=['latitude', 'longitude'])

# scikit-learn's haversine metric expects (latitude, longitude) pairs in radians,
# in that column order.
pirate_attacks['lat_rad'] = np.radians(pirate_attacks['latitude'])
pirate_attacks['lon_rad'] = np.radians(pirate_attacks['longitude'])
coordinates = pirate_attacks[['lat_rad', 'lon_rad']].values

# Cluster on great-circle distance rather than the default Euclidean distance:
# treating radian lat/lon as flat x/y stretches east-west distances away from the
# equator and splits neighbours across the +/-180 degree meridian.
# With haversine, eps is an angle in radians (0.05 rad is roughly 320 km on the
# Earth's surface). Adjust eps and min_samples as needed.
dbscan = DBSCAN(eps=0.05, min_samples=50, metric='haversine', algorithm='ball_tree')
dbscan.fit(coordinates)

# Store the integer cluster label of every attack; DBSCAN labels noise points -1.
pirate_attacks['cluster'] = dbscan.labels_

# Cluster ids are categories, not magnitudes: plot them as strings so Plotly
# Express assigns one discrete colour per cluster (with a legend) instead of
# interpolating a continuous colour scale across the integer labels.
fig = px.scatter_mapbox(
    pirate_attacks.assign(cluster=pirate_attacks['cluster'].astype(str)),
    lat='latitude',
    lon='longitude',
    color='cluster',
    title='Pirate Attacks Clusters',
    zoom=2,
    height=600,
    color_discrete_sequence=px.colors.qualitative.Light24,
)

# Use the OpenStreetMap base layer and trim the margins, leaving room for the title.
fig.update_layout(
    mapbox_style='open-street-map',
    margin=dict(l=0, r=0, t=40, b=0),
)

fig.show()

In [10]:
# Show one row to confirm the columns added to the attacks table so far.
pirate_attacks.head(n=1)

Unnamed: 0,date,time,longitude,latitude,attack_type,location_description,nearest_country,eez_country,shore_distance,shore_longitude,shore_latitude,attack_description,vessel_name,vessel_type,vessel_status,data_source,date_datetime,lat_rad,lon_rad,cluster
0,1993-01-02,,116.9667,19.7,,Hong Kong - Luzon - Hainan,CHN,TWN,357.502373,115.825956,22.746644,,Mv Cosmic Leader,,,mappingpiracy,1993-01-02,0.34383,2.041454,0


In [11]:
# seasonal_decompose is imported in the notebook's first cell; it is not
# re-imported here.

# Index the attacks by parsed date so they can be resampled by calendar month.
# The dates are parsed from the 'date' column here (rather than reusing an
# existing 'date_datetime' column) so the cell still runs after that column has
# already been moved into the index.
pirate_attacks['date_datetime'] = pd.to_datetime(pirate_attacks['date'])
pirate_attacks.set_index('date_datetime', inplace=True)

# Number of attacks in each calendar month ('ME' labels each bin by month end).
monthly_attacks = pirate_attacks.resample('ME').size()

# Additive decomposition with a 12-month seasonal cycle. The period is passed
# explicitly so the result does not depend on statsmodels inferring it from the
# index frequency.
result = seasonal_decompose(monthly_attacks, model='additive', period=12)

# Line plot of the trend component.
fig_trend = go.Figure(
    go.Scatter(x=result.trend.index, y=result.trend, mode='lines', name='Trend')
)
fig_trend.update_layout(
    title='Trend Component',
    xaxis_title='Date',
    yaxis_title='Number of Attacks',
)

fig_trend.show()

In [12]:
# Additive decomposition of the monthly attack counts with a 12-month seasonal
# cycle. The period is passed explicitly so the result does not depend on
# statsmodels inferring it from the index frequency.
result = seasonal_decompose(monthly_attacks, model='additive', period=12)

# Line plot of the seasonal component.
fig_seasonal = go.Figure(
    go.Scatter(x=result.seasonal.index, y=result.seasonal, mode='lines', name='Seasonal')
)
fig_seasonal.update_layout(
    title='Seasonal Component',
    xaxis_title='Date',
    yaxis_title='Number of Attacks',
)

fig_seasonal.show()

In [13]:
# Calendar year of each attack, read from the frame's index (this requires the
# index to be a DatetimeIndex).
pirate_attacks['year'] = pirate_attacks.index.year

# Number of attacks for each (nearest country, year) pair.
grouped_pirate_attacks = (
    pirate_attacks
    .groupby(['nearest_country', 'year'])
    .size()
    .reset_index(name='num_attacks')
)

# Attach the country indicators for the matching country and year. This is an
# inner merge, so pairs without an indicator row are dropped.
df = pd.merge(grouped_pirate_attacks, country_indicators,
              left_on=['nearest_country', 'year'], right_on=['country', 'year'])

# Attach region and country name from the country code lookup (inner merge).
df = pd.merge(df, country_codes, on='country')

# Restrict the analysis to Indonesia.
df = df[df['nearest_country'] == 'IDN']

# Pairwise correlations between the yearly attack count and each indicator.
correlation_matrix = df[['num_attacks', 'corruption_index', 'homicide_rate', 'GDP',
                         'total_fisheries_per_ton', 'total_military', 'population',
                         'unemployment_rate', 'totalgr', 'industryofgdp']].corr()

# Heatmap of the correlation matrix. The colour range is pinned to [-1, 1] so the
# midpoint of the diverging colorscale always corresponds to zero correlation;
# without it the range is taken from the data and the midpoint drifts.
fig = go.Figure(data=go.Heatmap(z=correlation_matrix.values,
                                x=correlation_matrix.columns,
                                y=correlation_matrix.columns,
                                colorscale='RdYlBu',
                                zmin=-1,
                                zmax=1))

fig.update_layout(title='Correlation Matrix of Selected Variables',
                  xaxis_title='Variables',
                  yaxis_title='Variables',
                  height=800,
                  width=800)

fig.show()