In [14]:
import pandas as pd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely.ops import unary_union
from geopy.distance import geodesic
import geopandas as gpd

In [15]:
filename = ('./data/vessel_hurricane.csv')
vessel_df = pd.read_csv(filename)

In [16]:
vessel_df.head()

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,...,50-knot Wind Radii NE,50-knot Wind Radii SE,50-knot Wind Radii SW,50-knot Wind Radii NW,64-knot Wind Radii NE,64-knot Wind Radii SE,64-knot Wind Radii SW,64-knot Wind Radii NW,Speed mph,hurricane_datetime
0,636093156,2023-08-29 00:00:00,25.89899,-79.79531,15.5,60.7,70.0,BREMEN EXPRESS,IMO9343728,5LJY8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.339218,2023-08-29 00:00:00
1,636093156,2023-08-29 00:00:00,25.89899,-79.79531,15.5,60.7,70.0,BREMEN EXPRESS,IMO9343728,5LJY8,...,70.0,40.0,40.0,40.0,30.0,25.0,20.0,20.0,8.060978,2023-08-29 00:00:00
2,636093156,2023-08-29 00:00:00,25.89899,-79.79531,15.5,60.7,70.0,BREMEN EXPRESS,IMO9343728,5LJY8,...,30.0,40.0,20.0,20.0,0.0,0.0,0.0,0.0,6.991989,2023-08-29 00:00:00
3,636093156,2023-08-29 00:00:00,25.89899,-79.79531,15.5,60.7,70.0,BREMEN EXPRESS,IMO9343728,5LJY8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.784165,2023-08-29 00:00:00
4,477948800,2023-08-29 00:00:00,40.58595,-74.03638,10.5,343.4,343.0,YM WARMTH,IMO9704647,VROO5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.339218,2023-08-29 00:00:00


In [17]:
vessel_df['Date_only'] = pd.to_datetime(vessel_df['BaseDateTime'], errors='coerce')

In [18]:
vessel_df.columns

Index(['MMSI', 'BaseDateTime', 'LAT', 'LON', 'SOG', 'COG', 'Heading',
       'VesselName', 'IMO', 'CallSign', 'VesselType', 'Status', 'Length',
       'Width', 'Draft', 'Cargo', 'TransceiverClass', 'Name', 'Num Entries',
       'Year', 'Time', 'Record Identifier', 'Status of System', 'Latitude',
       'Longitude', 'Maximum Sustained Wind', 'Minimum Pressure',
       '34-knot Wind Radii NE', '34-knot Wind Radii SE',
       '34-knot Wind Radii SW', '34-knot Wind Radii NW',
       '50-knot Wind Radii NE', '50-knot Wind Radii SE',
       '50-knot Wind Radii SW', '50-knot Wind Radii NW',
       '64-knot Wind Radii NE', '64-knot Wind Radii SE',
       '64-knot Wind Radii SW', '64-knot Wind Radii NW', 'Speed mph',
       'hurricane_datetime', 'Date_only'],
      dtype='object')

In [19]:
def convert_to_decimal_degrees(coord_str):
    
    if pd.isna(coord_str) or not isinstance(coord_str, str):
        return None
    
    try:
        value, direction = coord_str[:-1], coord_str[-1]
        value = float(value)
        if direction in ['S', 'W']:
            value = -value
        return value
    except ValueError:
        return None
    

# sample/testing
sample_coords = ['28.8N', '56.4W']
print([convert_to_decimal_degrees(coord) for coord in sample_coords])

[28.8, -56.4]


In [20]:
vessel_df['Latitude'] = vessel_df['Latitude'].apply(convert_to_decimal_degrees)
vessel_df['Longitude'] = vessel_df['Longitude'].apply(convert_to_decimal_degrees)

In [21]:
def identify_impacted_vessels(vessel_df):
    '''
    This function first maps hurricanes to a radius in kilometers determined by the max_wind_speed column. The radius are determined by the Saffir-Simpson Scale. 
    https://en.wikipedia.org/wiki/Saffir%E2%80%93Simpson_scale
    A category does not have a fixed diameter or size, so I used PerplexityAi to give me
    adequate measurement for each category.
    Still, the size of hurricanes varies dramatically. The effect of hurricanes on ports
    and ships would more depend on its wind speed. Currently, the function is only testing on whether 
    the vessel is in the radius that I have set, which assumes the hurricanes diameter,-
    not if it is damaged. 
    
    '''

    def calculate_radius(max_wind_speed):
        if max_wind_speed >= 137:  
            return 161  # Radius in kilometers for Category 5 hurricanes
        elif max_wind_speed >= 113:  
            return 161  # Radius in kilometers for Category 4 hurricanes
        elif max_wind_speed >= 96:  
            return 161  # Radius in kilometers for Category 3 hurricanes
        elif max_wind_speed >= 83:  
            return 100  # Radius in kilometers for Category 2 hurricanes
        elif max_wind_speed >= 64:  
            return 100  # Radius in kilometers for Category 1 hurricanes
        elif max_wind_speed >= 34:
            return 50
        else:
            return 0  
    
    vessel_df['impacted'] = False  

    #Processed_timestamps is to make sure that we only iterate through the hurricane_datetime once in the for loop

    processed_timestamps = set()

    print(vessel_df[['Latitude', 'Longitude']].head())

    for idx, hurricane in vessel_df.iterrows():
        hurricane_lat = hurricane['Latitude']
        hurricane_lon = hurricane['Longitude']
        hurricane_time = hurricane['hurricane_datetime']
        max_wind_speed = hurricane['Maximum Sustained Wind'] 

        if pd.isna(hurricane_lat) or pd.isna(hurricane_lon) or pd.isna(max_wind_speed):
            continue 
        if hurricane_time in processed_timestamps:
            continue
        processed_timestamps.add(hurricane_time)

        impact_radius = calculate_radius(max_wind_speed)
        if impact_radius == 0:
            continue  

        hurricane_point = Point(hurricane_lon, hurricane_lat)

        matching_vessels = vessel_df[vessel_df['BaseDateTime'] == hurricane_time]
        
        if matching_vessels.empty:
            # print(matching_vessels)
            print(f"No vessels found for hurricane time: {hurricane_time}")
            continue
        
        print(f"{len(matching_vessels)} vessels found for hurricane at {hurricane_time}")


        for v_idx, vessel in matching_vessels.iterrows():
            vessel_lat = vessel['LAT']
            vessel_lon = vessel['LON']
            
            if pd.isna(vessel_lat) or pd.isna(vessel_lon):
                continue  

            distance = geodesic((vessel_lat, vessel_lon), (hurricane_lat, hurricane_lon)).kilometers

            print(f"Vessel at ({vessel_lat}, {vessel_lon}) - Distance: {distance:.2f} km from hurricane on {hurricane_time}")

            if distance <= impact_radius:
                # Mark the vessel as impacted
                vessel_df.at[v_idx, 'impacted'] = True
                vessel_id = vessel.get('vessel_id', 'Unknown ID')
                print(f"Vessel {vessel_id} is impacted.")

    return vessel_df  

In [22]:
def process_weeks_with_impact(vessel_df):
    start_date = '2022-08-01' 
    end_date = '2023-09-30'

    # storing the results of each week
    all_weeks_data = []

    # Iterate week by week
    for week_start in pd.date_range(start=start_date, end=end_date, freq='W-MON'):
        week_end = week_start + pd.Timedelta(days=6)

        # Filter the dataset for the current week (using only the date part)
        vessel_week = vessel_df[(vessel_df['Date_only'] >= week_start) & (vessel_df['Date_only'] <= week_end)]
        
        # Process the hurricane and vessel data for this week
        if not vessel_week.empty:
            print(f"Processing week starting {week_start.date()} with hurricanes and {len(vessel_week)} vessels")
            
            impacted_vessels_week = identify_impacted_vessels(vessel_week)

            # Append the impacted vessel data for this week to the list
            all_weeks_data.append(impacted_vessels_week)

    # Concatenate all processed week data into a single DataFrame
    final_df = pd.concat(all_weeks_data, ignore_index=True)

    return final_df

In [23]:
processed_vessel_df = process_weeks_with_impact(vessel_df)

Processing week starting 2022-08-29 with hurricanes and 2077 vessels
      Latitude Longitude
44389     None      None
44390     None      None
44391     None      None
44392     None      None
44393     None      None
Processing week starting 2022-09-05 with hurricanes and 5148 vessels
      Latitude Longitude
47126     None      None
47127     None      None
47128     None      None
47129     None      None
47130     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2022-09-12 with hurricanes and 4444 vessels
      Latitude Longitude
52886     None      None
52887     None      None
52888     None      None
52889     None      None
52890     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2022-09-19 with hurricanes and 7378 vessels
      Latitude Longitude
57630     None      None
57631     None      None
57632     None      None
57633     None      None
57634     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2022-09-26 with hurricanes and 4413 vessels
      Latitude Longitude
66260     None      None
66261     None      None
66262     None      None
66263     None      None
66264     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2023-08-14 with hurricanes and 1140 vessels
     Latitude Longitude
2208     None      None
2209     None      None
2210     None      None
2211     None      None
2212     None      None
Processing week starting 2023-08-21 with hurricanes and 6689 vessels
     Latitude Longitude
3186     None      None
3187     None      None
3188     None      None
3189     None      None
3190     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2023-08-28 with hurricanes and 8832 vessels
  Latitude Longitude
0     None      None
1     None      None
2     None      None
3     None      None
4     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2023-09-04 with hurricanes and 9782 vessels
      Latitude Longitude
16465     None      None
16466     None      None
16467     None      None
16468     None      None
16469     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2023-09-11 with hurricanes and 6551 vessels
      Latitude Longitude
25789     None      None
25790     None      None
25791     None      None
25792     None      None
25793     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2023-09-18 with hurricanes and 3823 vessels
      Latitude Longitude
14827     None      None
14828     None      None
14829     None      None
14830     None      None
14831     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


Processing week starting 2023-09-25 with hurricanes and 3760 vessels
      Latitude Longitude
20469     None      None
20470     None      None
20471     None      None
20472     None      None
20473     None      None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vessel_df['impacted'] = False


In [24]:
processed_vessel_df['impacted'].unique()

array([False])