In [1]:
import json
import pandas as pd 

def flatten_dict(d, parent_key='', sep='_'):
    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten_dict(v, new_key, sep=sep))
        else:
            items[new_key] = v
    return items


# Opening JSON file
f = open('port_arthur.json')
 
# returns JSON object as a dictionary
data = json.load(f)
 
# Flatten each dictionary and store the results in a list
flattened_dicts = [flatten_dict(d) for d in data['data']]

# Create a DataFrame
data = pd.DataFrame(flattened_dicts)

# Print the DataFrame
print(data)
 
# Closing file
f.close()

     vessel_type vessel_callsign vessel_subtype  vessel_imo       vessel_name  \
0         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
1         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
2         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
3         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
4         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
...          ...             ...            ...         ...               ...   
2342       cargo           9VAW8            all     9081801          HOSANGER   
2343       cargo           9VAW8            all     9081801          HOSANGER   
2344       cargo           9VAW8            all     9081801          HOSANGER   
2345       cargo           9VAW8            all     9081801          HOSANGER   
2346       cargo           9VAW8            all     9081801          HOSANGER   

      navigation_draught   

In [2]:
data['navigation_time'] = pd.to_datetime(data['navigation_time'])

data = data[data['navigation_status'] != 'moored'].drop_duplicates()



In [3]:
from shapely.geometry import Point

def create_point(row):
    return Point(row['navigation_location_long'], row['navigation_location_lat'])

data['geometry'] = data.apply(create_point, axis=1)

tugs = data.loc[(data['vessel_type'] == 'tug')]
non_tug = data.loc[(data['vessel_type'] != 'tug')]

In [4]:
tugs

Unnamed: 0,vessel_type,vessel_callsign,vessel_subtype,vessel_imo,vessel_name,navigation_draught,navigation_status,navigation_location_long,navigation_location_lat,navigation_speed,navigation_time,navigation_course,device_mmsi,device_dimensions_to_bow,device_dimensions_to_starboard,device_dimensions_to_stern,device_dimensions_to_port,geometry
1388,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96003,29.82966,0.0,2021-04-07 16:01:59+00:00,324.1,367182980,15,8,15,2,POINT (-93.96003 29.82966)
1389,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96004,29.82965,0.0,2021-04-07 16:06:59+00:00,325.9,367182980,15,8,15,2,POINT (-93.96004 29.82965)
1390,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96004,29.82964,0.0,2021-04-07 16:11:58+00:00,314.4,367182980,15,8,15,2,POINT (-93.96004 29.82964)
1391,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96006,29.82963,0.0,2021-04-07 16:16:58+00:00,301.0,367182980,15,8,15,2,POINT (-93.96006 29.82963)
1392,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96002,29.82963,0.0,2021-04-07 16:21:58+00:00,177.7,367182980,15,8,15,2,POINT (-93.96002 29.82963)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2144,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86099,0.0,2021-04-08 08:11:50+00:00,25.8,367774360,16,14,7,6,POINT (-93.93849 29.86099)
2145,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93851,29.86098,0.0,2021-04-08 08:16:51+00:00,25.8,367774360,16,14,7,6,POINT (-93.93851 29.86098)
2146,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93851,29.86099,0.0,2021-04-08 08:21:52+00:00,133.2,367774360,16,14,7,6,POINT (-93.93851 29.86099)
2147,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93852,29.86098,0.0,2021-04-08 08:26:51+00:00,133.2,367774360,16,14,7,6,POINT (-93.93852 29.86098)


In [5]:
non_tug

Unnamed: 0,vessel_type,vessel_callsign,vessel_subtype,vessel_imo,vessel_name,navigation_draught,navigation_status,navigation_location_long,navigation_location_lat,navigation_speed,navigation_time,navigation_course,device_mmsi,device_dimensions_to_bow,device_dimensions_to_starboard,device_dimensions_to_stern,device_dimensions_to_port,geometry
0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,2021-04-07 16:01:55+00:00,45.9,224941000,226,24,58,19,POINT (-93.70755 29.4163)
1,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70643,29.41625,1.4,2021-04-07 16:06:15+00:00,118.6,224941000,226,24,58,19,POINT (-93.70643 29.41625)
2,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70459,29.41492,1.2,2021-04-07 16:11:05+00:00,144.9,224941000,226,24,58,19,POINT (-93.70459 29.41492)
4,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70219,29.41087,4.4,2021-04-07 16:21:54+00:00,97.0,224941000,226,24,58,19,POINT (-93.70219 29.41087)
5,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.69435,29.41231,5.8,2021-04-07 16:26:44+00:00,62.8,224941000,226,24,58,19,POINT (-93.69435 29.41231)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1312,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95618,29.81012,6.5,2021-04-08 02:16:53+00:00,342.0,357449000,207,25,38,17,POINT (-93.95618 29.81012)
1313,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95863,29.81590,3.4,2021-04-08 02:21:54+00:00,338.0,357449000,207,25,38,17,POINT (-93.95863 29.8159)
1314,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95940,29.81963,1.9,2021-04-08 02:26:54+00:00,10.0,357449000,207,25,38,17,POINT (-93.9594 29.81963)
1315,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95737,29.82167,1.9,2021-04-08 02:31:53+00:00,52.0,357449000,207,25,38,17,POINT (-93.95737 29.82167)


In [6]:
merged_data = non_tug.merge(tugs,how= 'cross', suffixes = ('_nontug','_tug'))

In [7]:
merged_data['time_diff'] = (merged_data['navigation_time_tug']-merged_data['navigation_time_nontug']).dt.total_seconds() / 60


In [8]:
import numpy as np

# Function to calculate distance in kilometers using Haversine formula
def haversine(lat1, lon1, lat2, lon2):
    # Radius of the Earth in kilometers
    R = 6371.0
    
    # Convert latitude and longitude from degrees to radians
    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)
    
    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    distance = R * c
    
    return distance

# Apply the haversine function to each row to create a new column 'distance_km'
merged_data['distance_km'] = merged_data.apply(lambda row: haversine(row['navigation_location_lat_nontug'], row['navigation_location_long_nontug'], row['navigation_location_lat_tug'], row['navigation_location_long_tug']), axis=1)


In [9]:
merged_data

Unnamed: 0,vessel_type_nontug,vessel_callsign_nontug,vessel_subtype_nontug,vessel_imo_nontug,vessel_name_nontug,navigation_draught_nontug,navigation_status_nontug,navigation_location_long_nontug,navigation_location_lat_nontug,navigation_speed_nontug,...,navigation_time_tug,navigation_course_tug,device_mmsi_tug,device_dimensions_to_bow_tug,device_dimensions_to_starboard_tug,device_dimensions_to_stern_tug,device_dimensions_to_port_tug,geometry_tug,time_diff,distance_km
0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,...,2021-04-07 16:01:59+00:00,324.1,367182980,15,8,15,2,POINT (-93.96003 29.82966),0.066667,52.040835
1,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,...,2021-04-07 16:06:59+00:00,325.9,367182980,15,8,15,2,POINT (-93.96004 29.82965),5.066667,52.040307
2,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,...,2021-04-07 16:11:58+00:00,314.4,367182980,15,8,15,2,POINT (-93.96004 29.82964),10.050000,52.039325
3,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,...,2021-04-07 16:16:58+00:00,301.0,367182980,15,8,15,2,POINT (-93.96006 29.82963),15.050000,52.039251
4,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,...,2021-04-07 16:21:58+00:00,177.7,367182980,15,8,15,2,POINT (-93.96002 29.82963),20.050000,52.037437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324465,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,0.2,...,2021-04-08 08:11:50+00:00,25.8,367774360,16,14,7,6,POINT (-93.93849 29.86099),334.816667,4.620012
324466,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,0.2,...,2021-04-08 08:16:51+00:00,25.8,367774360,16,14,7,6,POINT (-93.93851 29.86098),339.833333,4.618262
324467,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,0.2,...,2021-04-08 08:21:52+00:00,133.2,367774360,16,14,7,6,POINT (-93.93851 29.86099),344.850000,4.619295
324468,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,0.2,...,2021-04-08 08:26:51+00:00,133.2,367774360,16,14,7,6,POINT (-93.93852 29.86098),349.833333,4.617904


In [10]:
tugged_vessels = merged_data[(abs(merged_data['time_diff']) < 10) & (merged_data['distance_km'] < 0.5 )]

HERE ON OLNY FOR TESTING

In [11]:
tugged_vessels #.to_csv('Output3.csv')

Unnamed: 0,vessel_type_nontug,vessel_callsign_nontug,vessel_subtype_nontug,vessel_imo_nontug,vessel_name_nontug,navigation_draught_nontug,navigation_status_nontug,navigation_location_long_nontug,navigation_location_lat_nontug,navigation_speed_nontug,...,navigation_time_tug,navigation_course_tug,device_mmsi_tug,device_dimensions_to_bow_tug,device_dimensions_to_starboard_tug,device_dimensions_to_stern_tug,device_dimensions_to_port_tug,geometry_tug,time_diff,distance_km
87959,tanker,MEOE9,all,9352133,CARIBE LIZA,6.3,under-way-using-engine,-93.93393,29.86447,7.7,...,2021-04-08 01:51:52+00:00,241.9,367774360,16,14,7,6,POINT (-93.93787 29.86167),-5.150000,0.491210
87960,tanker,MEOE9,all,9352133,CARIBE LIZA,6.3,under-way-using-engine,-93.93393,29.86447,7.7,...,2021-04-08 01:56:51+00:00,234.1,367774360,16,14,7,6,POINT (-93.93785 29.86166),-0.166667,0.490427
87961,tanker,MEOE9,all,9352133,CARIBE LIZA,6.3,under-way-using-engine,-93.93393,29.86447,7.7,...,2021-04-08 02:01:52+00:00,234.1,367774360,16,14,7,6,POINT (-93.93787 29.86167),4.850000,0.491210
87962,tanker,MEOE9,all,9352133,CARIBE LIZA,6.3,under-way-using-engine,-93.93393,29.86447,7.7,...,2021-04-08 02:06:50+00:00,234.1,367774360,16,14,7,6,POINT (-93.93785 29.86167),9.816667,0.489719
106759,tanker,MEOE9,all,9352133,CARIBE LIZA,6.3,under-way-using-engine,-94.04110,30.06333,7.6,...,2021-04-08 04:11:53+00:00,114.8,367183020,15,5,11,5,POINT (-94.03754 30.06158),-0.116667,0.394009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243307,cargo,WGAH,,9332547,ALLIANCE NORFOLK,8.1,under-way-using-engine,-93.93718,29.86107,1.9,...,2021-04-08 06:37:00+00:00,51.0,367561460,10,12,6,12,POINT (-93.93822 29.86118),5.000000,0.101033
243504,cargo,WGAH,,9332547,ALLIANCE NORFOLK,8.1,under-way-using-engine,-93.93718,29.86107,1.9,...,2021-04-08 06:26:51+00:00,151.2,367774360,16,14,7,6,POINT (-93.93848 29.86099),-5.150000,0.125677
243505,cargo,WGAH,,9332547,ALLIANCE NORFOLK,8.1,under-way-using-engine,-93.93718,29.86107,1.9,...,2021-04-08 06:31:51+00:00,154.7,367774360,16,14,7,6,POINT (-93.93849 29.86099),-0.150000,0.126639
243506,cargo,WGAH,,9332547,ALLIANCE NORFOLK,8.1,under-way-using-engine,-93.93718,29.86107,1.9,...,2021-04-08 06:36:51+00:00,143.0,367774360,16,14,7,6,POINT (-93.9385 29.86097),4.850000,0.127775


In [12]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import random


# Convert the 'navigation_time' column to datetime
df['navigation_time'] = pd.to_datetime(df['navigation_time'])

# Sort the DataFrame by 'navigation_time'
df.sort_values(by=['vessel_name', 'navigation_time'], inplace=True)

# Create a figure using Plotly Express
fig = px.scatter_geo(df, lon='navigation_location_long', lat='navigation_location_lat', animation_frame='navigation_time',
                     color='vessel_name', title="Vessel Name Animation",
                     labels={'navigation_time': 'Time', 'vessel_name': 'Vessel Name'},
                     projection='natural earth')  # Use a background map

# Create lines to show vessel trajectories
data = []
for vessel_name, group in df.groupby('vessel_name'):
    color = f"rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})"
    trace = go.Scattergeo(
        lon=group['navigation_location_long'],
        lat=group['navigation_location_lat'],
        mode='lines+markers',
        line=dict(color=color),  # Assign a random color to the trajectory
        marker=dict(size=6),
        name=f'Trajectory of {vessel_name}',
    )
    data.append(trace)

# Add trajectories to the figure
fig.update(data=data)

# Customize the figure layout if needed
fig.update_geos(showcoastlines=True, coastlinecolor="Black", showland=True, landcolor="LightGreen")

# Show the animation
fig.show()


NameError: name 'df' is not defined

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load your DataFrame here (assuming it's named 'df')
# df = pd.read_csv('your_data.csv')

# Convert the 'navigation_time' column to datetime
df['navigation_time'] = pd.to_datetime(df['navigation_time'])

# Sort the DataFrame by 'navigation_time'
df.sort_values(by=['vessel_name', 'navigation_time'], inplace=True)

# Get a list of unique vessel names
unique_vessel_names = df['vessel_name'].unique()

# Filter out the tug vessels
non_tug_vessels = df[df['vessel_type'] != 'tug']
tug_vessels = df[df['vessel_type'] == 'tug']

# Create subplots for non-tug vessels
fig, axes = plt.subplots(nrows=len(unique_vessel_names), figsize=(10, 6*len(unique_vessel_names)))
plt.subplots_adjust(hspace=0.5)

for i, vessel_name in enumerate(unique_vessel_names):
    ax = axes[i]
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.set_title(f'Trajectory of {vessel_name}')
    
    # Plot the trajectory of the non-tug vessel
    non_tug_data = non_tug_vessels[non_tug_vessels['vessel_name'] == vessel_name]
    ax.plot(non_tug_data['navigation_location_long'], non_tug_data['navigation_location_lat'], label=f'{vessel_name} (Non-Tug)')
    
    # Show all the tug vessels on this subplot
    for tug_vessel_name in tug_vessels['vessel_name'].unique():
        tug_data = tug_vessels[tug_vessels['vessel_name'] == tug_vessel_name]
        ax.plot(tug_data['navigation_location_long'], tug_data['navigation_location_lat'], label=f'{tug_vessel_name} (Tug)', linestyle='--')

    ax.legend()

# Show the subplots
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import folium
import geopandas as gpd
from branca.colormap import LinearColormap
from shapely.geometry import Point

# Load your DataFrame here (assuming it's named 'df')
# df = pd.read_csv('your_data.csv')

# Convert the 'navigation_time' column to datetime
df['navigation_time'] = pd.to_datetime(df['navigation_time'])

# Sort the DataFrame by 'navigation_time'
df.sort_values(by=['navigation_time'], inplace=True)

# Create a GeoDataFrame from the DataFrame using Shapely Point geometries
geometry = [Point(xy) for xy in zip(df['navigation_location_long'], df['navigation_location_lat'])]
gdf = gpd.GeoDataFrame(df, geometry=geometry)

# Create a color map based on speed
speed_colormap = LinearColormap(colors=['blue', 'green', 'yellow', 'red'],
                                 vmin=df['navigation_speed'].min(), vmax=df['navigation_speed'].max())

# Create a Folium map centered around the initial location
initial_location = (df['navigation_location_lat'].iloc[0], df['navigation_location_long'].iloc[0])
m = folium.Map(location=initial_location, zoom_start=10)

# Create markers for each point in the GeoDataFrame with color based on speed
for index, row in gdf.iterrows():
    folium.CircleMarker(location=(row['navigation_location_lat'], row['navigation_location_long']),
                        radius=5,
                        color=speed_colormap(row['navigation_speed']),
                        fill=True,
                        fill_color=speed_colormap(row['navigation_speed']),
                        fill_opacity=0.7,
                        popup=f"Speed: {row['navigation_speed']} knots\nTime: {row['navigation_time']}"
                        ).add_to(m)

# Add the speed color map to the map
speed_colormap.add_to(m)

# Display the Folium map
m


In [None]:
import pandas as pd

# Assuming your data is stored in a DataFrame called 'df'
# Filter tug boat records that are under-way-using-engine
tug_boats = df[(df['vessel_type'] == 'tug') & (df['navigation_status'] == 'under-way-using-engine')]

# Define a time window (in seconds) within which a tug boat is considered to be assisting another vessel
time_window = 600  # You can adjust this value as needed

# Initialize a dictionary to store the relationships between tug boats and vessels they assist
tug_to_vessel = {}

# Iterate through each tug boat record
for _, tug_row in tug_boats.iterrows():
    tug_time = pd.to_datetime(tug_row['navigation_time']).timestamp()
    tug_location = (tug_row['navigation_location_long'], tug_row['navigation_location_lat'])
    
    # Search for vessels that are in the vicinity of the tug boat within the time window
    potential_vessels = df[
        (df['vessel_type'] != 'tug') &
        (pd.to_datetime(df['navigation_time']).apply(lambda x: x.timestamp()) >= tug_time - time_window) &
        (pd.to_datetime(df['navigation_time']).apply(lambda x: x.timestamp()) <= tug_time + time_window) &
        (df.apply(lambda x: (x['navigation_location_long'], x['navigation_location_lat']), axis=1) == tug_location)
    ]
    
    # Store the tug-to-vessel relationship in the dictionary
    if not potential_vessels.empty:
        tug_to_vessel[tug_row['ID']] = potential_vessels['vessel_name'].tolist()

# Print the relationships
for tug_id, vessels in tug_to_vessel.items():
    print(f"Tug Boat ID {tug_id} assisted the following vessels: {', '.join(vessels)}")


In [None]:
import plotly.express as px

# Assuming your data is stored in a DataFrame called 'df'
# Filter data for the vessel "ALLIANCE NORFOLK"
vessel_name = "ALLIANCE NORFOLK"
vessel_data = df[df['vessel_name'] == vessel_name]

# Create a scatter mapbox trace for the vessel's path
fig = px.scatter_mapbox(
    vessel_data,
    lat="navigation_location_lat",
    lon="navigation_location_long",
    hover_name="navigation_time",
    title=f"Path of {vessel_name}",
    mapbox_style="open-street-map",  # You can change the map style as needed
)

# Customize the appearance of the path trace if desired
fig.update_traces(
    mode="lines+markers",  # Show lines connecting the data points
    line=dict(width=2, color="blue"),  # Line color and width
    marker=dict(size=8, opacity=0.8, color="red"),  # Marker appearance
)

# Show the plot
fig.show()


In [None]:
import plotly.express as px

# Assuming your data is stored in a DataFrame called 'df'

# Step 1: Find the time range of the "ALLIANCE NORFOLK" vessel's data
vessel_name = "ALLIANCE NORFOLK"
vessel_data = df[df['vessel_name'] == vessel_name]
start_time = vessel_data['navigation_time'].min()
end_time = vessel_data['navigation_time'].max()

# Step 2: Filter tug type vessels' data within the same time range
tug_data = df[(df['vessel_type'] == 'tug') & (df['navigation_time'] >= start_time) & (df['navigation_time'] <= end_time)]

# Step 3: Create a scatter mapbox trace for both vessels
fig = px.scatter_mapbox(
    lat=vessel_data["navigation_location_lat"],
    lon=vessel_data["navigation_location_long"],
    hover_name=vessel_data["navigation_time"],
    title=f"Path of {vessel_name}",
    mapbox_style="open-street-map",
    color=vessel_data["navigation_speed"],
    color_continuous_scale="Viridis"
    #colorbar=dict(title="Speed"),
    #name=f"{vessel_name} (Cargo)",
)

# Customize the appearance of the path trace for "ALLIANCE NORFOLK"
fig.update_traces(
    mode="lines+markers",
    line=dict(width=2, color="blue"),
    marker=dict(size=8, opacity=0.8),
)

# Add paths for tug type vessels
fig.add_trace(
    px.scatter_mapbox(
        tug_data,
        lat="navigation_location_lat",
        lon="navigation_location_long",
        title= 'vessel_name',
        hover_name="navigation_time",
        color="navigation_speed",
        color_continuous_scale="Viridis",
        #colorbar=dict(title="Speed"),
        #name="Tug Vessels",
    ).data[0]
)

# Show the plot
fig.show()


In [None]:
import pandas as pd
from shapely.geometry import Point
from datetime import timedelta

# Load your original DataFrame here (assuming it's named 'df')
# df = pd.read_csv('your_data.csv')

# Convert the 'navigation_time' column to datetime
df['navigation_time'] = pd.to_datetime(df['navigation_time'])

# Define a function to filter tug vessels within 100 meters and 20 minutes
def filter_tugs(row):
    if row['vessel_type'] == 'tug':
        return False  # Exclude tug vessels
    state = row['navigation_status']
    if state == 'moored':
        return False  # Exclude when state is moored
    timestamp = row['navigation_time']
    vessel_id = row['vessel_name']
    # Filter tugs within 100 meters and 20 minutes
    nearby_tugs = df[(df['vessel_type'] == 'tug') & (df['vessel_name'] != vessel_id) &
                     (df['navigation_time'] - timestamp <= timedelta(minutes=20)) &
                     ((df['navigation_location_long'] - row['navigation_location_long'])**2 +
                      (df['navigation_location_lat'] - row['navigation_location_lat'])**2 <= 0.0001)]  # 100 meters in degrees
    return not nearby_tugs.empty

# Create a new DataFrame based on the filter function
filtered_df = df[df.apply(filter_tugs, axis=1)]

# Now 'filtered_df' contains data for vessels that are not tugs,
# in a state other than moored, with nearby tugs within 100 meters and 20 minutes.

# You can perform further analysis or visualization with 'filtered_df'.


In [None]:
filtered_df.to_csv('Output2.csv')

In [None]:
# Convert the 'navigation_time' column to datetime
df['navigation_time'] = pd.to_datetime(df['navigation_time'])

# Separate tugs and non-tug vessels
tugs = df.loc[(df['vessel_type'] == 'tug') & (df['navigation_status'] != 'moored')].drop_duplicates()
non_tug = df.loc[(df['vessel_type'] != 'tug') & (df['navigation_status'] != 'moored')].drop_duplicates()


In [None]:
non_tug

In [None]:

# Create an empty DataFrame for vessels that were tugged
vessels_tugged = pd.DataFrame(columns=df.columns)

# Function to check if a tug matches a non-tug based on criteria
def is_tugged(tug_row, non_tug_row):
    if (tug_row['navigation_time'] - non_tug_row['navigation_time'] <= timedelta(minutes=10) and
        Point(tug_row['navigation_location_long'], tug_row['navigation_location_lat']).distance(
            Point(non_tug_row['navigation_location_long'], non_tug_row['navigation_location_lat'])
        ) <= 0.0015):  # 150 meters in degrees
        return True
    return False

# Iterate through non-tug vessels and find matching tugs
for index_non_tug, non_tug_row in non_tug.iterrows():
    tugs_used = pd.DataFrame(columns=df.columns)  # Create an empty DataFrame for tugs used
    for index_tug, tug_row in tugs.iterrows():
        if is_tugged(tug_row, non_tug_row):
            tugs_used = pd.concat([tugs_used, tug_row.to_frame().T], ignore_index=True)
    # Add tugs_used data to the non-tug row and append it to vessels_tugged
    combined_row = pd.concat([non_tug_row.to_frame().T, tugs_used], axis=0, ignore_index=True)
    vessels_tugged = pd.concat([vessels_tugged, combined_row], ignore_index=True)

# Now 'vessels_tugged' contains data for non-tug vessels that were tugged by tugs,
# including the relevant tug data for each match.


In [None]:
vessels_tugged