In [1]:
import pandas as pd
import json

In [2]:
# Path to the JSON file
json_file_path = 'CIEM6302/port_arthur.json'

# Read the JSON data. The file is opened in binary mode so that the json module
# detects the encoding (UTF-8/16/32) itself instead of relying on the
# platform's default text encoding.
with open(json_file_path, 'rb') as json_file:
    data_json = json.load(json_file)


# Extract the data
data_list = data_json["data"]

# Columns of the flat table. Each name doubles as the dotted path of the value
# inside one entry, e.g. "navigation.location.lat" is read from
# entry["navigation"]["location"]["lat"].
column_paths = [
    "vessel.type",
    "vessel.callsign",
    "vessel.subtype",
    "vessel.imo",
    "vessel.name",
    "navigation.draught",
    "navigation.status",
    "navigation.location.long",
    "navigation.location.lat",
    "navigation.speed",
    "navigation.time",
    "navigation.course",
    "device.mmsi",
    "device.dimensions.to_bow",
    "device.dimensions.to_starboard",
    "device.dimensions.to_stern",
    "device.dimensions.to_port",
]


def _get_nested(entry, dotted_path):
    """Return the value reached by following ``dotted_path`` through nested dicts.

    Returns None as soon as a key is missing or an intermediate value is not a
    dict (for example a section that is null in the JSON), so incomplete
    entries produce missing values instead of raising.
    """
    node = entry
    for key in dotted_path.split("."):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


# Create a Dataframe with one column per path and one row per entry
df = pd.DataFrame({
    path: [_get_nested(entry, path) for entry in data_list]
    for path in column_paths
})


df

Unnamed: 0,vessel.type,vessel.callsign,vessel.subtype,vessel.imo,vessel.name,navigation.draught,navigation.status,navigation.location.long,navigation.location.lat,navigation.speed,navigation.time,navigation.course,device.mmsi,device.dimensions.to_bow,device.dimensions.to_starboard,device.dimensions.to_stern,device.dimensions.to_port
0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,2021-04-07T16:01:55+00:00,45.9,224941000,226,24,58,19
1,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70643,29.41625,1.4,2021-04-07T16:06:15+00:00,118.6,224941000,226,24,58,19
2,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70459,29.41492,1.2,2021-04-07T16:11:05+00:00,144.9,224941000,226,24,58,19
3,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70459,29.41492,1.2,2021-04-07T16:11:05+00:00,144.9,224941000,226,24,58,19
4,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70219,29.41087,4.4,2021-04-07T16:21:54+00:00,97.0,224941000,226,24,58,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2342,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09057,30.07655,0.0,2021-04-08T08:11:46+00:00,277.5,563495000,187,16,26,15
2343,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09055,30.07655,0.0,2021-04-08T08:14:46+00:00,277.5,563495000,187,16,26,15
2344,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09057,30.07657,0.0,2021-04-08T08:20:45+00:00,277.5,563495000,187,16,26,15
2345,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09056,30.07656,0.0,2021-04-08T08:26:46+00:00,277.5,563495000,187,16,26,15


In [3]:
# Distinct vessel categories present in the 'vessel.type' column,
# in order of first appearance
unique_types = pd.unique(df['vessel.type'])
print(unique_types)

['tanker' 'cargo' 'tug']


In [4]:
!pip install geopy



In [5]:
import math

import numpy as np

# Set the speed threshold: rows with navigation.speed below it are treated as
# (nearly) stationary
speed_threshold = 1.0

# Earth radius used by the haversine formula, in kilometres; the resulting
# distances are therefore in kilometres as well
earth_radius_km = 6371

# Filter rows where the vessel type is "tug" and speed is close to 0
tug_rows = df[(df['vessel.type'] == 'tug') & (df['navigation.speed'] < speed_threshold)]

# Rows where the vessel type is not "tug" and speed is close to 0.
# This selection does not depend on the tug, so it is computed once.
# NOTE(review): candidates are not restricted to observations close to the
# tug's timestamp; every tug position is compared with every slow non-tug
# position in the whole data set. Confirm this is intended.
other_vessels = df[(df['vessel.type'] != 'tug') & (df['navigation.speed'] < speed_threshold)]

# Candidate coordinates and labels as plain arrays (missing values become NaN)
other_lat = other_vessels['navigation.location.lat'].to_numpy(dtype=float)
other_lon = other_vessels['navigation.location.long'].to_numpy(dtype=float)
other_lat_cos = np.cos(np.radians(other_lat))
other_types = other_vessels['vessel.type'].to_numpy()
other_names = other_vessels['vessel.name'].to_numpy()

tug_lat = tug_rows['navigation.location.lat'].to_numpy(dtype=float)
tug_lon = tug_rows['navigation.location.long'].to_numpy(dtype=float)

# Dictionary to store the closest vessel (tanker or cargo) for each tug
closest_vessel_dict = {}

# Iterate over each "tug" row
for index, lat1, lon1 in zip(tug_rows.index, tug_lat, tug_lon):
    # Haversine distance from this tug position to every candidate at once
    dlat = np.radians(other_lat - lat1)
    dlon = np.radians(other_lon - lon1)
    sin_dlat = np.sin(dlat / 2)
    sin_dlon = np.sin(dlon / 2)
    a = sin_dlat * sin_dlat + math.cos(math.radians(lat1)) * other_lat_cos * sin_dlon * sin_dlon
    # Rounding can push a marginally outside [0, 1], which would make the
    # square roots below invalid
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distances = earth_radius_km * c

    if np.any(~np.isnan(distances)):
        # nanargmin ignores candidates with missing coordinates and returns
        # the first position of the minimum
        pos = int(np.nanargmin(distances))
        closest_vessel_dict[index] = {
            'closest_vessel_type': other_types[pos],
            'closest_vessel_name': other_names[pos],
            'closest_vessel_distance': float(distances[pos])
        }
    else:
        # No candidate with a usable position (or no candidates at all):
        # keep the tug row but record that nothing was found
        closest_vessel_dict[index] = {
            'closest_vessel_type': None,
            'closest_vessel_name': None,
            'closest_vessel_distance': float('nan')
        }

# DataFrame from the closest vessel dictionary
closest_vessel_df = pd.DataFrame.from_dict(closest_vessel_dict, orient='index')


In [6]:
# Attach each row's own timestamp and vessel name from df; pandas aligns the
# assigned Series on the row index.
for target_column, source_column in (('time', 'navigation.time'), ('tugname', 'vessel.name')):
    closest_vessel_df[target_column] = df[source_column]

closest_vessel_df

Unnamed: 0,closest_vessel_type,closest_vessel_name,closest_vessel_distance,time,tugname
1388,tanker,NINGBO DAWN,0.876880,2021-04-07T16:01:59+00:00,SABINE
1389,tanker,NINGBO DAWN,0.876315,2021-04-07T16:06:59+00:00,SABINE
1390,tanker,NINGBO DAWN,0.875321,2021-04-07T16:11:58+00:00,SABINE
1391,tanker,NINGBO DAWN,0.875192,2021-04-07T16:16:58+00:00,SABINE
1392,tanker,NINGBO DAWN,0.873464,2021-04-07T16:21:58+00:00,SABINE
...,...,...,...,...,...
2144,cargo,ALLIANCE NORFOLK,0.190958,2021-04-08T08:11:50+00:00,TRITON
2145,cargo,ALLIANCE NORFOLK,0.193096,2021-04-08T08:16:51+00:00,TRITON
2146,cargo,ALLIANCE NORFOLK,0.192297,2021-04-08T08:21:52+00:00,TRITON
2147,cargo,ALLIANCE NORFOLK,0.193766,2021-04-08T08:26:51+00:00,TRITON


In [7]:
# Minimum number of consecutive rows with the same closest vessel for a run to
# be reported. One threshold is applied to every run, including the last one.
min_block_length = 2

# Split the rows into runs of consecutive rows with the same closest vessel name.
# NOTE(review): runs are delimited by 'closest_vessel_name' only, so a single
# run can span rows belonging to different tugs. Confirm this is intended.
runs = []
prev_name = None

for index, row in closest_vessel_df.iterrows():
    name = row['closest_vessel_name']
    if runs and name == prev_name:
        runs[-1].append(index)
    else:
        runs.append([index])
    prev_name = name

# Repetition blocks: first and last index of every run that is long enough
repetition_blocks = []
for run in runs:
    if len(run) >= min_block_length:
        repetition_blocks.extend([run[0], run[-1]])

# DataFrame with the first and last rows of each repetition block
filtered_df = closest_vessel_df.loc[repetition_blocks]


In [8]:
# Display the selected rows of closest_vessel_df
filtered_df

Unnamed: 0,closest_vessel_type,closest_vessel_name,closest_vessel_distance,time,tugname
1388,tanker,NINGBO DAWN,0.87688,2021-04-07T16:01:59+00:00,SABINE
1425,tanker,NINGBO DAWN,0.87688,2021-04-07T19:06:58+00:00,SABINE
1485,cargo,IRELAND,0.029206,2021-04-08T00:05:49+00:00,SABINE
1694,cargo,IRELAND,0.204462,2021-04-08T03:31:03+00:00,BUCCANEER
1712,tanker,ENERGY PROGRESS,1.811474,2021-04-08T05:21:52+00:00,BUCCANEER
1776,tanker,ENERGY PROGRESS,0.926975,2021-04-07T18:06:50+00:00,ATHENA
1795,cargo,ALLIANCE NORFOLK,0.157156,2021-04-07T19:41:50+00:00,ATHENA
1877,cargo,ALLIANCE NORFOLK,0.148234,2021-04-08T02:31:50+00:00,ATHENA
1883,tanker,NINGBO DAWN,2.874316,2021-04-08T03:01:59+00:00,ATHENA
1904,tanker,NINGBO DAWN,1.441965,2021-04-08T04:47:00+00:00,ATHENA


In [11]:
# Sort the DataFrame by index in ascending order
#closest_vessel_df = closest_vessel_df.sort_index()

# Keep only the rows with the lowest and highest index for each unique value in the "closest_vessel_name" column
#result_df = closest_vessel_df.groupby('closest_vessel_name', group_keys=False).apply(lambda x: x.iloc[[0, -1]])

# Reset the index of the resulting DataFrame
#result_df = result_df.reset_index(drop=True)

In [12]:
#result_df