In [26]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable
import matplotlib.patches as mpatches

In [27]:
def flatten_dict(d, parent_key='', sep='_'):
    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten_dict(v, new_key, sep=sep))
        else:
            items[new_key] = v
    return items

# Opening JSON file
f = open('port_arthur.json')
 
# returns JSON object as a dictionary
datad = json.load(f)
 
# Flatten each dictionary and store the results in a list
flattened_dicts = [flatten_dict(d) for d in datad['data']]

# Create a DataFrame
datad = pd.DataFrame(flattened_dicts)

# Print the DataFrame
print(datad)
 
# Closing file
f.close()

     vessel_type vessel_callsign vessel_subtype  vessel_imo       vessel_name  \
0         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
1         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
2         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
3         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
4         tanker            EBZV            all     9236420  CATALUNYA SPIRIT   
...          ...             ...            ...         ...               ...   
2342       cargo           9VAW8            all     9081801          HOSANGER   
2343       cargo           9VAW8            all     9081801          HOSANGER   
2344       cargo           9VAW8            all     9081801          HOSANGER   
2345       cargo           9VAW8            all     9081801          HOSANGER   
2346       cargo           9VAW8            all     9081801          HOSANGER   

      navigation_draught   

In [28]:
datad['navigation_time'] = pd.to_datetime(datad['navigation_time'])

data = datad[
    (datad['navigation_status'] != 'moored') &
    (datad['navigation_speed'] != 0)
    ].drop_duplicates()
data

Unnamed: 0,vessel_type,vessel_callsign,vessel_subtype,vessel_imo,vessel_name,navigation_draught,navigation_status,navigation_location_long,navigation_location_lat,navigation_speed,navigation_time,navigation_course,device_mmsi,device_dimensions_to_bow,device_dimensions_to_starboard,device_dimensions_to_stern,device_dimensions_to_port
0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,2021-04-07 16:01:55+00:00,45.9,224941000,226,24,58,19
1,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70643,29.41625,1.4,2021-04-07 16:06:15+00:00,118.6,224941000,226,24,58,19
2,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70459,29.41492,1.2,2021-04-07 16:11:05+00:00,144.9,224941000,226,24,58,19
4,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70219,29.41087,4.4,2021-04-07 16:21:54+00:00,97.0,224941000,226,24,58,19
5,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.69435,29.41231,5.8,2021-04-07 16:26:44+00:00,62.8,224941000,226,24,58,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2129,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86100,0.1,2021-04-08 06:56:52+00:00,154.1,367774360,16,14,7,6
2130,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86100,0.1,2021-04-08 07:01:52+00:00,196.6,367774360,16,14,7,6
2131,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86099,0.1,2021-04-08 07:06:51+00:00,182.5,367774360,16,14,7,6
2133,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93850,29.86097,0.1,2021-04-08 07:16:53+00:00,196.7,367774360,16,14,7,6


In [29]:
from shapely.geometry import Point

def create_point(row):
    return Point(row['navigation_location_long'], row['navigation_location_lat'])

data['geometry'] = data.apply(create_point, axis=1)

tugs = data.loc[(data['vessel_type'] == 'tug')].reset_index()
non_tug = data.loc[(data['vessel_type'] != 'tug')].reset_index()

In [30]:
tugs

Unnamed: 0,index,vessel_type,vessel_callsign,vessel_subtype,vessel_imo,vessel_name,navigation_draught,navigation_status,navigation_location_long,navigation_location_lat,navigation_speed,navigation_time,navigation_course,device_mmsi,device_dimensions_to_bow,device_dimensions_to_starboard,device_dimensions_to_stern,device_dimensions_to_port,geometry
0,1425,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96003,29.82966,0.1,2021-04-07 19:06:58+00:00,225.4,367182980,15,8,15,2,POINT (-93.96003 29.82966)
1,1426,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.96014,29.82968,1.0,2021-04-07 19:11:58+00:00,256.3,367182980,15,8,15,2,POINT (-93.96014 29.82968)
2,1427,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.95980,29.82104,8.3,2021-04-07 19:16:59+00:00,164.6,367182980,15,8,15,2,POINT (-93.9598 29.82104)
3,1428,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.95654,29.80986,8.3,2021-04-07 19:21:58+00:00,162.6,367182980,15,8,15,2,POINT (-93.95654 29.80986)
4,1429,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.95659,29.81096,8.8,2021-04-07 19:26:59+00:00,342.3,367182980,15,8,15,2,POINT (-93.95659 29.81096)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,2129,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86100,0.1,2021-04-08 06:56:52+00:00,154.1,367774360,16,14,7,6,POINT (-93.93849 29.861)
309,2130,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86100,0.1,2021-04-08 07:01:52+00:00,196.6,367774360,16,14,7,6,POINT (-93.93849 29.861)
310,2131,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93849,29.86099,0.1,2021-04-08 07:06:51+00:00,182.5,367774360,16,14,7,6,POINT (-93.93849 29.86099)
311,2133,tug,WDJ4094,regular,9812042,TRITON,6.0,under-way-using-engine,-93.93850,29.86097,0.1,2021-04-08 07:16:53+00:00,196.7,367774360,16,14,7,6,POINT (-93.9385 29.86097)


In [31]:
merged_data = non_tug.merge(tugs,how= 'cross', suffixes = ('_nontug','_tug'))

In [32]:
merged_data

Unnamed: 0,index_nontug,vessel_type_nontug,vessel_callsign_nontug,vessel_subtype_nontug,vessel_imo_nontug,vessel_name_nontug,navigation_draught_nontug,navigation_status_nontug,navigation_location_long_nontug,navigation_location_lat_nontug,...,navigation_location_lat_tug,navigation_speed_tug,navigation_time_tug,navigation_course_tug,device_mmsi_tug,device_dimensions_to_bow_tug,device_dimensions_to_starboard_tug,device_dimensions_to_stern_tug,device_dimensions_to_port_tug,geometry_tug
0,0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,...,29.82966,0.1,2021-04-07 19:06:58+00:00,225.4,367182980,15,8,15,2,POINT (-93.96003 29.82966)
1,0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,...,29.82968,1.0,2021-04-07 19:11:58+00:00,256.3,367182980,15,8,15,2,POINT (-93.96014 29.82968)
2,0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,...,29.82104,8.3,2021-04-07 19:16:59+00:00,164.6,367182980,15,8,15,2,POINT (-93.9598 29.82104)
3,0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,...,29.80986,8.3,2021-04-07 19:21:58+00:00,162.6,367182980,15,8,15,2,POINT (-93.95654 29.80986)
4,0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,...,29.81096,8.8,2021-04-07 19:26:59+00:00,342.3,367182980,15,8,15,2,POINT (-93.95659 29.81096)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120187,1316,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,...,29.86100,0.1,2021-04-08 06:56:52+00:00,154.1,367774360,16,14,7,6,POINT (-93.93849 29.861)
120188,1316,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,...,29.86100,0.1,2021-04-08 07:01:52+00:00,196.6,367774360,16,14,7,6,POINT (-93.93849 29.861)
120189,1316,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,...,29.86099,0.1,2021-04-08 07:06:51+00:00,182.5,367774360,16,14,7,6,POINT (-93.93849 29.86099)
120190,1316,tanker,3EZV4,all,9397808,NINGBO DAWN,11.7,under-way-using-engine,-93.95630,29.82242,...,29.86097,0.1,2021-04-08 07:16:53+00:00,196.7,367774360,16,14,7,6,POINT (-93.9385 29.86097)


In [33]:
merged_data['time_diff'] = (merged_data['navigation_time_tug']-merged_data['navigation_time_nontug']).dt.total_seconds() / 60
merged_data['navigation_diff'] = abs(merged_data['navigation_course_tug']-merged_data['navigation_course_nontug'])
merged_data['speed_diff'] = abs(merged_data['navigation_speed_tug']-merged_data['navigation_speed_nontug'])


In [34]:
import numpy as np

# Function to calculate distance in kilometers using Haversine formula
def haversine(lat1, lon1, lat2, lon2):
    # Radius of the Earth in kilometers
    R = 6371.0
    
    # Convert latitude and longitude from degrees to radians
    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)
    
    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    distance = R * c
    
    return distance

# Apply the haversine function to each row to create a new column 'distance_km'
merged_data['distance_km'] = merged_data.apply(lambda row: haversine(row['navigation_location_lat_nontug'], row['navigation_location_long_nontug'], row['navigation_location_lat_tug'], row['navigation_location_long_tug']), axis=1)


In [35]:
filtered_columns = merged_data[[
    'index_nontug','index_tug',
    'vessel_name_nontug','vessel_name_tug',
    'navigation_speed_nontug','navigation_speed_tug', 
    'navigation_course_nontug','navigation_course_tug',
    'speed_diff','navigation_diff', 'distance_km','time_diff',
    'navigation_time_nontug'
    ]]

In [36]:
filtered_columns = filtered_columns[ 
    (abs(merged_data['time_diff']) < 3) & 
    (merged_data['distance_km'] < 0.25 ) &
    (merged_data['navigation_diff'] < 10 ) &
    (merged_data['speed_diff'] < 1 ) ]

In [37]:
filtered_columns

Unnamed: 0,index_nontug,index_tug,vessel_name_nontug,vessel_name_tug,navigation_speed_nontug,navigation_speed_tug,navigation_course_nontug,navigation_course_tug,speed_diff,navigation_diff,distance_km,time_diff,navigation_time_nontug
68238,639,1429,IRELAND,SABINE,8.7,8.8,341.1,342.3,0.1,1.2,0.037093,0.05,2021-04-07 19:26:56+00:00
68552,640,1430,IRELAND,SABINE,8.0,8.2,14.4,5.9,0.2,8.5,0.098622,0.033333,2021-04-07 19:31:55+00:00
69180,642,1432,IRELAND,SABINE,7.1,7.2,16.3,11.3,0.1,5.0,0.101213,-0.05,2021-04-07 19:42:00+00:00
69494,643,1433,IRELAND,SABINE,5.5,5.4,39.6,39.7,0.1,0.1,0.098313,0.05,2021-04-07 19:46:56+00:00
69808,644,1434,IRELAND,SABINE,4.1,4.0,39.7,42.6,0.1,2.9,0.099883,0.0,2021-04-07 19:51:58+00:00
70122,645,1435,IRELAND,SABINE,3.9,4.0,41.2,40.6,0.1,0.6,0.097495,-0.033333,2021-04-07 19:57:00+00:00
70436,646,1436,IRELAND,SABINE,5.2,5.4,26.6,33.7,0.2,7.1,0.10209,0.0,2021-04-07 20:01:57+00:00
70750,647,1437,IRELAND,SABINE,7.4,7.5,37.8,34.1,0.1,3.7,0.095502,0.05,2021-04-07 20:06:57+00:00
71064,648,1438,IRELAND,SABINE,8.2,8.3,38.5,38.1,0.1,0.4,0.093954,0.016667,2021-04-07 20:11:56+00:00
71378,649,1439,IRELAND,SABINE,8.6,8.7,37.8,40.7,0.1,2.9,0.094901,0.083333,2021-04-07 20:16:55+00:00


In [38]:
tugged_answer = pd.concat([
    tugs[tugs['index'].isin(filtered_columns['index_tug'])],
    non_tug[non_tug['index'].isin(filtered_columns['index_nontug'])]]
)


In [39]:
tugged_answer['Meeting'] = 1
tugged_answer

Unnamed: 0,index,vessel_type,vessel_callsign,vessel_subtype,vessel_imo,vessel_name,navigation_draught,navigation_status,navigation_location_long,navigation_location_lat,navigation_speed,navigation_time,navigation_course,device_mmsi,device_dimensions_to_bow,device_dimensions_to_starboard,device_dimensions_to_stern,device_dimensions_to_port,geometry,Meeting
4,1429,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.95659,29.81096,8.8,2021-04-07 19:26:59+00:00,342.3,367182980,15,8,15,2,POINT (-93.95659 29.81096),1
5,1430,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.95972,29.82188,8.2,2021-04-07 19:31:57+00:00,5.9,367182980,15,8,15,2,POINT (-93.95972 29.82188),1
7,1432,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.95240,29.84249,7.2,2021-04-07 19:41:57+00:00,11.3,367182980,15,8,15,2,POINT (-93.9524 29.84249),1
8,1433,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.94748,29.85031,5.4,2021-04-07 19:46:59+00:00,39.7,367182980,15,8,15,2,POINT (-93.94748 29.85031),1
9,1434,tug,WDD7182,,9397391,SABINE,4.0,under-way-using-engine,-93.94244,29.85557,4.0,2021-04-07 19:51:58+00:00,42.6,367182980,15,8,15,2,POINT (-93.94244 29.85557),1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,688,cargo,C6DD6,all,9770543,IRELAND,12.2,under-way-using-engine,-94.05058,30.06909,5.0,2021-04-07 23:30:18+00:00,309.4,311000646,196,23,33,10,POINT (-94.05058 30.06909),1
268,689,cargo,C6DD6,all,9770543,IRELAND,12.2,under-way-using-engine,-94.05697,30.07383,3.4,2021-04-07 23:36:35+00:00,305.2,311000646,196,23,33,10,POINT (-94.05697 30.07383),1
269,690,cargo,C6DD6,all,9770543,IRELAND,12.2,under-way-using-engine,-94.06166,30.07627,3.6,2021-04-07 23:41:36+00:00,297.6,311000646,196,23,33,10,POINT (-94.06166 30.07627),1
270,691,cargo,C6DD6,all,9770543,IRELAND,12.2,under-way-using-engine,-94.06220,30.07650,3.7,2021-04-07 23:42:07+00:00,297.4,311000646,196,23,33,10,POINT (-94.0622 30.0765),1


In [40]:
for index_value in tugged_answer['navigation_time']:
    datad.loc[datad['navigation_time'] == index_value, 'meeting'] = 1

# Fill NaN values in the "meeting" column with 0
datad['meeting'].fillna(0, inplace=True)

# Display the updated DataFrame Y
datad

Unnamed: 0,vessel_type,vessel_callsign,vessel_subtype,vessel_imo,vessel_name,navigation_draught,navigation_status,navigation_location_long,navigation_location_lat,navigation_speed,navigation_time,navigation_course,device_mmsi,device_dimensions_to_bow,device_dimensions_to_starboard,device_dimensions_to_stern,device_dimensions_to_port,meeting
0,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70755,29.41630,0.7,2021-04-07 16:01:55+00:00,45.9,224941000,226,24,58,19,0.0
1,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70643,29.41625,1.4,2021-04-07 16:06:15+00:00,118.6,224941000,226,24,58,19,0.0
2,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70459,29.41492,1.2,2021-04-07 16:11:05+00:00,144.9,224941000,226,24,58,19,0.0
3,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70459,29.41492,1.2,2021-04-07 16:11:05+00:00,144.9,224941000,226,24,58,19,0.0
4,tanker,EBZV,all,9236420,CATALUNYA SPIRIT,9.6,under-way-using-engine,-93.70219,29.41087,4.4,2021-04-07 16:21:54+00:00,97.0,224941000,226,24,58,19,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2342,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09057,30.07655,0.0,2021-04-08 08:11:46+00:00,277.5,563495000,187,16,26,15,0.0
2343,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09055,30.07655,0.0,2021-04-08 08:14:46+00:00,277.5,563495000,187,16,26,15,0.0
2344,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09057,30.07657,0.0,2021-04-08 08:20:45+00:00,277.5,563495000,187,16,26,15,0.0
2345,cargo,9VAW8,all,9081801,HOSANGER,9.1,moored,-94.09056,30.07656,0.0,2021-04-08 08:26:46+00:00,277.5,563495000,187,16,26,15,0.0


In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.metrics import accuracy_score, recall_score, precision_score, classification_report

# Load your dataframe (assuming you have it loaded as df)

# Encode the "meeting" column as 0 or 1
label_encoder = LabelEncoder()
datad['meeting'] = label_encoder.fit_transform(datad['meeting'])

# Define categorical and numeric columns
categorical_cols = ['vessel_type', 'vessel_callsign', 'vessel_subtype', 'vessel_name', 'navigation_status']
numeric_cols = ['vessel_imo', 'navigation_draught', 'navigation_location_long', 'navigation_location_lat',
                'navigation_speed', 'navigation_course', 'device_mmsi', 'device_dimensions_to_bow',
                'device_dimensions_to_starboard', 'device_dimensions_to_stern', 'device_dimensions_to_port']

# Preprocessing pipeline
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Split the data into training and testing sets
X = datad.drop(columns=['meeting'])
y = datad['meeting']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train different classification models using the preprocessor
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

for model_name, model in models.items():
    clf = Pipeline(steps=[('preprocessor', preprocessor),
                          ('classifier', model)])
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #print(f'{model_name} Accuracy: {accuracy:.2f}')

    print(f'{model_name} Accuracy:',metrics.accuracy_score(y_test, y_pred))
    print(f'{model_name} Recall:',metrics.recall_score(y_test, y_pred, zero_division=1))
    print(f'{model_name} Precision:',metrics.precision_score(y_test, y_pred, zero_division=1))
    print(f'{model_name} CL Report:')
    print(metrics.classification_report(y_test, y_pred, zero_division=1))
    print('- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -')

# Create empty lists to store the results
model_names = []
accuracies = []
recalls = []
precisions = []
classification_reports = []

# Assuming you have a dictionary called 'models' with model_name as keys and model objects as values
for model_name, model in models.items():
    clf = Pipeline(steps=[('preprocessor', preprocessor),
                          ('classifier', model)])
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, zero_division=1)
    precision = precision_score(y_test, y_pred, zero_division=1)
    classification_report_str = classification_report(y_test, y_pred, zero_division=1)

    # Append the results to the lists
    model_names.append(model_name)
    accuracies.append(accuracy)
    recalls.append(recall)
    precisions.append(precision)
    classification_reports.append(classification_report_str)

# Create a DataFrame from the lists
results_df = pd.DataFrame({
    'Model': model_names,
    'Accuracy': accuracies,
    'Recall': recalls,
    'Precision': precisions,
    'Classification Report': classification_reports
})

# Display the DataFrame
results_df


Logistic Regression Accuracy: 0.9553191489361702
Logistic Regression Recall: 0.0
Logistic Regression Precision: 1.0
Logistic Regression CL Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       449
           1       1.00      0.00      0.00        21

    accuracy                           0.96       470
   macro avg       0.98      0.50      0.49       470
weighted avg       0.96      0.96      0.93       470

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Decision Tree Accuracy: 0.9787234042553191
Decision Tree Recall: 0.7142857142857143
Decision Tree Precision: 0.7894736842105263
Decision Tree CL Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       449
           1       0.79      0.71      0.75        21

    accuracy                           0.98       470
   macro avg       0.89      0.85      0.87       470
weighted av

Unnamed: 0,Model,Accuracy,Recall,Precision,Classification Report
0,Logistic Regression,0.955319,0.0,1.0,precision recall f1-score ...
1,Decision Tree,0.978723,0.714286,0.789474,precision recall f1-score ...
2,Random Forest,0.985106,0.714286,0.9375,precision recall f1-score ...
3,SVM,0.955319,0.0,1.0,precision recall f1-score ...
4,K-Nearest Neighbors,0.955319,0.47619,0.5,precision recall f1-score ...


XGBoost

In [53]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load your dataframe (assuming you have it loaded as df)

# Drop non-numeric columns
df_numeric = datad.drop(columns=['vessel_type', 'vessel_callsign', 'vessel_subtype', 'vessel_name', 'navigation_status', 'navigation_time'])

# Split the data into training and testing sets
X = df_numeric.drop(columns=['meeting'])
y = df_numeric['meeting']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost classifier
model = xgb.XGBClassifier(objective="binary:logistic", random_state=42)

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'XGBoost Accuracy: {accuracy:.2f}')

print(f'XGBoost Accuracy:',metrics.accuracy_score(y_test, y_pred))
print(f'XGBoost Recall:',metrics.recall_score(y_test, y_pred, zero_division=1))
print(f'XGBoost Precision:',metrics.precision_score(y_test, y_pred, zero_division=1))
print(f'XGBoost CL Report:')
print(metrics.classification_report(y_test, y_pred, zero_division=1))



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


XGBoost Accuracy: 0.98
XGBoost Accuracy: 0.9829787234042553
XGBoost Recall: 0.7142857142857143
XGBoost Precision: 0.8823529411764706
XGBoost CL Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       449
           1       0.88      0.71      0.79        21

    accuracy                           0.98       470
   macro avg       0.93      0.85      0.89       470
weighted avg       0.98      0.98      0.98       470



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
