In [2]:
# Import required libraries
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [4]:
# Read the ship-position workbook into a DataFrame
file_path = "synthetic_ship_data.xlsx"  # Relative path, resolved against the current working directory; change it if the workbook lives elsewhere
data = pd.read_excel(io=file_path)

In [6]:
# Build the supervised targets: for every row, the "next" position is the
# position stored in the following row, obtained by shifting each column up by one.
# The final row has no successor, so its Next_* values come out as NaN.
next_step_columns = {'Next_Latitude': 'Latitude', 'Next_Longitude': 'Longitude'}
for target_column, source_column in next_step_columns.items():
    data[target_column] = data[source_column].shift(-1)

In [8]:
# Drop only the rows that cannot form a complete (current -> next) training pair,
# i.e. rows with a missing value in one of the four columns the model actually uses.
# This removes the last row (its Next_* values are NaN because of the shift) without
# also discarding rows that merely have a gap in some unrelated column, which a bare
# dropna() would do.
model_columns = ['Latitude', 'Longitude', 'Next_Latitude', 'Next_Longitude']
data = data.dropna(subset=model_columns)

In [10]:
# Inputs are the current position; outputs are the position one row later
feature_columns = ['Latitude', 'Longitude']
target_columns = ['Next_Latitude', 'Next_Longitude']

X = data[feature_columns]
y = data[target_columns]

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles by default, so held-out rows are drawn from
# between training rows of the same sequence -- confirm that is the intended evaluation.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [14]:
# Two-step model: standardize the inputs, then regress with k-nearest neighbours
scaler_step = ('scaler', StandardScaler())
knn_step = ('knn', KNeighborsRegressor(n_neighbors=3))  # n_neighbors is the main knob to tune
pipeline = Pipeline(steps=[scaler_step, knn_step])

In [16]:
# Train the model: fit the pipeline (scaler followed by the KNN regressor)
# on the training split only, so the test rows stay unseen until evaluation.
pipeline.fit(X_train, y_train)

In [18]:
# Score the fitted pipeline on the held-out rows.
# The single MSE figure covers both predicted columns together.
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 0.00040888888888893624


In [20]:
# Function to predict the next latitude and longitude
def predict_next_location(current_lat, current_lon):
    """Predict the position that follows the given position.

    The module-level ``pipeline`` is queried with a one-row DataFrame whose
    columns carry the same names the pipeline was fitted with
    (``Latitude``, ``Longitude``). Passing a bare nested list instead makes
    scikit-learn warn that the input "does not have valid feature names",
    because the pipeline was trained on a named DataFrame.

    Parameters
    ----------
    current_lat : float
        Latitude of the current position.
    current_lon : float
        Longitude of the current position.

    Returns
    -------
    The result of ``pipeline.predict`` for that single row. Callers in this
    notebook take element ``[0]`` to obtain the (next_lat, next_lon) pair.
    """
    query = pd.DataFrame(
        [[current_lat, current_lon]],
        columns=['Latitude', 'Longitude'],
    )
    return pipeline.predict(query)

In [22]:
# Demonstrate the helper on the first held-out sample
current_lat, current_lon = X_test.iloc[0].tolist()
current_position = (current_lat, current_lon)
predicted_lat_lon = predict_next_location(*current_position)
print("Current Location (Lat, Lon):", current_position)
print("Predicted Next Location (Lat, Lon):", predicted_lat_lon[0])

Current Location (Lat, Lon): (20.8, 71.3)
Predicted Next Location (Lat, Lon): [20.80666667 71.30666667]




In [36]:
# Now loop: feed each prediction back into the model to generate a multi-step route

In [56]:
import pandas as pd
import numpy as np

In [68]:
# Number of successive positions to generate when rolling the model forward
num_predictions = 400  # Change as needed

In [70]:
# Seed the roll-forward with the first row of the feature table
initial_location = X.iloc[0]
current_lat = initial_location['Latitude']
current_lon = initial_location['Longitude']

In [72]:
# Accumulator for the generated route: one {'Latitude': ..., 'Longitude': ...}
# dict is appended per predicted step
generated_data = []

In [84]:
# Generate future latitude and longitude values by rolling the model forward:
# each adjusted prediction becomes the query point for the next step.
#
# The cell rebuilds its own starting state (empty accumulator, start position taken
# from initial_location), so re-running it on its own reproduces the same route
# instead of appending to, and continuing from, the previous run.
LATITUDE_STEP = 0.0100   # Fixed amount added to every predicted latitude; adjust as needed
LONGITUDE_STEP = 0.0100  # Fixed amount added to every predicted longitude; adjust as needed

generated_data = []
current_lat, current_lon = initial_location['Latitude'], initial_location['Longitude']

for _ in range(num_predictions):
    # Reuse the notebook's prediction helper rather than calling the pipeline directly
    next_lat, next_lon = predict_next_location(current_lat, current_lon)[0]

    # Nudge both coordinates upward by a fixed step. This is meant to keep the route
    # ascending; the model output alone does not guarantee that.
    next_lat += LATITUDE_STEP
    next_lon += LONGITUDE_STEP

    # Record the adjusted position
    generated_data.append({'Latitude': next_lat, 'Longitude': next_lon})

    # Feed the adjusted position back in as the next query point
    current_lat, current_lon = next_lat, next_lon




In [86]:
# Destination workbook for the generated route
generated_file_path = "ordered_predicted_ship_route.xlsx"

# Turn the collected records into a table, order it by latitude then longitude,
# and renumber the rows from zero.
# NOTE(review): sorting replaces generation order with coordinate order -- confirm
# that is what consumers of this file expect.
generated_data_df = (
    pd.DataFrame(generated_data)
    .sort_values(by=['Latitude', 'Longitude'])
    .reset_index(drop=True)
)

# Write the table without the index column and report where it went
generated_data_df.to_excel(generated_file_path, index=False)
print(f"Generated data saved to {generated_file_path}")

Generated data saved to ordered_predicted_ship_route.xlsx
