In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import joblib

# Train an Isolation Forest on vessel position/time data and persist the
# fitted model together with the scaler used to normalise its inputs.

# Load dataset
raw_df = pd.read_csv("/content/NEWCaseStudyData1.csv")

# Parse the time column. Unparseable values become NaT and are removed below
# together with rows lacking a position. utc=True yields one tz-aware dtype
# even if the file mixes UTC offsets.
df = raw_df.assign(
    BaseDateTime=pd.to_datetime(raw_df['BaseDateTime'], errors='coerce', utc=True)
).dropna(subset=['BaseDateTime', 'LAT', 'LON'])

# Convert time to a Unix timestamp (whole seconds).
# Timedelta arithmetic is used instead of `.astype('int64') // 10**9` because
# the integer view of a datetime column depends on its storage unit (ns/us/s),
# which differs between pandas versions; this form always yields seconds.
UNIX_EPOCH = pd.Timestamp("1970-01-01", tz="UTC")
df = df.assign(
    Timestamp=(df['BaseDateTime'] - UNIX_EPOCH) // pd.Timedelta(seconds=1)
)

# Select features
features = df[['LAT', 'LON', 'Timestamp']]

# Scale features (the fitted scaler is saved so inference can reuse it)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# Train Isolation Forest; contamination=0.05 makes the model label roughly
# 5% of the training rows as anomalies, random_state fixes the tree sampling.
model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
model.fit(X_scaled)

# Save model and scaler
joblib.dump(model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')

print(" Model and scaler saved successfully!")

 Model and scaler saved successfully!


In [None]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler

# Score ship positions with the previously saved Isolation Forest and export
# the rows it flags as anomalous.

# === Step 1: Load trained model and scaler ===
# NOTE: joblib.load unpickles arbitrary Python objects; only load files that
# were produced by a trusted run of the training cell.
model = joblib.load('model.pkl')
scaler = joblib.load('scaler.pkl')

# === Step 2: Load new ship data ===
raw_df = pd.read_csv("/content/NEWCaseStudyData1.csv")  # Update path if needed

# === Step 3: Preprocess the data ===
# Parse BaseDateTime (bad values -> NaT) and drop rows missing critical data.
# utc=True yields one tz-aware dtype even if the file mixes UTC offsets.
df = raw_df.assign(
    BaseDateTime=pd.to_datetime(raw_df['BaseDateTime'], errors='coerce', utc=True)
).dropna(subset=['BaseDateTime', 'LAT', 'LON'])

# Create a Unix timestamp column (whole seconds).
# Timedelta arithmetic is used instead of `.astype('int64') // 10**9` because
# the integer view of a datetime column depends on its storage unit (ns/us/s),
# which differs between pandas versions. The scaler was fitted on seconds, so
# the unit must not drift at inference time.
UNIX_EPOCH = pd.Timestamp("1970-01-01", tz="UTC")
df = df.assign(
    Timestamp=(df['BaseDateTime'] - UNIX_EPOCH) // pd.Timedelta(seconds=1)
)

# Extract features used during training (same columns, same order)
features = df[['LAT', 'LON', 'Timestamp']]

# Scale features using the same scaler (transform only; never refit here)
X_scaled = scaler.transform(features)

# === Step 4: Use the trained model to predict anomalies ===
predictions = model.predict(X_scaled)  # Output: 1 (normal) or -1 (anomaly)
df = df.assign(Anomaly=predictions)

# === Step 5: Extract and display anomalies ===
anomalies = df[df['Anomaly'] == -1]

# Display the anomaly points
print("\nAnomalies Detected:")
print(anomalies[['VesselName', 'BaseDateTime', 'LAT', 'LON']])

# === Step 6: Optional – Save anomalies to a new CSV ===
anomalies.to_csv("anomalies_detected.csv", index=False)
print("\nAnomalies saved to 'anomalies_detected.csv'")



Anomalies Detected:
   VesselName              BaseDateTime    LAT   LON
0  VICTORIA L 2025-04-09 07:55:00+00:00  53.46  4.61
1  VICTORIA L 2025-04-09 08:05:00+00:00  53.43  4.58

Anomalies saved to 'anomalies_detected.csv'


In [None]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest

# Retrain the Isolation Forest on the synthetic + case-study data combined,
# persist it, and report which rows of the combined set it flags.

# === Load and combine datasets ===
synthetic_df = pd.read_csv("/content/SyntheticShipData.csv")
case_study_df = pd.read_csv("/content/NEWCaseStudyData1.csv")
combined_raw = pd.concat([synthetic_df, case_study_df], ignore_index=True)

# === Preprocess ===
# Unparseable times become NaT and are dropped along with rows lacking a
# position. utc=True yields one tz-aware dtype even if the two files use
# different UTC offsets.
df = combined_raw.assign(
    BaseDateTime=pd.to_datetime(combined_raw['BaseDateTime'], errors='coerce', utc=True)
).dropna(subset=['BaseDateTime', 'LAT', 'LON'])

# Unix timestamp in whole seconds. Timedelta arithmetic is used instead of
# `.astype('int64') // 10**9` because the integer view of a datetime column
# depends on its storage unit (ns/us/s), which differs between pandas versions.
UNIX_EPOCH = pd.Timestamp("1970-01-01", tz="UTC")
df = df.assign(
    Timestamp=(df['BaseDateTime'] - UNIX_EPOCH) // pd.Timedelta(seconds=1)
)

# === Extract features ===
features = df[['LAT', 'LON', 'Timestamp']]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# === Train Isolation Forest on combined data ===
model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
model.fit(X_scaled)

# === Save model and scaler ===
joblib.dump(model, 'model_combined.pkl')
joblib.dump(scaler, 'scaler_combined.pkl')

# === Predict anomalies ===
predictions = model.predict(X_scaled)  # 1 (normal) or -1 (anomaly)
df = df.assign(Anomaly=predictions)

# Computed once and reused by every report below and by the CSV export.
anomalies = df[df['Anomaly'] == -1]

# === Show stats ===
print("\nAnomaly label counts:")
print(df['Anomaly'].value_counts())

# Rank flagged rows by the model's decision_function (lower = more abnormal)
# so that this section really shows the 10 strongest anomalies rather than
# every flagged row in index order.
scores = pd.Series(model.decision_function(X_scaled), index=df.index)
top_anomalies = anomalies.loc[scores.loc[anomalies.index].nsmallest(10).index]

print("\n Top 10 anomaly points:")
print(top_anomalies[['VesselName', 'BaseDateTime', 'LAT', 'LON']])

print("\nVessel-wise anomaly count:")
print(anomalies['VesselName'].value_counts())

# === Save anomalies to CSV ===
anomalies.to_csv("all_anomalies.csv", index=False)
print("\nSaved all detected anomalies to 'all_anomalies.csv'")


Anomaly label counts:
Anomaly
 1    173
-1     10
Name: count, dtype: int64

 Top 10 anomaly points:
     VesselName              BaseDateTime        LAT        LON
0     SKATZOURA 2025-04-12 07:49:00+00:00   8.350594  76.473676
12    SKATZOURA 2025-04-12 08:14:18+00:00   8.395055  76.408694
13    SKATZOURA 2025-04-12 08:14:55+00:00   8.396153  76.407096
149  VICTORIA L 2025-04-09 07:55:00+00:00  53.460000   4.610000
150  VICTORIA L 2025-04-09 08:05:00+00:00  53.430000   4.580000
151  VICTORIA L 2025-04-09 08:26:00+00:00  53.350000   4.530000
152  VICTORIA L 2025-04-09 08:38:00+00:00  53.320000   4.510000
157  VICTORIA L 2025-04-09 09:33:00+00:00  53.120000   4.360000
181  VICTORIA L 2025-04-09 15:15:00+00:00  52.380000   3.470000
182  VICTORIA L 2025-04-09 15:38:00+00:00  52.370000   3.470000

Vessel-wise anomaly count:
VesselName
VICTORIA L    7
SKATZOURA     3
Name: count, dtype: int64

Saved all detected anomalies to 'all_anomalies.csv'
