In [3]:
import os
import pandas as pd
import numpy as np

# Common root of the project; the input folder and every output folder live under it
_ais_project_root = "C:/Users/HU84VR/Downloads/AIS Project1/Test Trajectories for AIS 20240831"

# Input folder with the source trajectory CSVs, plus one output folder per gap scenario
input_folder = _ais_project_root + "/25 chosen perfect trajectory data"
output_single_folder = _ais_project_root + "/output/single"
output_multiple_folder = _ais_project_root + "/output/multiple"
output_realistic_folder = _ais_project_root + "/output/realistic_frequency"

# Make sure every scenario folder exists before anything is written into it
for _scenario_folder in (output_single_folder, output_multiple_folder, output_realistic_folder):
    os.makedirs(_scenario_folder, exist_ok=True)

def _random_gap_mask(length, gap_size, gap_count, draw):
    """Return a boolean array of `length` marking `gap_count` random runs of `gap_size` rows."""
    mask = np.zeros(length, dtype=bool)
    for _ in range(gap_count):
        # The start is drawn so the whole run fits inside the trajectory.
        # Runs are drawn independently, so they may overlap each other.
        start = draw(0, length - gap_size)
        mask[start:start + gap_size] = True
    return mask


# Function to apply missing data scenarios
def apply_reduction_methods(df, file_name, rng=None):
    """Write three gap-reduced copies of one trajectory to the scenario folders.

    Scenarios (all gap positions are random, row counts use integer division):
      * multiple gaps: 5-10 runs, each ``len(df) // 20`` rows long;
      * single gap: one run of ``len(df) // 3`` rows;
      * realistic frequency: 2-3 runs of ``len(df) // 6`` rows combined with
        10-20 runs of ``len(df) // 100`` rows.

    Rows are removed by position, so the result does not depend on the
    DataFrame's index labels. Each reduced frame is re-indexed from 0 and
    saved (without the index) into ``output_multiple_folder``,
    ``output_single_folder`` and ``output_realistic_folder`` respectively,
    under a name built from the first value of the ``MMSI`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Trajectory rows; must contain an ``MMSI`` column.
    file_name : str
        Name of the source file, used only in the "skipping" messages.
    rng : numpy.random.Generator, optional
        Source of randomness. Pass a seeded generator for reproducible gaps.
        When omitted, the global ``np.random`` state is used, so
        ``np.random.seed`` still controls the outcome.

    Returns
    -------
    None
        A file without an ``MMSI`` column, or without any rows, is reported
        with a printed message and skipped.
    """
    length = len(df)

    if 'MMSI' not in df.columns:
        print(f"MMSI column not found in {file_name}. Skipping this file.")
        return

    # A header-only CSV has no first row to read the MMSI from and nothing to
    # reduce, so skip it instead of failing on .iloc[0].
    if length == 0:
        print(f"No rows found in {file_name}. Skipping this file.")
        return

    # Assuming MMSI is the same for all rows in the file
    mmsi_number = df['MMSI'].iloc[0]

    # Both callables take (low, high) with an exclusive upper bound.
    draw = np.random.randint if rng is None else rng.integers

    # Multiple gap reduction: remove 5-10 small sections, each 1/20 of the total length
    number_of_small_gaps = draw(5, 11)
    multiple_mask = _random_gap_mask(length, length // 20, number_of_small_gaps, draw)
    df_multiple_gap = df[~multiple_mask].reset_index(drop=True)

    # Single large gap reduction: remove 1/3 of the trajectory
    single_mask = _random_gap_mask(length, length // 3, 1, draw)
    df_single_gap = df[~single_mask].reset_index(drop=True)

    # Realistic frequency reduction: 2-3 large gaps (1/6 of the length each)
    # combined with 10-20 very small gaps (1/100 of the length each)
    number_of_large_gaps = draw(2, 4)
    realistic_mask = _random_gap_mask(length, length // 6, number_of_large_gaps, draw)
    number_of_small_gaps_realistic = draw(10, 21)
    realistic_mask |= _random_gap_mask(length, length // 100, number_of_small_gaps_realistic, draw)
    df_realistic_frequency = df[~realistic_mask].reset_index(drop=True)

    # Define the new filenames based on the MMSI number and the scenario type
    single_gap_file_name = f"AIS data of MMSI {mmsi_number} Class A_single gap.csv"
    multiple_gaps_file_name = f"AIS data of MMSI {mmsi_number} Class A_multiple gaps.csv"
    realistic_frequency_file_name = f"AIS data of MMSI {mmsi_number} Class A_realistic_frequency.csv"

    # Save the files to the corresponding folders
    df_single_gap.to_csv(os.path.join(output_single_folder, single_gap_file_name), index=False)
    df_multiple_gap.to_csv(os.path.join(output_multiple_folder, multiple_gaps_file_name), index=False)
    df_realistic_frequency.to_csv(os.path.join(output_realistic_folder, realistic_frequency_file_name), index=False)

# Walk the input folder and run the gap scenarios on every CSV found there
for file_name in os.listdir(input_folder):
    if not file_name.endswith('.csv'):
        continue  # anything that is not a CSV file is ignored
    file_path = os.path.join(input_folder, file_name)
    df = pd.read_csv(file_path)
    apply_reduction_methods(df, file_name)

print("Data reduction and saving complete.")

Data reduction and saving complete.


In [4]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import contextily as ctx

# Function to calculate the bounding box (extent) for the plots
def get_bounding_box(folder_path):
    """Return the coordinate extent covered by all CSV files in a folder.

    Every file in `folder_path` whose name ends with '.csv' is read; files
    lacking a 'Latitude' or a 'Longitude' column are ignored.

    Returns
    -------
    tuple
        (min_lon, max_lon, min_lat, max_lat). If no file contributed any
        coordinates the initial sentinels are returned unchanged:
        (inf, -inf, inf, -inf).
    """
    lat_low, lat_high = float('inf'), -float('inf')
    lon_low, lon_high = float('inf'), -float('inf')

    for entry in os.listdir(folder_path):
        if not entry.endswith('.csv'):
            continue
        frame = pd.read_csv(os.path.join(folder_path, entry))
        if 'Latitude' not in frame.columns or 'Longitude' not in frame.columns:
            continue

        # Widen the running extent with this file's own extremes
        latitudes = frame['Latitude']
        longitudes = frame['Longitude']
        lat_low = min(lat_low, latitudes.min())
        lat_high = max(lat_high, latitudes.max())
        lon_low = min(lon_low, longitudes.min())
        lon_high = max(lon_high, longitudes.max())

    return lon_low, lon_high, lat_low, lat_high

# Function to plot and save trajectory with start (green), end (red) points, and smaller blue points for the rest
def plot_and_save_trajectory_with_map(csv_path, save_path, title, bbox):
    """Plot one trajectory CSV over an OpenStreetMap basemap and save it as a PNG.

    The first row is drawn as a large green marker, the last row as a large
    red marker and every row in between as a small blue point. The figure is
    written to ``<save_path>/<title>.png`` and then closed.

    Parameters
    ----------
    csv_path : str
        CSV file with 'Latitude' and 'Longitude' columns.
    save_path : str
        Existing folder that receives the PNG.
    title : str
        Used in the plot title and as the PNG file name (without extension).
    bbox : sequence
        (min_lon, max_lon, min_lat, max_lat) applied as the axis limits.

    Returns
    -------
    None
        Nothing is plotted when the file lacks the coordinate columns or
        contains no rows.
    """
    data = pd.read_csv(csv_path)

    # Ensure the columns for latitude and longitude exist
    if 'Latitude' not in data.columns or 'Longitude' not in data.columns:
        return

    # Without rows there is no start/end point to draw; bail out before a
    # figure is allocated rather than failing on .iloc[0].
    if data.empty:
        print(f"No rows found in {csv_path}. Skipping plot.")
        return

    # Fixed size: 8x6 inches at 200 DPI (4:3 ratio)
    fig, ax = plt.subplots(figsize=(8, 6), dpi=200)
    try:
        start_point = data.iloc[0]       # First row (start point)
        end_point = data.iloc[-1]        # Last row (end point)
        middle_points = data.iloc[1:-1]  # All other points

        # NOTE(review): labels are set on all three scatters but no legend is
        # ever drawn — confirm whether ax.legend() was intended.
        ax.scatter(middle_points['Longitude'], middle_points['Latitude'], c='blue', s=2, label='Middle Points')
        ax.scatter(start_point['Longitude'], start_point['Latitude'], c='green', s=100, label='Start Point')
        ax.scatter(end_point['Longitude'], end_point['Latitude'], c='red', s=100, label='End Point')

        # Set the aspect ratio and labels
        ax.set_aspect('equal')
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        ax.set_title(f'Trajectory: {title}')

        # Apply the bounding box to ensure all plots have the same region
        ax.set_xlim(bbox[0], bbox[1])
        ax.set_ylim(bbox[2], bbox[3])

        # Add OpenStreetMap tiles using contextily
        ctx.add_basemap(ax, crs='EPSG:4326', source=ctx.providers.OpenStreetMap.Mapnik)

        # Save this specific figure rather than whatever pyplot considers current
        save_file_path = os.path.join(save_path, f"{title}.png")
        fig.savefig(save_file_path)
    finally:
        # Always release the figure, even if plotting or the basemap call
        # raised, so a failing file in a batch does not leave figures open.
        plt.close(fig)

# Function to plot and save all trajectories from a directory with a map background
def plot_and_save_all_trajectories_from_folder_with_map(folder_path, save_folder_path, bbox):
    """Plot every CSV in `folder_path` and save the images into `save_folder_path`.

    Parameters
    ----------
    folder_path : str
        Folder scanned (non-recursively) for files whose name ends with '.csv'.
    save_folder_path : str
        Destination folder for the PNGs; created, including parents, if missing.
    bbox : sequence
        Passed through unchanged to `plot_and_save_trajectory_with_map` for
        every file.

    Each CSV file name (including its '.csv' suffix) is passed on as the plot
    title, so the images end up named '<file name>.png'.
    """
    # exist_ok avoids the check-then-create race and matches how the other
    # output folders in this notebook are created.
    os.makedirs(save_folder_path, exist_ok=True)

    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            plot_and_save_trajectory_with_map(file_path, save_folder_path, filename, bbox)

# Reduced trajectories are read from <output>/<scenario>; plots are written to <output>/plots/<scenario>
_scenario_output_root = "C:/Users/HU84VR/Downloads/AIS Project1/Test Trajectories for AIS 20240831/output"

# Folders holding the reduced trajectory CSVs
single_folder_path = _scenario_output_root + "/single"
multiple_folder_path = _scenario_output_root + "/multiple"
realistic_frequency_folder_path = _scenario_output_root + "/realistic_frequency"

# Folders receiving the rendered PNGs
single_plot_folder_path = _scenario_output_root + "/plots/single"
multiple_plot_folder_path = _scenario_output_root + "/plots/multiple"
realistic_frequency_plot_folder_path = _scenario_output_root + "/plots/realistic_frequency"

# One bounding box per scenario folder, all computed before any plotting starts,
# so every plot within a scenario shows the same region
bbox_single, bbox_multiple, bbox_realistic = (
    get_bounding_box(scenario_folder)
    for scenario_folder in (
        single_folder_path,
        multiple_folder_path,
        realistic_frequency_folder_path,
    )
)

# Render the scenarios in order: single, multiple, realistic_frequency
for _data_folder, _plot_folder, _scenario_bbox in (
    (single_folder_path, single_plot_folder_path, bbox_single),
    (multiple_folder_path, multiple_plot_folder_path, bbox_multiple),
    (realistic_frequency_folder_path, realistic_frequency_plot_folder_path, bbox_realistic),
):
    plot_and_save_all_trajectories_from_folder_with_map(_data_folder, _plot_folder, _scenario_bbox)