###This notebook implements a modular GPS spoofing simulator entirely in software. It uses real GPS/AIS data, injects spoofing attacks, labels each GPS point, and visualizes the results.

Tools: Python, Pandas, Numpy, Folium, Matplotlib

Goal: Generate labeled GPS datasets for spoofing research and ML model training.

####These libraries handle data manipulation, numerical operations, randomness, date-time handling, and visualization.

In [1]:
import pandas as pd
import numpy as np
import random
from datetime import timedelta
import folium


####Load Real GPS Data

In [2]:
df = pd.read_csv("/content/drive/MyDrive/GPS final.csv")
df.head()

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,speed_calc,acceleration,delta_heading,speed_ma,acceleration_ma,delta_heading_ma,LAT_pred,LON_pred,prediction_error
0,270995,2020-01-01 00:00:03,35.09744,-90.10038,0.0,245.1,511.0,0.0,,0.0,0.0,,0.0,,,
1,507027,2020-01-01 00:00:08,37.00643,-76.25625,0.1,308.3,511.0,0.0,,0.0,0.0,,0.0,37.00643,-76.25625,0.0
2,1056261,2020-01-01 00:00:12,26.11822,-80.14813,0.0,248.4,511.0,0.0,,0.0,0.0,,0.0,26.11822,-80.14813,0.0
3,1193046,2020-01-01 00:00:08,32.68644,-117.13303,0.0,0.0,511.0,0.0,,0.0,0.0,,0.0,32.68644,-117.13303,0.0
4,3669999,2020-01-01 00:00:01,33.7234,-118.28068,0.1,258.5,511.0,0.0,,0.0,0.0,,0.0,33.7234,-118.28068,0.0


In [3]:
type(df['BaseDateTime'][0])


str

#### Convert str to datetime object

In [4]:
import pandas as pd


df = pd.read_csv("/content/drive/MyDrive/GPS final.csv")


df['BaseDateTime'] = pd.to_datetime(df['BaseDateTime'])

print(df['BaseDateTime'].head())
print(type(df['BaseDateTime'][0]))


0   2020-01-01 00:00:03
1   2020-01-01 00:00:08
2   2020-01-01 00:00:12
3   2020-01-01 00:00:08
4   2020-01-01 00:00:01
Name: BaseDateTime, dtype: datetime64[ns]
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [6]:
df.head()
type(df['BaseDateTime'][0])


pandas._libs.tslibs.timestamps.Timestamp

#### function of feature engineering

In [7]:
def compute_features(df):
    df = df.sort_values(by='BaseDateTime').reset_index(drop=True)


    df['speed_calc'] = df['SOG']
    df['delta_heading'] = df['COG'].diff().fillna(0)
    df['acceleration'] = df['speed_calc'].diff().fillna(0)


    df['speed_ma'] = df['speed_calc'].rolling(3, min_periods=1).mean()
    df['acceleration_ma'] = df['acceleration'].rolling(3, min_periods=1).mean()
    df['delta_heading_ma'] = df['delta_heading'].rolling(3, min_periods=1).mean()


    df['LAT_pred'] = df['LAT'] + np.random.normal(0, 0.0001, len(df))
    df['LON_pred'] = df['LON'] + np.random.normal(0, 0.0001, len(df))


    df['prediction_error'] = np.sqrt((df['LAT']-df['LAT_pred'])**2 + (df['LON']-df['LON_pred'])**2)

    return df


###Attack Configurations and Details
#####A) Ghost Vehicle Attack

Goal: Create fake vessels appearing on the map.

Configuration:

ghost_count: number of fake vessels

steps: number of GPS points per ghost vessel

LAT/LON start: random location globally

SOG: random speed (5–15 knots)

COG/Heading: random heading (0–360°)

Feature Engineering:

Compute speed_calc, acceleration, delta_heading along path

Moving averages updated

LAT_pred, LON_pred slightly offset with noise

prediction_error compute

In [11]:
def simulate_ghost(df, ghost_count=3, steps=50):
    spoofed_data = []
    base_time = df['BaseDateTime'].min()
    for i in range(ghost_count):
        lat, lon = random.uniform(-90,90), random.uniform(-180,180)
        speed = random.uniform(5,15)
        heading = random.uniform(0,360)
        for step in range(steps):
            spoofed_data.append({
                'MMSI': 999000+i,
                'BaseDateTime': base_time + timedelta(seconds=step*10),
                'LAT': lat + np.random.normal(0, 0.0001),
                'LON': lon + np.random.normal(0, 0.0001),
                'SOG': speed,
                'COG': heading,
                'Heading': heading,
            })
    df_ghost = pd.DataFrame(spoofed_data)
    df_ghost = compute_features(df_ghost)
    df_ghost['Label'] = 'Spoofed'
    return df_ghost



In [14]:

df_ghost = simulate_ghost(df, ghost_count=3, steps=50)


print(df_ghost.head())


     MMSI        BaseDateTime        LAT         LON       SOG         COG  \
0  999000 2020-01-01 00:00:00  47.373661 -118.376384  8.685090  116.498132   
1  999001 2020-01-01 00:00:00  50.020616 -134.350289  8.011990  254.506573   
2  999002 2020-01-01 00:00:00  46.937132    8.903859  7.124136  186.657103   
3  999000 2020-01-01 00:00:10  47.373583 -118.376298  8.685090  116.498132   
4  999001 2020-01-01 00:00:10  50.020550 -134.350132  8.011990  254.506573   

      Heading  speed_calc  delta_heading  acceleration  speed_ma  \
0  116.498132    8.685090       0.000000      0.000000  8.685090   
1  254.506573    8.011990     138.008441     -0.673100  8.348540   
2  186.657103    7.124136     -67.849471     -0.887853  7.940405   
3  116.498132    8.685090     -70.158970      1.560953  7.940405   
4  254.506573    8.011990     138.008441     -0.673100  7.940405   

   acceleration_ma  delta_heading_ma   LAT_pred    LON_pred  prediction_error  \
0         0.000000          0.000000  47.

####B) Gradual Drift Attack

Goal: Slowly shift GPS positions over time → hard to detect visually.

Configuration:

drift_per_step: e.g., 0.00005° per step

Slice of original dataset to drift

Feature Engineering:

Update LAT/LON gradually

Recompute speed_calc, acceleration, delta_heading

Update moving averages, predicted positions, prediction_error

In [15]:
def apply_drift_attack(df_slice, drift_per_step=0.00005):
    df_copy = df_slice.copy()
    df_copy['LAT'] += np.linspace(0, drift_per_step*len(df_copy), len(df_copy))
    df_copy['LON'] += np.linspace(0, drift_per_step*len(df_copy), len(df_copy))
    df_copy = compute_features(df_copy)
    df_copy['Label'] = 'Spoofed'
    return df_copy


####C) Location Jump Attack

Goal: Suddenly teleport a vessel to fake coordinates.

Configuration:

jump_lat / jump_lon offset (e.g., +1°)

Slice of dataset for jump

Feature Engineering:

Speed and acceleration spike due to jump

delta_heading may change abruptly

Recompute moving averages, predictions, prediction_error

In [16]:
def apply_jump_attack(df_slice, jump_lat=1.0, jump_lon=1.0):
    df_copy = df_slice.copy()
    df_copy['LAT'] += jump_lat
    df_copy['LON'] += jump_lon
    df_copy = compute_features(df_copy)
    df_copy['Label'] = 'Spoofed'
    return df_copy


#### D) Replay Attack

Goal: Re-inject old GPS track at a later time.

Configuration:

Slice of original dataset

time_offset_hours: e.g., 5 hours

Feature Engineering:

Recompute moving averages, predictions, prediction_error

Derived features largely similar to original

In [17]:
def apply_replay_attack(df_slice, time_offset_hours=5):
    df_copy = df_slice.copy()
    df_copy['BaseDateTime'] += timedelta(hours=time_offset_hours)
    df_copy = compute_features(df_copy)
    df_copy['Label'] = 'Spoofed'
    return df_copy


####E) Meaconing Attack

Goal: Delay GPS signals → vessel appears behind real position.

Configuration:

time_delay: e.g., 10 seconds

Slice of dataset to delay

Feature Engineering:

Update BaseDateTime

Recompute speed, acceleration, heading, moving averages

Update predicted positions and prediction_error

In [20]:
def apply_meaconing_attack(df_slice, time_delay=10):
    df_copy = df_slice.copy()
    df_copy['BaseDateTime'] += timedelta(seconds=time_delay)
    df_copy = compute_features(df_copy)
    df_copy['Label'] = 'Spoofed'
    return df_copy


In [21]:

ghost_df = simulate_ghost(df, ghost_count=3, steps=50)
drift_df = apply_drift_attack(df.iloc[:500], drift_per_step=0.00005)
jump_df = apply_jump_attack(df.iloc[100:200], jump_lat=1.0, jump_lon=1.0)
replay_df = apply_replay_attack(df.iloc[:100], time_offset_hours=5)
meaconing_df = apply_meaconing_attack(df.iloc[200:300], time_delay=10)


normal_data = df.assign(Label='Normal')
spoofed_attacks = [ghost_df, drift_df, jump_df, replay_df, meaconing_df]

final_dataset = pd.concat([normal_data] + spoofed_attacks)
final_dataset = final_dataset.sort_values(by='BaseDateTime').reset_index(drop=True)

print(final_dataset.head())


        MMSI BaseDateTime       LAT        LON  SOG    COG  Heading  \
0  367109910   2020-01-01  47.65925 -122.37875  0.0  342.3     95.0   
1  338127175   2020-01-01  47.64416 -122.31557  0.0  360.0    511.0   
2  367186210   2020-01-01  36.83719  -76.29111  0.0  360.0    511.0   
3  367182980   2020-01-01  29.93012  -93.87452  7.3  219.3    511.0   
4  368111750   2020-01-01  29.37558  -94.88956  0.0  119.9    266.0   

   speed_calc  acceleration  delta_heading  speed_ma  acceleration_ma  \
0         0.0           NaN            0.0       0.0              NaN   
1         0.0           NaN            0.0       0.0              NaN   
2         0.0           NaN            0.0       0.0              NaN   
3         0.0           NaN            0.0       0.0              NaN   
4         0.0           NaN            0.0       0.0              NaN   

   delta_heading_ma  LAT_pred   LON_pred  prediction_error   Label  
0               0.0  47.65925 -122.37875               0.0  Norma

#### Visualisation

In [22]:
import folium
m = folium.Map(location=[final_dataset['LAT'].mean(), final_dataset['LON'].mean()], zoom_start=4)
for _, row in final_dataset.iterrows():
    color = 'red' if row['Label']=='Spoofed' else 'blue'
    folium.CircleMarker([row['LAT'], row['LON']], radius=2, color=color).add_to(m)
m.save("gps_spoofing_simulation.html")


In [23]:
final_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6351 entries, 0 to 6350
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   MMSI              6351 non-null   int64         
 1   BaseDateTime      6351 non-null   datetime64[ns]
 2   LAT               6351 non-null   float64       
 3   LON               6351 non-null   float64       
 4   SOG               6351 non-null   float64       
 5   COG               6351 non-null   float64       
 6   Heading           6351 non-null   float64       
 7   speed_calc        6351 non-null   float64       
 8   acceleration      1462 non-null   float64       
 9   delta_heading     6351 non-null   float64       
 10  speed_ma          6351 non-null   float64       
 11  acceleration_ma   1462 non-null   float64       
 12  delta_heading_ma  6351 non-null   float64       
 13  LAT_pred          6350 non-null   float64       
 14  LON_pred          6350 n

In [25]:
final_dataset.describe()

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,speed_calc,acceleration,delta_heading,speed_ma,acceleration_ma,delta_heading_ma,LAT_pred,LON_pred,prediction_error
count,6351.0,6351,6351.0,6351.0,6351.0,6351.0,6351.0,6351.0,1462.0,6351.0,6351.0,1462.0,6351.0,6350.0,6350.0,6350.0
mean,348481800.0,2020-01-01 00:05:08.630294272,35.128111,-94.796527,2.994935,200.78764,345.18364,0.991731,0.001042,-0.082647,0.925069,0.000739,-0.057393,35.128116,-94.797266,1.9e-05
min,270995.0,2020-01-01 00:00:00,-83.849748,-178.22307,0.0,0.0,0.0,0.0,-24.4,-360.0,0.0,-8.1,-177.5,-83.849756,-178.22307,0.0
25%,338188500.0,2020-01-01 00:00:05,29.73202,-117.927435,0.0,109.75,182.0,0.0,-0.022659,0.0,0.0,-0.007553,0.0,29.73194,-117.927458,0.0
50%,367146400.0,2020-01-01 00:00:09,33.75277,-90.53179,0.0,219.3,357.0,0.0,0.000293,0.0,0.0,0.000251,0.0,33.752765,-90.531766,0.0
75%,367638200.0,2020-01-01 00:00:30,40.82675,-80.449495,4.5,289.25,511.0,0.0,0.1,0.0,0.0,0.069774,0.0,40.83429,-80.446373,0.0
max,1073709000.0,2020-01-01 05:02:04,72.392433,87.255126,102.3,360.0,511.0,24.4,22.6,360.0,17.4,8.1,172.0,72.392634,87.255215,0.000408
std,80540150.0,,13.83928,27.727491,6.185172,109.171231,181.226084,3.284561,6.129358,61.341608,2.705451,2.133956,20.6123,13.84037,27.729613,5.2e-05


In [26]:

label_counts = final_dataset['Label'].value_counts()
print(label_counts)




Label
Normal     5401
Spoofed     950
Name: count, dtype: int64


In [27]:

final_dataset.to_csv("gps_spoofing.csv", index=False)
