In [1]:
# -------------------------------------
# 📌 STEP 0: IMPORT REQUIRED LIBRARIES
# -------------------------------------

In [2]:
import os
import sqlite3
import time
import unittest
from contextlib import closing

import pandas as pd
from geopy.distance import geodesic
DB_NAME = "ais_data.sqlite"

In [3]:
# ------------------------------------------
# 📌 STEP 1: CHECK DATABASE FILE AND TABLES
# ------------------------------------------

In [4]:
# Check for the file first: sqlite3.connect() on a missing path would create a
# new, empty database instead of reporting the problem.
if not os.path.exists(DB_NAME):
    print(f"❌ Database file '{DB_NAME}' not found in {os.getcwd()}")
else:
    print(f"✅ Database found at: {os.path.abspath(DB_NAME)}")
    # A sqlite3 connection used directly as a context manager only commits or
    # rolls back the transaction; closing() is what actually releases the handle.
    with closing(sqlite3.connect(DB_NAME)) as conn:
        tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
        print(f"📋 Tables in DB: {tables}")
        # fetchall() yields 1-tuples, hence the tuple in the membership test.
        if ('ais_messages',) in tables:
            df = pd.read_sql_query("SELECT * FROM ais_messages LIMIT 5", conn)
            print("\n📊 Sample rows from 'ais_messages':")
            print(df)
        else:
            print("⚠️ Table 'ais_messages' not found in database.")

✅ Database found at: C:\Users\Divya.M\ais_data.sqlite
📋 Tables in DB: [('ais_messages',), ('sqlite_sequence',)]

📊 Sample rows from 'ais_messages':
   id       mmsi                          timestamp        lat         lon  \
0   1  123456789  2025-04-30T09:38:40.869824+00:00Z  30.732393  121.827393   
1   2  123456789  2025-04-30T09:43:40.869824+00:00Z  30.735913  121.856710   
2   3  123456789  2025-04-30T09:48:40.869824+00:00Z  30.739435  121.886028   
3   4  123456789  2025-04-30T09:53:40.869824+00:00Z  30.742955  121.915345   
4   5  123456789  2025-04-30T09:58:40.869824+00:00Z  30.746477  121.944663   

   speed  heading  course                                      raw_payload  
0   18.0       90    90.0  !AIVDO,1,1,,A,11mg=5OP2l8ecW`AUM33Q2l1P000,0*69  
1   18.0       90    90.0  !AIVDO,1,1,,A,11mg=5OP2l8el=DAUUC3Q2l1P000,0*40  
2   18.0       90    90.0  !AIVDO,1,1,,A,11mg=5OP2l8etk2AUeSCQ2l1P000,0*28  
3   18.0       90    90.0  !AIVDO,1,1,,A,11mg=5OP2l8f5HfAUmkCQ2l1P000,0*2D 

In [5]:
# --------------------------
# 📌 STEP 2: BASIC DB STATS
# --------------------------

In [6]:
# closing() guarantees the connection is released; `with sqlite3.connect(...)`
# on its own only scopes the transaction and leaves the connection open.
with closing(sqlite3.connect(DB_NAME)) as conn:
    total_rows = conn.execute("SELECT COUNT(*) FROM ais_messages").fetchone()[0]
    distinct_mmsi = conn.execute("SELECT COUNT(DISTINCT mmsi) FROM ais_messages").fetchone()[0]
    # NOTE(review): MIN/MAX compare the stored values as-is; this presumably
    # matches chronological order only while every timestamp shares one
    # text format - confirm against the writer.
    time_range = conn.execute("SELECT MIN(timestamp), MAX(timestamp) FROM ais_messages").fetchone()

print("\n🧠 Basic AIS DB Stats:")
print(f"📦 Total AIS messages: {total_rows}")
print(f"🛥️ Distinct vessels (MMSI): {distinct_mmsi}")
print(f"⏰ Time range: {time_range[0]} → {time_range[1]}")


🧠 Basic AIS DB Stats:
📦 Total AIS messages: 11487
🛥️ Distinct vessels (MMSI): 3
⏰ Time range: 2025-04-30T09:38:40.869824+00:00Z → 2025-05-18T11:38:41.054395+00:00Z


In [7]:
# ------------------------------
# 📌 STEP 3: GET DISTINCT MMSIs
# ------------------------------

In [8]:
# Collect the vessel identifiers that later cells iterate over.
# closing() releases the connection; the bare sqlite3 context manager would not.
with closing(sqlite3.connect(DB_NAME)) as conn:
    # Each result row is a 1-tuple, so unpack its only column.
    mmsi_list = [row[0] for row in conn.execute("SELECT DISTINCT mmsi FROM ais_messages").fetchall()]
print(f"\n🛥️ Found {len(mmsi_list)} vessels: {mmsi_list}")


🛥️ Found 3 vessels: [123456789, 192837465, 987654321]


In [9]:
# -----------------------------------
# 📌 STEP 4: GET FULL TRACK FUNCTION
# -----------------------------------

In [10]:
def get_full_track(mmsi):
    """Load every stored position report for one vessel, ordered by timestamp.

    Args:
        mmsi: Vessel identifier to filter on. It is converted with ``int()``
            before being bound to the query, so integer-like values (including
            numeric strings) are accepted.

    Returns:
        pandas.DataFrame with the columns ``timestamp``, ``lat``, ``lon``,
        ``speed``, ``heading`` and ``course``; empty when no row matches.

    Raises:
        ValueError, TypeError: if ``mmsi`` cannot be converted to an integer.
    """
    # Bind the value with a placeholder instead of formatting it into the SQL
    # text, so the argument can never alter the statement itself.
    query = """
    SELECT timestamp, lat, lon, speed, heading, course
    FROM ais_messages
    WHERE mmsi = ?
    ORDER BY timestamp
    """
    # closing() releases the connection even when the query raises.
    with closing(sqlite3.connect(DB_NAME)) as conn:
        return pd.read_sql_query(query, conn, params=(int(mmsi),))

In [11]:
# -------------------------------------------------
# 📌 STEP 5: CALCULATE DISTANCE AND SPEED FUNCTION
# -------------------------------------------------

In [12]:
def calculate_distance_and_speed(df):
    """Compute the travelled distance and mean speed along one track.

    The distance is the sum of geodesic legs between consecutive rows; the
    speed is that distance divided by the time between the first and the
    last usable row. Rows are used in the order given, so they are expected
    to be sorted by time already.

    Args:
        df: DataFrame with a string ``timestamp`` column plus ``lat`` and
            ``lon`` columns. It is not modified.

    Returns:
        ``(total_distance_km, avg_speed_km_per_hour)``. ``(None, None)`` is
        returned when fewer than two usable rows are available. The speed is
        0 when the elapsed time is not positive.
    """
    if df.empty or len(df) < 2:
        return None, None

    # Work on a private copy so the caller's frame keeps its original columns
    # and string timestamps (the function can then be called again on it).
    track = df[["timestamp", "lat", "lon"]].copy()
    # The trailing "Z" is stripped before parsing, as the stored values carry
    # it in addition to an explicit UTC offset.
    track["timestamp"] = pd.to_datetime(
        track["timestamp"].str.replace("Z", "", regex=False), utc=True, errors="coerce"
    )
    # Drop rows that cannot contribute a leg, then renumber: the pairing below
    # must see consecutive rows even when rows in the middle were removed.
    track = track.dropna(subset=["timestamp", "lat", "lon"]).reset_index(drop=True)
    if len(track) < 2:
        return None, None

    points = list(zip(track["lat"], track["lon"]))
    total_distance_km = sum(
        geodesic(start, stop).km for start, stop in zip(points, points[1:])
    )

    elapsed = track["timestamp"].iloc[-1] - track["timestamp"].iloc[0]
    time_diff_hours = elapsed.total_seconds() / 3600
    # Guard against division by zero (or a negative span on unsorted input).
    avg_speed = total_distance_km / time_diff_hours if time_diff_hours > 0 else 0

    return total_distance_km, avg_speed

In [13]:
# -------------------------------------
# 📌 STEP 6: CALCULATE FOR ALL VESSELS
# -------------------------------------

In [14]:
print("\n📋 Vessel Distance and Speed Summary:")
results = []

for mmsi in mmsi_list:
    df_track = get_full_track(mmsi)
    distance, avg_speed = calculate_distance_and_speed(df_track)

    # Tracks without a computable distance are reported and left out of the CSV.
    if distance is None:
        print(f"\n⚠️ MMSI {mmsi}: Not enough data to calculate.")
        continue

    print(f"\n🚢 MMSI {mmsi}:")
    print(f"📏 Total distance traveled: {distance:.2f} km")
    print(f"🚀 Average speed: {avg_speed:.2f} km/h")
    summary_row = {
        "MMSI": mmsi,
        "Total Distance (km)": round(distance, 2),
        "Average Speed (km/h)": round(avg_speed, 2),
    }
    results.append(summary_row)

# Once every vessel has been processed, write the collected rows to CSV.
df_results = pd.DataFrame(results)
df_results.to_csv("vessel_analysis.csv", index=False)
print("\n📄 'vessel_analysis.csv' created successfully.")


📋 Vessel Distance and Speed Summary:

🚢 MMSI 123456789:
📏 Total distance traveled: 10700.66 km
🚀 Average speed: 33.61 km/h

🚢 MMSI 192837465:
📏 Total distance traveled: 6176.98 km
🚀 Average speed: 30.18 km/h

🚢 MMSI 987654321:
📏 Total distance traveled: 17842.91 km
🚀 Average speed: 41.11 km/h

📄 'vessel_analysis.csv' created successfully.


In [15]:
# ------------------------
# 📌 STEP 7: QUERY TIMING
# ------------------------

In [16]:
# Time one full-track load per vessel.
print("\n🕒 Query time check for each MMSI:")
for mmsi in mmsi_list:
    # perf_counter() is monotonic and high-resolution; time.time() is a wall
    # clock that can jump and may be too coarse for millisecond-scale queries.
    start = time.perf_counter()
    _ = get_full_track(mmsi)
    end = time.perf_counter()
    print(f"🚢 MMSI {mmsi}: {end - start:.4f} sec")


🕒 Query time check for each MMSI:
🚢 MMSI 123456789: 0.0372 sec
🚢 MMSI 192837465: 0.0249 sec
🚢 MMSI 987654321: 0.0493 sec


In [17]:
# ----------------------
# 📌 STEP 8: UNIT TESTS
# ----------------------

In [18]:
class TestAISClient(unittest.TestCase):
    """Sanity checks on the rows stored in ``ais_messages``, one pass per vessel."""

    def setUp(self):
        """Open a connection and read the vessel identifiers to check."""
        self.conn = sqlite3.connect(DB_NAME)
        # Registered immediately so the connection is closed even when the
        # query below fails (tearDown is skipped if setUp raises).
        self.addCleanup(self.conn.close)
        self.mmsi_list = [row[0] for row in self.conn.execute("SELECT DISTINCT mmsi FROM ais_messages").fetchall()]

    def test_tracks_not_empty(self):
        """Every listed vessel must yield at least one row from get_full_track."""
        for mmsi in self.mmsi_list:
            # subTest reports each failing vessel instead of stopping at the first.
            with self.subTest(mmsi=mmsi):
                df = get_full_track(mmsi)
                self.assertFalse(df.empty, f"No data for MMSI {mmsi}")

    def test_lat_lon_valid(self):
        """Coordinates must lie within [-90, 90] and [-180, 180].

        Only up to 100 rows per vessel are inspected.
        """
        for mmsi in self.mmsi_list:
            with self.subTest(mmsi=mmsi):
                df = pd.read_sql_query(
                    "SELECT lat, lon FROM ais_messages WHERE mmsi = ? LIMIT 100",
                    self.conn,
                    params=(mmsi,),
                )
                self.assertTrue(df['lat'].between(-90, 90).all(), f"Invalid latitudes for {mmsi}")
                self.assertTrue(df['lon'].between(-180, 180).all(), f"Invalid longitudes for {mmsi}")

    def test_timestamps_not_null(self):
        """No inspected row may have a NULL timestamp (up to 100 rows per vessel)."""
        for mmsi in self.mmsi_list:
            with self.subTest(mmsi=mmsi):
                df = pd.read_sql_query(
                    "SELECT timestamp FROM ais_messages WHERE mmsi = ? LIMIT 100",
                    self.conn,
                    params=(mmsi,),
                )
                self.assertFalse(df['timestamp'].isnull().any(), f"Null timestamps for {mmsi}")

# argv is overridden so unittest ignores the notebook kernel's own command
# line; exit=False keeps it from calling sys.exit() after the run.
unittest.main(argv=[''], exit=False)

...
----------------------------------------------------------------------
Ran 3 tests in 0.158s

OK


<unittest.main.TestProgram at 0x27591cfba10>