In [12]:
import re
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime
from geopy import distance

In [13]:
# Collect every entry directly under the dataset root and keep the paths in alphabetical order,
# so recordings are always processed (and reported) in the same sequence.
folders = glob.glob('D:/MELA/Test Dataset SRT/SM_Lek1/*')
folders.sort()

# Thresholds applied to each recording further down the notebook.
# Largest tolerated distance, in metres, between any GPS fix and the recording's mean position.
drift_thresh = 5
# Largest tolerated rise of the recorded altitude above the 80.0 reference level.
height_thresh = 7
# Gap, in seconds, between consecutive frames at or above which the recording counts as having missing data.
time_thresh = 2

In [14]:
def extract_data_from_srt(file_path):
    """
    Extracts per-frame telemetry from a SubRip (.SRT) subtitle file.

    Args:
        file_path (str): The path to the subtitle file.

    Returns:
        pandas.DataFrame: One row per matched frame with the columns 'frame' (int),
        'timestamp' (str, exactly as written in the file), 'latitude', 'longitude'
        and 'altitude' (float). If nothing in the file matches, the DataFrame is
        empty but still carries these five columns, so callers can index them safely.
    """
    columns = ['frame', 'timestamp', 'latitude', 'longitude', 'altitude']

    # Decode explicitly instead of relying on the platform default encoding; every field the
    # pattern captures is plain ASCII, so undecodable bytes elsewhere are simply replaced.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # Use regular expressions to extract the frame number, timestamp, latitude, longitude, and altitude from each frame
    frames = re.findall(r'(\d+)\n.*?(\d{2}:\d{2}:\d{2},\d{3},\d{3}).*?\[latitude:\s*([-+]?\d+\.\d+)\]\s*\[longitude:\s*([-+]?\d+\.\d+)\]\s*\[altitude:\s*([-+]?\d+\.\d+)\]', content, re.DOTALL)

    # Convert the extracted data into a list of dictionaries, where each dictionary represents one frame
    results = [
        {
            'frame': int(frame),
            'timestamp': str(timestamp),
            'latitude': float(latitude),
            'longitude': float(longitude),
            'altitude': float(altitude),
        }
        for frame, timestamp, latitude, longitude, altitude in frames
    ]

    # Passing the column list keeps the schema stable even when no frame was matched
    return pd.DataFrame(results, columns=columns)


def geodist(coord1, coord2_lat, coord2_long):
    """
    Geodesic separation between a reference point and a second point whose
    latitude and longitude are passed as separate values.

    Args:
        coord1 (tuple): The reference coordinate as (latitude, longitude).
        coord2_lat (float): Latitude of the second coordinate.
        coord2_long (float): Longitude of the second coordinate.

    Returns:
        float: Distance between the two coordinates, in metres.
    """
    # Pack the second point into the same (latitude, longitude) layout as coord1
    second_point = (coord2_lat, coord2_long)
    separation = distance.distance(coord1, second_point)
    return separation.m

In [15]:
# Reference altitude every frame is compared against
# (same units as the SRT 'altitude' field; presumably metres, TODO confirm).
normal_altitude = 80.0

# One summary record (folder, video id, first/last timestamp) per processed recording
df_list = []

# Maximum distance (metres) of any GPS fix from the recording's mean position, one entry per recording
dist_max = []

# Per-recording 0/1 flags, kept in the same order as df_list
drift_status = []   # 1 when the maximum distance from the mean position exceeds drift_thresh
height_status = []  # 1 when the altitude rises more than height_thresh above normal_altitude
time_status = []    # 1 when two consecutive frames are time_thresh seconds or more apart

# Loop through each folder
for folder in folders:

    # Get all .SRT files in the folder
    files = sorted(glob.glob(folder + '/*.SRT'))

    # Loop through each file
    for file in files:

        # Extract data from the .SRT file
        df_rec = extract_data_from_srt(file)

        # A file without any parsable frame cannot be assessed; skip it before anything is
        # appended so that the summary lists stay aligned with each other.
        if df_rec.empty:
            print(f'Skipping {file}: no telemetry frames could be parsed')
            continue

        # --- Time gaps between consecutive frames ---

        # Split the timestamp into its clock part (HH:MM:SS) and the milliseconds that follow it
        df_rec[['time', 'milliseconds']] = df_rec['timestamp'].str.extract(r'(\d{2}:\d{2}:\d{2}),(\d{3})')
        df_rec[['hours', 'minutes', 'seconds']] = df_rec['time'].str.split(':', expand=True)

        # Convert the clock components to integer type
        df_rec = df_rec.astype({'hours': int, 'minutes': int, 'seconds': int, 'milliseconds': int})

        # Time of day as a timedelta, including the milliseconds, so that gaps are measured
        # with sub-second resolution instead of being rounded to whole seconds.
        df_rec['time'] = (
            pd.to_timedelta(df_rec['time'])
            + pd.to_timedelta(df_rec['milliseconds'], unit='ms')
        )

        # Absolute time difference (seconds) between consecutive frames; the first frame
        # has no predecessor and is given a gap of 0.
        df_rec['time_diff'] = df_rec['time'].diff().dt.total_seconds().abs().fillna(0)

        # Flag the recording when any gap reaches the threshold
        time_status.append(int(df_rec['time_diff'].max() >= time_thresh))

        # --- Altitude relative to the reference level ---

        df_rec['normal_altitude'] = normal_altitude
        df_rec['height_difference'] = df_rec['altitude'] - df_rec['normal_altitude']

        # NOTE(review): only altitudes ABOVE the reference level can raise this flag; a drop
        # below it never does. Confirm that this one-sided check is intended.
        height_status.append(int(df_rec['height_difference'].max() > height_thresh))

        # --- Horizontal drift around the mean position ---

        # Mean coordinate of the whole recording
        coord1 = (df_rec['latitude'].mean(), df_rec['longitude'].mean())

        # Distance (metres) of every fix from the mean coordinate
        df_rec['distance'] = [
            geodist(coord1, lat, lon)
            for lat, lon in zip(df_rec['latitude'], df_rec['longitude'])
        ]

        max_distance = df_rec['distance'].max()
        dist_max.append(max_distance)
        drift_status.append(int(max_distance > drift_thresh))

        # --- Summary record ---

        df_list.append({
            'folder': folder[-4:],  # Last 4 characters of the folder path
            'Video_ID': file[-12:-4],  # 8 characters preceding the 4-character '.SRT' suffix
            'start_timestamp': df_rec['timestamp'].iloc[0],
            'end_timestamp': df_rec['timestamp'].iloc[-1]
        })

# Create the final dataframe from the list of records; the explicit column list keeps the
# expected columns present even when no recording was processed.
df_day = pd.DataFrame(df_list, columns=['folder', 'Video_ID', 'start_timestamp', 'end_timestamp'])
# Add the drift flag as the 'drift_status' column
df_day['drift_status'] = drift_status
# Add the altitude flag as the 'flight_altitude' column
df_day['flight_altitude'] = height_status
# Add the time-gap flag as the 'Missing_data' column
df_day['Missing_data'] = time_status

In [16]:
# The 4-character folder code is split in two: its first two characters go to 'position',
# whatever follows goes to 'drone'.
folder_code = df_day['folder']
df_day['position'] = folder_code.str.slice(stop=2)
df_day['drone'] = folder_code.str.slice(start=2)

# Keep only the reporting columns, in their final left-to-right order
column_order = [
    'Video_ID',
    'position',
    'drone',
    'start_timestamp',
    'end_timestamp',
    'flight_altitude',
    'drift_status',
    'Missing_data',
]
df_day = df_day.loc[:, column_order]


In [9]:
# Write the summary table to 'name.csv' in the current working directory, without the row index
df_day.to_csv(path_or_buf='name.csv', index=False)

In [17]:
# Show the per-recording summary table as plain text
print(df_day)

     Video_ID position drone   start_timestamp     end_timestamp  \
0    DJI_0343       P1    D1  06:50:10,947,073  06:53:24,528,826   
1    DJI_0344       P1    D1  17:01:36,229,817  17:04:49,745,597   
2    DJI_0345       P1    D1  17:04:49,812,320  17:08:03,795,268   
3    DJI_0346       P1    D1  17:08:03,861,994  17:11:17,711,485   
4    DJI_0347       P1    D1  17:11:17,778,214  17:14:31,760,957   
..        ...      ...   ...               ...               ...   
97   DJI_0978       P3    D6  18:06:19,419,955  18:08:07,420,747   
98   DJI_0979       P3    D6  18:25:09,306,130  18:28:22,920,058   
99   DJI_0980       P3    D6  18:28:22,986,789  18:31:36,934,355   
100  DJI_0981       P3    D6  18:31:37,001,084  18:34:50,848,550   
101  DJI_0982       P3    D6  18:34:50,915,280  18:37:09,511,214   

     flight_altitude  drift_status  Missing_data  
0                  0             0             1  
1                  0             0             0  
2                  0          