In [None]:
import os
from pathlib import Path
from read_data import NmeaFile
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from pynmea2.nmea import ParseError

In [None]:
# Directory that receives the cleaned CSV written at the end of the notebook.
clean_datadir = Path('./example_data')
# Directory scanned for raw .NMEA logs and their matching .MP4 videos.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
datadir = Path('/media/alex/alex_backup/Van_6-10-18')
processed = []
dataframes = []
# Sort the listing: iterdir() yields entries in arbitrary order, which would make
# both the row order of `data` and the subset kept under the cap below vary
# from run to run.
for file in sorted(datadir.iterdir()):

    # Cap on the number of logs loaded in one run.
    if len(dataframes) > 5000:
        break

    if file.suffix != '.NMEA':
        continue

    # Only keep logs that have a video with the same stem next to them.
    video_path = file.with_suffix('.MP4')
    if not video_path.exists():
        print(video_path, 'missing.')
        continue

    try:
        dataframes.append(NmeaFile.DataFrame(file))
    except ParseError:
        # Report the unparseable log by path and carry on with the rest.
        print(file)

# pd.concat([]) fails with an opaque "No objects to concatenate"; raise the same
# exception type with a message that says what was actually missing.
if not dataframes:
    raise ValueError(f'no parseable .NMEA file with a matching .MP4 found in {datadir}')

data = pd.concat(dataframes)
data.head()

In [None]:
# Summary statistics of the combined frame, shown as the cell output.
data.describe()

In [None]:
# Scatter of longitude against latitude for every row, coloured by the 'speed' column.
data.plot.scatter(x='longitude', y='latitude', c='speed')
plt.show()

In [None]:
# Order all samples chronologically. A stable sort (mergesort) keeps rows with
# identical timestamps in their loaded order, so the result is reproducible;
# rebinding instead of inplace=True leaves the previously displayed frame untouched.
data = data.sort_values('time', kind='mergesort')

In [None]:
# Samples recorded at zero speed. The grouping below compares each sample with
# the one before it, so it relies on `data` already being ordered by 'time'.
stopped_data = data[data['speed'] == 0]

# Largest gap, in seconds, between two consecutive zero-speed samples that is
# still treated as part of the same stop.
max_gap_secs = 2

# data to create new dataframe from
section_cols = 'time latitude longitude start_vid end_vid duration video_files directory'.split()
section_vals = {v: [] for v in section_cols}


def _record_stop(first_row, last_row, video_files, duration):
    """Append one completed stop to ``section_vals``.

    first_row, last_row -- first and last zero-speed samples of the stop.
    video_files         -- ordered video files seen during the stop.
    duration            -- accumulated time between the stop's samples.
    """
    section_vals['time'].append(first_row['time'])
    section_vals['latitude'].append(first_row['latitude'])
    section_vals['longitude'].append(first_row['longitude'])
    section_vals['start_vid'].append(first_row['video_time'])
    section_vals['end_vid'].append(last_row['video_time'])
    section_vals['video_files'].append(tuple(video_files))
    # Taken from the stop itself, like every other field (previously this read
    # the row that starts the *next* stop).
    section_vals['directory'].append(first_row['directory'])
    section_vals['duration'].append(duration)


# With no zero-speed samples there is nothing to group, and iloc[0] would raise.
if not stopped_data.empty:
    # initialize stop separation variables
    start_row = stopped_data.iloc[0]
    prev_row = start_row

    # ordered list of videofiles during which the stop occurs. Important if a stop
    # traverses one or more breaks between files.
    curr_videofiles = [start_row['video_file']]
    curr_duration = pd.Timedelta(0)

    # separate stops
    for idx, row in stopped_data.iterrows():
        tdiff = row['time'] - prev_row['time']
        if tdiff.total_seconds() > max_gap_secs:
            # Break in continuity: close the current stop and start a new one here.
            # (A change in latitude/longitude could be used to detect this as well.)
            _record_stop(start_row, prev_row, curr_videofiles, curr_duration)
            curr_duration = pd.Timedelta(0)
            start_row = row
            curr_videofiles = [row['video_file']]
        else:
            curr_duration += tdiff
            if row['video_file'] not in curr_videofiles:
                curr_videofiles.append(row['video_file'])
        prev_row = row

    # The loop only closes a stop when the next one begins, so the final stop
    # still has to be written out here.
    _record_stop(start_row, prev_row, curr_videofiles, curr_duration)

stopped_sections = pd.DataFrame(section_vals)
stopped_sections.head()

In [None]:
# Summary statistics of the per-stop table, shown as the cell output.
stopped_sections.describe()

In [None]:
# Length of every stop as a plain number of seconds.
stopped_sections['duration_secs'] = [delta.total_seconds() for delta in stopped_sections['duration']]
# Stops lasting this many seconds or fewer are left out of the total below.
min_time = 5
is_long_stop = stopped_sections['duration_secs'] > min_time
long_stop_secs = stopped_sections.loc[is_long_stop, 'duration_secs']
# NOTE(review): the factor of 3 is not explained anywhere in this notebook -- confirm
# what it stands for. The two divisions by 60 convert seconds to hours.
stopped_hours = sum(long_stop_secs) * 3 / 60 / 60
print('there are',
      stopped_hours,
      f'hours of stopped footage longer then {min_time} seconds.')

In [None]:
# Keep only the stops longer than min_time seconds, ordered from shortest to longest.
long_stop_mask = stopped_sections['duration_secs'] > min_time
stopped_long_sections = stopped_sections.loc[long_stop_mask].sort_values(by='duration')
stopped_long_sections.describe()

In [None]:
# Histograms of the columns of the long-stop table.
stopped_long_sections.hist()
plt.show()

In [None]:
# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing the per-stop table.
clean_datadir.mkdir(parents=True, exist_ok=True)
stopped_sections.to_csv(clean_datadir / 'stopped_sections.csv')