In [142]:
import os
import csv
import pandas as pd
import datetime
import dateparser

In [1]:
# Root of the data folders, relative to this notebook. The trailing slash is
# required: later cells build paths by plain string concatenation
# (e.g. data_dir + 'Stellwagen_Bank_NMS/').
data_dir = '../../data/'

In [199]:
class CSVIterator:
  """Walks through the tabular sections of a multi-section CSV file.

  The whole file is loaded into memory as a list of rows. A section begins at a
  row whose first cell equals ``data_start_mark`` and ends just before the next
  row whose first cell equals ``data_end_mark`` (or at the end of the file).
  ``curr_ind`` tracks the read position between calls.
  """

  def __init__(self, filename, data_start_mark, data_end_mark, stellwagen = False, quotechar = '|'):
    """Load ``filename`` and position the iterator at its first row.

    Args:
      filename: path of the CSV file to read.
      data_start_mark: first-cell value of the row that opens a section.
      data_end_mark: first-cell value of the row that closes a section.
      stellwagen: when False the start-mark row itself is the header row;
        when True the row *after* the start-mark row is used as the header.
      quotechar: quote character handed to ``csv.reader``. The default ``'|'``
        keeps the original parsing, in which double quotes are ordinary
        characters and a comma inside them splits the field. Pass ``'"'`` to
        parse conventionally quoted fields.
    """
    self.stellwagen = stellwagen
    self.twoDArray = self.csvToArr(filename, quotechar)
    self.data_start_mark = data_start_mark
    self.data_end_mark = data_end_mark
    self.curr_ind = 0

  def csvToArr(self, filename, quotechar = '|'):
    """Read ``filename`` into a list of rows (each a list of strings).

    Blank lines, which ``csv.reader`` yields as ``[]``, are replaced by ``['']``
    so that every row has a first cell that can be compared with the marks.
    """
    with open(filename, newline='') as csvfile:
      reader = csv.reader(csvfile, delimiter=',', quotechar=quotechar)
      return [row if row else [''] for row in reader]

  def hasNextData(self):
    """Return True if a start-mark row exists at or after the current position."""
    rows = self.twoDArray
    # any() stops at the first match instead of materialising the remaining
    # first cells on every call.
    return any(rows[i][0] == self.data_start_mark for i in range(self.curr_ind, len(rows)))

  def getNextData(self):
    """Return the next section and advance past it.

    Returns:
      A ``pandas.DataFrame`` built from the section's rows with the header row
      as column names. If pandas cannot build the frame (for example a data
      row has more fields than the header), the raw ``(columns, data)`` tuple
      is returned instead.

    Raises:
      Exception: with message ``'No data left'`` when no start-mark row remains.
    """
    if not self.hasNextData():
      raise Exception('No data left')

    # Move to the start-mark row (hasNextData guarantees there is one).
    while self.twoDArray[self.curr_ind][0] != self.data_start_mark:
      self.curr_ind += 1

    # In stellwagen mode the mark row is only a title; the header follows it.
    if self.stellwagen:
      self.curr_ind += 1

    columns = self.twoDArray[self.curr_ind]

    # Move to the start of the data
    self.curr_ind += 1
    data_start_ind = self.curr_ind

    # Advance to the end-mark row or the end of the file, whichever comes first.
    while self.curr_ind < len(self.twoDArray) and self.twoDArray[self.curr_ind][0] != self.data_end_mark:
      self.curr_ind += 1

    data = self.twoDArray[data_start_ind:self.curr_ind]

    try:
      return pd.DataFrame(data, columns = columns)
    except Exception:
      # Keep the best-effort fallback, but no longer swallow
      # KeyboardInterrupt / SystemExit as the bare ``except:`` did.
      return columns, data

In [132]:
def get_all_data(csv_it):
  """Drain ``csv_it`` and stack all of its sections into one DataFrame.

  Args:
    csv_it: object exposing ``hasNextData()`` and ``getNextData()``.

  Returns:
    The sections concatenated row-wise with a fresh 0..n-1 index, or an empty
    DataFrame when the iterator yields no section at all.

  Raises:
    TypeError: if ``getNextData()`` returns something that is not a pandas
      object, such as a raw ``(columns, data)`` tuple.
  """
  chunks = []
  while csv_it.hasNextData():
    chunk = csv_it.getNextData()
    if not isinstance(chunk, (pd.DataFrame, pd.Series)):
      # pd.concat would raise TypeError for this anyway; fail with a message
      # that says what actually went wrong.
      raise TypeError(
        'getNextData() returned %s instead of a DataFrame; '
        'the section could not be converted to a table' % type(chunk).__name__
      )
    chunks.append(chunk)

  # pd.concat([]) raises "No objects to concatenate"; a file without any
  # section is represented by an empty frame instead.
  if not chunks:
    return pd.DataFrame()
  return pd.concat(chunks, ignore_index=True)

#### Read Sensor Data

In [135]:
# Each sensor section opens with a header row whose first cell is 'DATE' and
# is closed by a row whose first cell is 'Eureka_Manta_2'.
start = 'DATE'
end = 'Eureka_Manta_2'
csvs = []

sensor_dir = data_dir + 'Stellwagen_Bank_NMS/'
for file in os.listdir(sensor_dir):
    # Only CSV files whose name (before the first '.') ends in '_SBNMS'.
    if not (file.endswith(".csv") and file.split('.')[0].split('_')[-1] == 'SBNMS'):
        continue
    filename = sensor_dir + file
    it = CSVIterator(filename, start, end)
    all_data = get_all_data(it)
    csvs.append(all_data)
sensor_data = pd.concat(csvs, ignore_index=True)

# Combine the DATE and TIME text of every row into one parsed timestamp.
timestamps = []
for _, row in sensor_data.iterrows():
    timestamps.append(dateparser.parse(row['DATE'] + " " +  row['TIME']))
sensor_data['Timestamp'] = timestamps

#### Read Telemetry Data

In [152]:
# Telemetry sections are titled by a 'wpt' row, followed by the header row,
# and end at the first blank line (normalised to '' by CSVIterator).
start = 'wpt'
end = ''
for file in os.listdir(data_dir + 'Stellwagen_Bank_NMS/'):
    if file.endswith(".csv"):
      # Telemetry files are the ones whose name has exactly three '_'-separated parts.
      if len(file.split('_')) == 3:
        filename = data_dir + 'Stellwagen_Bank_NMS/' + file
        it = CSVIterator(filename, start, end, stellwagen = True)
        try:
          all_data = get_all_data(it)
        except Exception:
          # Report the files that cannot be parsed; unlike a bare `except:`,
          # this still lets KeyboardInterrupt stop the loop.
          print(filename)

../../data/Stellwagen_Bank_NMS/2019_02_23.csv
../../data/Stellwagen_Bank_NMS/2019_04_30.csv
../../data/Stellwagen_Bank_NMS/2019_04_25.csv
../../data/Stellwagen_Bank_NMS/2019_03_18.csv
../../data/Stellwagen_Bank_NMS/2019_03_19.csv
../../data/Stellwagen_Bank_NMS/2019_04_12.csv
../../data/Stellwagen_Bank_NMS/2019_04_17.csv
../../data/Stellwagen_Bank_NMS/2019_04_29.csv


In [145]:
def csvToArr(filename):
    """Return every row of the CSV file `filename` as a list of strings.

    Fields are split on ',' with '|' as the quote character; blank lines come
    back as empty lists, exactly as csv.reader produces them.
    """
    with open(filename, newline='') as handle:
        return list(csv.reader(handle, delimiter=',', quotechar='|'))

In [197]:
# Dump the raw parsed rows of one of the files the telemetry loop reported
# as failing, to inspect how its sections are laid out.
csvToArr(data_dir + 'Stellwagen_Bank_NMS/2019_04_30.csv')

[['\ufeff'],
 ['metadata'],
 ['ID',
  'name',
  'desc',
  'time',
  'keywords',
  'minlat',
  'minlon',
  'maxlat',
  'maxlon',
  ''],
 ['1',
  '',
  '',
  '2019-05-03T12:22:48Z',
  '',
  '41.543357027694583',
  '-70.720271961763501',
  '42.309009684249759',
  '-69.908007960766554',
  ''],
 [],
 ['author'],
 ['ID', 'name', 'email', ''],
 [],
 ['copyright'],
 ['year', 'license', 'author', ''],
 [],
 ['link'],
 ['metadataID',
  'authorID',
  'wptID',
  'rteptID',
  'trkptID',
  'rteID',
  'trkID',
  'text',
  'type',
  'href',
  ''],
 ['1',
  '',
  '',
  '',
  '',
  '',
  '',
  '"Garmin International"',
  '',
  'http://www.garmin.com',
  ''],
 [],
 ['rte'],
 ['ID',
  'name',
  'cmt',
  'desc',
  'src',
  'number',
  'type',
  'IsAutoNamed',
  'DisplayColor',
  'TransportationMode',
  'TripName',
  'Date',
  'DayNumber',
  ''],
 [],
 ['rtept'],
 ['ID',
  'rteID',
  'lat',
  'lon',
  'ele',
  'time',
  'magvar',
  'geoidheight',
  'name',
  'cmt',
  'desc',
  'src',
  'sym',
  'type',
  'f

In [185]:
# Scratch check: an empty list is falsy. CSVIterator.csvToArr relies on this
# in `arr if arr else ['']` to replace blank rows.
x =  []
bool(x)

False

In [204]:
# Build an iterator over one of the files the telemetry loop reported as
# failing, using the same marks as that loop, so it can be stepped through by hand.
start = 'wpt'
end = ''
it = CSVIterator(data_dir + 'Stellwagen_Bank_NMS/2019_04_30.csv', start, end, stellwagen = True)

28
