Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added a load routine to load in bulk from a CSV file (database export)

  • Loading branch information...
commit 9200eacd62345c5d1874648af98f0deacb573831 1 parent 6748e79
K. Arthur Endsley authored
165 campaigns/loaders.py
View
@@ -1,117 +1,104 @@
-# -*- coding: utf-8 -*-
import os, sys
-import datetime, numpy, re, shutil, random
+import csv, datetime, re, shutil
+import logging
+logger = logging.getLogger('loading.buoys')
sys.path.append('/usr/local/project/GreatLakesObs/')
+
try:
- os.environ["DJANGO_SETTINGS_MODULE"] = "settings_production"
- import settings_production as settings # Assumed to be in the same directory.
+ os.environ["DJANGO_SETTINGS_MODULE"] = "settings"
+ import settings as settings # Assumed to be in the same directory.
+
except ImportError:
sys.stderr.write("Couldn't find the django settings file\n")
sys.exit(1)
try:
from django.db.utils import IntegrityError
+
except ImportError:
from django.db import IntegrityError
+
except:
- raise
+ sys.stderr.write("Couldn't import the IntegrityError class\n")
+ sys.exit(1)
+
+from xml.dom import minidom
+from xml.parsers.expat import ExpatError
from django.core.management import setup_environ
+from django.core.management.base import BaseCommand, CommandError
+from django.core.exceptions import ValidationError, ObjectDoesNotExist
from django.contrib.gis.geos import *
-from settings import SUPPORTED_BUOYS
+from assets.models import *
from campaigns.models import *
-from xml.dom import minidom
+from utils import UTC, EDT, EST
-#output_folder = '/netfs/cluster8/data/gis_lab/project/GLOS/data/buoy/'
-#failed_folder = '/netfs/cluster8/data/gis_lab/project/GLOS/data/buoy/failed_import'
-#dir_path = '/netfs/cluster8/data/public/ftp/incoming'
-def load_buoys(path, id_list=None):
+def load_from_csv(path, buoy_id=None, model=None, tzone=EDT()):
'''
- Loads all available buoy data in a specified location for, optionally, only
- the buoys specified in a list of buoy IDs.
+ Loads buoy data in bulk from a CSV file.
+ <path> {String} The path to the CSV file
+ <buoy_id> {String} The NDBC ID of the buoy; if not provided, <model> must be
+ <model> {models.Model} A Django model; may be provided instead of <buoy_id>
+ <tzone> {datetime.tzinfo} A time zone representation (defaults to EDT)
+
+ Anticipates output from a query similar to the following:
+ SELECT station AS sid,
+ date_time::timestamp without time zone AS timestamp,
+ ...
'''
- finder = re.compile('\d{5}_\d*\.xml')
-
- def load_from_xml(xml_name, model):
- '''
- Loads buoy data from an XML file in the NDBC schema.
- '''
- expected_fields = []
- for field in model._meta.fields:
- expected_fields.append(field.name)
-
- try:
- xmldoc = minidom.parse(xml_name)
- objects = []
- attrs = {}
- attrs['date_time'] = datetime.datetime.strptime(str(xmldoc.childNodes[0].childNodes[3].childNodes[0].data),"%m/%d/%Y %H:%M:%S")
- attrs['station'] = str(xmldoc.childNodes[0].childNodes[1].childNodes[0].data)
-
- for node in xmldoc.childNodes[0].childNodes[5].childNodes:
+ models = {
+ '45023': Buoy45023,
+ '45024': Buoy45024,
+ '45025': Buoy45025,
+ 'UMBIO': BuoyUMBIO
+ }
+
+ if buoy_id is None and model is None:
+ raise TypeError("load_from_csv() requires that at least one of the arguments, <buoy_id> or <model>, be provided")
+
+ if buoy_id is not None and model is None:
+ model = models[buoy_id]
+
+ else: # We can look up the buoy ID for you if a model was specified
+ for pair in models.iteritems():
+ if pair[1] == model:
+ buoy_id = pair[0]
+ break
+
+ reader = csv.reader(open(path, 'rb'), delimiter=',', quotechar='"')
+ for line in reader:
+ l = reader.line_num # Lines start numbering at 1, not 0
+ if line == []:
+ line = reader.next() # Check for empty lines
+
+ if l == 1:
+ header = line # Get field names
+ line = reader.next() # Move on to the first line of data
+
+ data_dict = {}
+ for field in header:
+ value = line[header.index(field)] # The value for that field
+ # Catch empty values that should be null
+ if len(value) == 0 or value == '_':
+ if model._meta.get_field(field.lower()).null:
+ data_dict[field.lower()] = None
- if node.__class__.__name__ == 'Element':
-
- # Don't assume every tag in the feed is a model field
- if str(node.tagName) in expected_fields:
-
- if node.childNodes[0].data not in ['NAN', '']:
- attrs[str(node.tagName)] = eval(node.childNodes[0].data)
-
- else:
- attrs[str(node.tagName)] = None
-
- except:
- sys.stderr.write("Unhandled error while parsing %s\n" % xml_name)
-
- for errorline in sys.exc_info():
- sys.stderr.write(" " + str(errorline) + "\n")
- return -1
+ else:
+ data_dict[field.lower()] = value
- inst = model(**attrs)
+ data_dict['asset'] = Asset.objects.get(uid__exact=buoy_id)
+ data_obj = model(**data_dict) # Create a model instance
+ data_obj.clean(tzinfo=tzone) # Perform initial validation
try:
- inst.save()
- except IntegrityError:
- sys.stderr.write("IntegrityError encountered while saving record.\n")
- return -1
- except:
- sys.stderr.write("Unhandled error while running load_BuoyRecord()\n")
- for errorline in sys.exc_info():
- sys.stderr.write(" " + str(errorline) + "\n")
- return -1
-
-def process_incoming(dir_path, output_folder, failed_folder):
- '''
- Adds the contents of a directory to the database
- Leaves behind a processed.txt file that lists the files
- that have already been processed. Will not process a file
- that has already been processed according to the text file.
- '''
+ model.objects.get(timestamp__exact=data_obj.timestamp) # Check to see if the record already exists
- sys.stdout.write('Starting data import at ' + str(datetime.datetime.now()) + '\n')
- files = os.listdir(dir_path)
-
- for item in files:
- if northbuoy_finder.match(item) != None:
- sys.stdout.write( "Processing: " + os.path.join(dir_path, item)+ '\n')
- if load_buoy(os.path.join(dir_path, item), NorthBuoyRecord) == -1:
- shutil.move(os.path.join(dir_path, item),os.path.join(failed_folder, item))
- else:
- shutil.move(os.path.join(dir_path, item),os.path.join(output_folder, item))
+ except ObjectDoesNotExist:
+ data_obj.save() # Save the record to the database only if it doesn't already exist
+ logger.debug("Saved record of %s with timestamp %s [Saved]" % (model._meta.object_name, data_dict['timestamp']))
- if southbuoy_finder.match(item) != None:
- sys.stdout.write( "Processing: " + os.path.join(dir_path, item)+ '\n')
- if load_buoy(os.path.join(dir_path, item), SouthBuoyRecord) == -1:
- shutil.move(os.path.join(dir_path, item),os.path.join(failed_folder, item))
- else:
- shutil.move(os.path.join(dir_path, item),os.path.join(output_folder, item))
-if __name__=="__main__":
+if __name__ == "__main__":
setup_environ(settings)
-
- process_incoming(sys.argv[1], sys.argv[2], sys.argv[3])
-
- print 'Done!'
- print datetime.datetime.now()
-
+ pass
10 campaigns/management/commands/loadbuoys.py
View
@@ -75,13 +75,13 @@ def handle(self, *args, **options):
document = minidom.parse(os.path.join(path, item))
except ExpatError:
- logger.error("Could not parse '%s' [Skipping]" % item)
+ logger.error("Could not parse '%s' [Skipped]" % item)
continue
# Check the station ID
station_id = str(document.getElementsByTagName('station')[0].childNodes[0].data)
if station_id != buoy_id:
- logger.error("Station ID mismatch in '%s' [Skipping]" % item)
+ logger.error("Station ID mismatch in '%s' [Skipped]" % item)
continue
attrs = {}
@@ -102,18 +102,18 @@ def handle(self, *args, **options):
attrs[str(node.tagName)] = None
else:
- logger.warning("Unexpected tag '%s' found in '%s' [Skipping]" % (node.tagName, item))
+ logger.warning("Unexpected tag '%s' found in '%s' [Skipped]" % (node.tagName, item))
try:
record = model(**attrs) # Create an instance of the model
except:
- logger.error("Error creating %s instance for '%s' [Skipping]" % (model._meta.object_name, item))
+ logger.error("Error creating %s instance for '%s' [Skipped]" % (model._meta.object_name, item))
continue
try:
record.save() # Save the object to the database
- logger.debug("Saving an instance of %s after '%s' [Saving]" % (item, model._meta.object_name))
+ logger.debug("Saving an instance of %s after '%s' [Saved]" % (item, model._meta.object_name))
except:
logger.error("Error saving %s instance after '%s'" % (item, model._meta.object_name))
76 campaigns/models.py
View
@@ -1,4 +1,6 @@
+import datetime
from django.contrib.gis.db import models
+from campaigns.utils import UTC, EDT, EST
class Observation(models.Model):
'''
@@ -6,12 +8,28 @@ class Observation(models.Model):
'''
objects = models.GeoManager()
asset = models.ForeignKey('assets.Asset')
- timestamp = models.DateTimeField()
+ timestamp = models.DateTimeField(unique=True)
+
+ def clean(self, *args, **kwargs):
+ '''
+ Accepts a tzinfo keyword argument where tzinfo is an instance of
+ datetime.tzinfo that can be passed to the replace() method.
+ '''
+ if isinstance(self.timestamp, str):
+ try:
+ self.timestamp = datetime.datetime.strptime(self.timestamp, '%Y-%m-%d %H:%M:%S%z').replace(tzinfo=kwargs['tzinfo'])
+ except:
+ self.timestamp = datetime.datetime.strptime(self.timestamp, '%Y-%m-%d %H:%M:%S').replace(tzinfo=kwargs['tzinfo'])
+
def get_field_list(self):
+ '''
+ Returns a flat list of the field names.
+ '''
expected_fields = []
for field in self._meta.fields:
expected_fields.append(field.name)
+
return expected_fields
@@ -74,12 +92,7 @@ class Meta:
abstract = True
-class Buoy45025(BuoyObservation):
- '''
- '''
-
-
-class Buoy45023(BuoyObservation):
+class ADCPBuoyObservation(BuoyObservation):
'''
'''
# ADCP Parameters ##########################################################
@@ -108,6 +121,24 @@ class Buoy45023(BuoyObservation):
uv008 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
vv008 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
zcellheading = models.FloatField(help_text="Nortek ADCP (Z-Cell) magnetic compass reading in degrees", blank=True, null=True)
+
+ class Meta:
+ abstract = True
+
+
+class Buoy45025(BuoyObservation):
+ '''
+ '''
+
+
+class Buoy45024(ADCPBuoyObservation):
+ '''
+ '''
+
+
+class Buoy45023(ADCPBuoyObservation):
+ '''
+ '''
# YSI Sonde Parameters #####################################################
ysitemp = models.FloatField(help_text="YSI sonde temperature in degrees Celsius (C)", blank=True, null=True)
ysidepth = models.FloatField(help_text="YSI sonde depth in meters (m)", blank=True, null=True)
@@ -121,37 +152,6 @@ class Buoy45023(BuoyObservation):
ysiodovol = models.FloatField(help_text="YSI sonde optical dissolved oxygen volume in milligrams per liter (mg/L)", blank=True, null=True)
-class Buoy45024(BuoyObservation):
- '''
- '''
- # ADCP Parameters ##########################################################
- dv001 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv001 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv001 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv002 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv002 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv002 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv003 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv003 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv003 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv004 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv004 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv004 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv005 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv005 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv005 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv006 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv006 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv006 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv007 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv007 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv007 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- dv008 = models.FloatField(help_text="Depth of a current meter (in meters)", blank=True, null=True)
- uv008 = models.FloatField(help_text="Eastward water velocity at a curent meter (cm/s)", blank=True, null=True)
- vv008 = models.FloatField(help_text="Northward water velocity at a curent meter (cm/s)", blank=True, null=True)
- zcellheading = models.FloatField(help_text="Nortek ADCP (Z-Cell) magnetic compass reading in degrees", blank=True, null=True)
-
-
class BuoyUMBIO(BuoyObservation):
'''
'''
49 campaigns/utils.py
View
@@ -0,0 +1,49 @@
+from datetime import tzinfo, timedelta, datetime
+
+ZERO = timedelta(0)
+HOUR = timedelta(hours=1)
+
+class UTC(tzinfo):
+ '''UTC'''
+
+ def utcoffset(self, dt):
+ return ZERO
+
+
+ def tzname(self, dt):
+ return 'UTC'
+
+
+ def dst(self, dt):
+ return ZERO
+
+
+class EDT(tzinfo):
+ '''Eastern Daylight Time (EDT) or GMT-4'''
+
+ def utcoffset(self, dt):
+ return -4*HOUR
+
+
+ def tzname(self, dt):
+ return 'EDT'
+
+
+ def dst(self, dt):
+ return -4*HOUR
+
+
+class EST(tzinfo):
+ '''Eastern Standard Time (EST) or GMT-5'''
+
+ def utcoffset(self, dt):
+ return -5*HOUR
+
+
+ def tzname(self, dt):
+ return 'EDT'
+
+
+ def dst(self, dt):
+ return -5*HOUR
+
Please sign in to comment.
Something went wrong with that request. Please try again.