This notebook processes chlorophyll data from NJDEP. The file containing the full dataset is 100 MB.

The final processed data are analyzed in file 2.2 Chl Analysis and Model.

http://njdep.rutgers.edu/aircraft/

In [1]:
import numpy as np
import pandas as pd
import datetime
import urllib.request
import netCDF4
from netCDF4 import Dataset
import xarray as xr
import matplotlib.pyplot as plt

In [3]:
# Load the NJDEP aircraft export.
# parse_dates converts the 'date' column to datetimes, and index_col makes it
# the index; the daily resampling done later relies on that datetime index.
njdep = pd.read_csv(
    './NJDEPaircraftData20070101_To_20191001.txt',
    index_col='date',
    parse_dates=['date'],
)
# The 'id' and 'time' columns are not used in this notebook, so discard both at once.
njdep = njdep.drop(columns=['id', 'time'])
njdep.head(10)

Unnamed: 0_level_0,lat,lng,chlora
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-05-21,40.463245,-74.235184,2.117404
2007-05-21,40.462158,-74.234329,1.384279
2007-05-21,40.461193,-74.233322,1.429075
2007-05-21,40.460308,-74.232178,1.494683
2007-05-21,40.459473,-74.230965,1.51522
2007-05-21,40.458725,-74.229668,1.678402
2007-05-21,40.458012,-74.228325,1.391031
2007-05-21,40.457371,-74.226936,1.541263
2007-05-21,40.456776,-74.225525,1.407845
2007-05-21,40.456192,-74.224091,1.416808


In [None]:
# New Jersey sits near latitude 40 and longitude -74.
# Rows at or below latitude 38.5 are treated as bad data and removed.
njdep = njdep.loc[njdep['lat'] > 38.5]

In [None]:
# Map every remaining sample along New Jersey (slow: one marker per row).
# The view spans about 3.5 deg north-south and 2 deg east-west.
njdep.plot.scatter(x='lng', y='lat', s=0.1, figsize=(7, 14))

In [None]:
# Delaware Bay is roughly everything west of longitude -74.8;
# latitude is additionally bounded to the open interval (38.5, 39.5).
delbay = njdep[
    (njdep['lng'] < -74.8)
    & (njdep['lat'] > 38.5)
    & (njdep['lat'] < 39.5)
]
# The view spans about 1.0 deg north-south and 0.8 deg east-west.
# s sets the marker size of each data point.
delbay.plot.scatter(x='lng', y='lat', s=0.1, figsize=(8, 10))

In [None]:
# Narrow delbay to a 0.1 x 0.1 degree box inside the bay:
# longitude strictly between -75.1 and -75.0, latitude strictly between 39.05 and 39.15.
delbay = delbay[
    (delbay['lng'] > -75.1)
    & (delbay['lng'] < -75.0)
    & (delbay['lat'] > 39.05)
    & (delbay['lat'] < 39.15)
]
# The view spans 0.1 deg north-south and 0.1 deg east-west.
delbay.plot.scatter(x='lng', y='lat', s=0.1, figsize=(10, 10))

In [None]:
# Build a new dataframe in which all the data points from a single day are
# averaged into one row (days with no samples come out as NaN).
# Averaging a whole day into one point may be a little heavy-handed.
delbay_avg = delbay.resample(rule='D').mean()
delbay_avg

In [None]:
# Remove every row that contains at least one NaN
# (dropna's defaults are row-wise, how="any").
delbay_avg = delbay_avg.dropna()
delbay_avg

In [None]:
# In the previous tables 'date' is shown on its own header row because it is the
# index rather than a column. Resetting the index turns 'date' back into an
# ordinary column, so it can be referenced by name like the others; the index
# itself becomes a plain integer range.
delbay_avg = delbay_avg.reset_index(drop=False)
delbay_avg

In [None]:
# Daily-mean chlorophyll over the full record, drawn as a line.
plot1 = delbay_avg.plot(x='date', y='chlora', figsize=(10, 10))

In [None]:
# Same series as individual dots, so gaps between sampled days stay visible.
plot2 = delbay_avg.plot(x='date', y='chlora', style='.', figsize=(10, 10))

In [None]:
# Line plot restricted to 2016-01-01 through 2018-01-01 on the x-axis.
plot1 = delbay_avg.plot(x='date', y='chlora', figsize=(10, 10))
plot1.set_xlim(left=pd.Timestamp('2016-01-01'), right=pd.Timestamp('2018-01-01'))

In [None]:
# Dot plot restricted to 2017-05-01 through 2017-10-01 on the x-axis.
plot1 = delbay_avg.plot(x='date', y='chlora', style='.', figsize=(10, 10))
plot1.set_xlim(left=pd.Timestamp('2017-05-01'), right=pd.Timestamp('2017-10-01'))

In [None]:
# Show (rows, columns). DataFrame.size would return rows * columns, which
# overstates the number of daily averages by a factor of the column count.
delbay_avg.shape

In [None]:
# Distribution of the daily-mean chlorophyll values, binned into 15 bars.
delbay_avg.chlora.plot(kind='hist', bins=15, figsize=(20, 20))