### Read a file listing and assess image inventory
* Parse the listing using Pandas
* Extract list of filenames
* Use Unix epoch portion of filename to determine day of year, solar hour, and tide height.
* Make some graphs

In [1]:
from pathlib import Path
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import glob, os

# for tides
from scipy.interpolate import interp1d
import h5py

# these are our in-house routines
from coastcam_funcs import *

In [2]:
# Read the data in. Skip the header info, use variable-length whitespace as a delimiter.
# This does not work exactly right because there are some extra lines at the bottom.
df = pd.read_csv('./data/doodledlist.txt',skiprows=5, header=None, delim_whitespace=True)
df

Unnamed: 0,0,1,2,3,4
0,08/24/2022,07:20,AM,874109,1576261801.c2.timex.x4_Enter-user-ID.npz
1,08/24/2022,12:46,PM,741131,1576593001.c1.timex.x4_Enter-user-ID.npz
2,08/24/2022,07:36,AM,944657,1576684801.c1.timex.x4_Enter-user-ID.npz
3,08/24/2022,07:28,AM,818293,1576693801.c2.timex.x4_Enter-user-ID.npz
4,08/24/2022,12:26,PM,691791,1576699201.c1.timex.x4_Enter-user-ID.npz
...,...,...,...,...,...
733,08/30/2022,04:25,PM,738234,1622239200.c2.timex.x4_Enter-user-ID.npz
734,08/30/2022,04:42,PM,571099,1622395800.c2.timex.x4_Enter-user-ID.npz
735,08/30/2022,04:56,PM,786711,1622593830.c2.timex.x4_Enter-user-ID.npz
736,736,File(s),1162466067,bytes,


In [3]:
# This should work, but it doesn't, so easiest thing is to just use a text editor to clean up the file.
df.drop([736, 737])
df

Unnamed: 0,0,1,2,3,4
0,08/24/2022,07:20,AM,874109,1576261801.c2.timex.x4_Enter-user-ID.npz
1,08/24/2022,12:46,PM,741131,1576593001.c1.timex.x4_Enter-user-ID.npz
2,08/24/2022,07:36,AM,944657,1576684801.c1.timex.x4_Enter-user-ID.npz
3,08/24/2022,07:28,AM,818293,1576693801.c2.timex.x4_Enter-user-ID.npz
4,08/24/2022,12:26,PM,691791,1576699201.c1.timex.x4_Enter-user-ID.npz
...,...,...,...,...,...
733,08/30/2022,04:25,PM,738234,1622239200.c2.timex.x4_Enter-user-ID.npz
734,08/30/2022,04:42,PM,571099,1622395800.c2.timex.x4_Enter-user-ID.npz
735,08/30/2022,04:56,PM,786711,1622593830.c2.timex.x4_Enter-user-ID.npz
736,736,File(s),1162466067,bytes,


In [4]:
# I removed the last couple of lines and saved with a new name.
# Also, this time I am assigning column names as I read in.
df = pd.read_csv('./data/doodledlist_CRS.txt',skiprows=5, header=None, delim_whitespace=True, names = ['date','time','ampm','size','filename'])
df

Unnamed: 0,date,time,ampm,size,filename
0,08/24/2022,07:20,AM,874109,1576261801.c2.timex.x4_Enter-user-ID.npz
1,08/24/2022,12:46,PM,741131,1576593001.c1.timex.x4_Enter-user-ID.npz
2,08/24/2022,07:36,AM,944657,1576684801.c1.timex.x4_Enter-user-ID.npz
3,08/24/2022,07:28,AM,818293,1576693801.c2.timex.x4_Enter-user-ID.npz
4,08/24/2022,12:26,PM,691791,1576699201.c1.timex.x4_Enter-user-ID.npz
...,...,...,...,...,...
731,08/23/2022,01:58,PM,669738,1622133000.c1.timex.x4_Enter-user-ID.npz
732,08/23/2022,01:51,PM,681825,1622140200.c1.timex.x4_Enter-user-ID.npz
733,08/30/2022,04:25,PM,738234,1622239200.c2.timex.x4_Enter-user-ID.npz
734,08/30/2022,04:42,PM,571099,1622395800.c2.timex.x4_Enter-user-ID.npz


In [5]:
# Now I am going to clean up a little by removing the columns I don't need.
# (I could also drop them by name)
df = df.drop(df.columns[[0, 1, 2, 3]], axis=1) 
df

Unnamed: 0,filename
0,1576261801.c2.timex.x4_Enter-user-ID.npz
1,1576593001.c1.timex.x4_Enter-user-ID.npz
2,1576684801.c1.timex.x4_Enter-user-ID.npz
3,1576693801.c2.timex.x4_Enter-user-ID.npz
4,1576699201.c1.timex.x4_Enter-user-ID.npz
...,...
731,1622133000.c1.timex.x4_Enter-user-ID.npz
732,1622140200.c1.timex.x4_Enter-user-ID.npz
733,1622239200.c2.timex.x4_Enter-user-ID.npz
734,1622395800.c2.timex.x4_Enter-user-ID.npz


In [6]:
# Test the logic using the first row.
# grab the filename in the first row as string
fn = df.iloc[0]['filename']
print(fn)
epoch = fn[0:10] # not weird requirement to go to 10
print(epoch)
cam = fn[11:13]
print(cam)

1576261801.c2.timex.x4_Enter-user-ID.npz
1576261801
c2


In [7]:
# enter site-specific info needed for tide and time routines
station = 'CACO02'
if station == 'CACO01':
    print('Using info for CACO01 - Head of the Meadow')
    # tide_file = 'C:/crs/src/CoastCam/data/HoM_ADCIRC_tide_predictions.mat'
    tide_file = 'D:/crs/src/CoastCam/data/HoM_ADCIRC_tide_predictions.mat'
    latitude = 42.0504803
    longitude = -76.0773798

elif station == 'CACO02':
    print('Using info for CACO02 - Marconi Beach')
    latitude = 42.0504796
    longitude = -76.0773778
    #tide_file = 'C:/crs/src/CoastCam/data/Marconi_ADCIRC_tide_predictions.mat'
    tide_file = 'D:/crs/src/CoastCam/data/Marconi_ADCIRC_tide_predictions.mat'

# load tide file
data = h5py.File(tide_file,'r')
T = np.squeeze(np.array(data['T']))
tid = np.squeeze(np.array(data['tid']))

Using info for CACO02 - Marconi Beach


In [8]:
# loop through and calc info for database
nfiles = df.shape[0]
# dfs = df.sample(nfiles)
print(nrows, 'in database')
az = np.nan*np.ones(nfiles)
zen = np.nan*np.ones(nfiles)
dto = np.nan*np.ones(nfiles)
hsn = np.nan*np.ones(nfiles)
dn = np.nan*np.ones(nfiles)
tidi = np.nan*np.ones(nfiles)
doy = np.nan*np.ones(nfiles)
dts_list = []
dto_list = []

for i in range(0,nfiles):
    fn = df.iloc[i]['filename']
    dts, dto = filetime2timestr( fn )
    dts_list.append( dts )
    dto_list.append( dto )
    hsn[i] = solar_noon_offset( dts, longitude)

    az[i], zen[i] = sun_az_zen( dts, latitude, longitude )
    dn[i] = datetime2matlabdn( dto )
    doy[i] = dto.timetuple().tm_yday
    tidi[i] = np.interp(dn[i], T, tid)

NameError: name 'nrows' is not defined

In [None]:
plt.hist(tidi)