### Random selection of files

In [1]:
from pathlib import Path
import imageio
import fsspec
import numpy as np
import matplotlib.pyplot as plt
import datetime
from dateutil import tz
import pandas as pd

from coastcam_funcs import *
from calibration_crs import *
from rectifier_crs import *

  from .collection import imread_collection_wrapper


#### Read the inventory of products  
Generated with `aws s3 ls s3://cmgp-coastcam/cameras/caco-01/products/ --profile coastcam > inventory.txt`  
The inventory was then edited by hand to remove a few bad entries.
Those entries have also been deleted from the S3 bucket.

In [2]:
# Read the `aws s3 ls` listing: whitespace-separated date, time, a third column
# (called 'fid' here, not used afterwards) and the object file name.
# sep=r'\s+' replaces the deprecated delim_whitespace=True, and the timestamp is
# built explicitly because combining columns through parse_dates is deprecated.
inventory = pd.read_csv(
    'inventory.txt',
    header=None,
    sep=r'\s+',
    names=['date', 'time', 'fid', 'filename'],
    dtype={'date': str, 'time': str},
)

# Keep only what the rest of the notebook uses: one timestamp column and the name.
df = pd.DataFrame({
    'datetime': pd.to_datetime(inventory['date'] + ' ' + inventory['time']),
    'filename': inventory['filename'],
})
df

Unnamed: 0,datetime,filename
0,2019-12-13 13:45:15,1576260000.c1.snap.jpg
1,2019-12-13 13:45:15,1576260000.c2.snap.jpg
2,2019-12-13 13:45:16,1576260001.c1.timex.jpg
3,2019-12-13 13:45:17,1576260001.c2.timex.jpg
4,2019-12-13 13:45:18,1576260002.c1.var.jpg
...,...,...
126214,2021-02-12 14:54:42,1613151003.c2.bright.jpg
126215,2021-02-12 14:54:42,1613151004.c1.dark.jpg
126216,2021-02-12 14:54:42,1613151004.c2.dark.jpg
126217,2021-02-12 14:54:43,1613151005.c1.rundark.jpg


### Get the list of `c1.timex` files and randomly sample *n* of them. Then keep only the sampled `c1.timex` images that have a matching `c2.timex` file.

In [3]:
# find instances of timex.c1
# regex=False matches the literal text 'c1.timex'; as a regex the '.' would
# match any character and could let unintended names through.
df2 = df[df['filename'].str.contains('c1.timex', regex=False)].reset_index(drop=True)

# random sample n
N_SAMPLE = 200
SAMPLE_SEED = 42  # fixed so a re-run draws the same files; set to None for a fresh draw
# min() keeps the cell from raising when fewer than N_SAMPLE candidates exist
dfs = (
    df2.sample(n=min(N_SAMPLE, len(df2)), random_state=SAMPLE_SEED)
       .reset_index(drop=True)
)
dfs

Unnamed: 0,datetime,filename
0,2020-08-06 12:01:06,1596718801.c1.timex.jpg
1,2020-11-25 16:02:54,1606334401.c1.timex.jpg
2,2020-04-01 18:48:08,1585778401.c1.timex.jpg
3,2020-11-01 14:59:54,1604257201.c1.timex.jpg
4,2020-10-23 18:03:42,1603486801.c1.timex.jpg
...,...,...
195,2020-01-19 06:47:05,1579431601.c1.timex.jpg
196,2020-07-29 13:35:12,1596036601.c1.timex.jpg
197,2020-06-06 17:49:37,1591477201.c1.timex.jpg
198,2020-06-06 18:49:36,1591482601.c1.timex.jpg


In [4]:
# filelist will contain all of the sampled c1.timex images that have a matching
# c2 image in the full inventory.

# Count every inventory file name once, up front, so each lookup inside the loop
# is a constant-time index access instead of a scan of the whole inventory.
name_counts = df['filename'].value_counts()

filelist = []
matches = 0
no_matches = 0
for fs1 in dfs['filename']:
    # Derive the camera-2 name by swapping the first '.c1.' token, rather than
    # overwriting a fixed character position that is only right for one name length.
    fs2 = fs1.replace('.c1.', '.c2.', 1)
    # fs2 == fs1 means there was no '.c1.' token, so there is nothing to pair with.
    # A match requires exactly one inventory entry for the c2 name.
    if fs2 != fs1 and name_counts.get(fs2, 0) == 1:
        filelist.append(fs1)
        matches += 1
    else:
        no_matches += 1

print(matches, no_matches, matches+no_matches)

173 27 200


In [5]:
# S3 access: an fsspec filesystem handle using the 'coastcam' profile ...
fs = fsspec.filesystem(
    's3',
    profile='coastcam',
)
# ... and the key prefix that is prepended to every product image name
imdir = 'cmgp-coastcam/cameras/caco-01/products/'

# fs2 = fsspec.filesystem('s3', profile='default')


In [6]:
def c1toc2(c1name):
    """Return the camera-2 image name that corresponds to a camera-1 image name.

    The first '.c1.' token in ``c1name`` is replaced by '.c2.' and the rest of
    the name is left untouched, e.g.
    '1602496801.c1.timex.jpg' -> '1602496801.c2.timex.jpg'.

    The token is located by content rather than by a fixed character position,
    so the result is correct whatever the length of the leading timestamp.

    Parameters
    ----------
    c1name : str
        File name (or key) of a camera-1 image containing '.c1.'.

    Returns
    -------
    str
        The same name with the camera token switched to '.c2.'.

    Raises
    ------
    ValueError
        If ``c1name`` contains no '.c1.' token.
    """
    head, token, tail = c1name.partition('.c1.')
    if not token:
        raise ValueError("not a camera-1 image name (no '.c1.' token): {!r}".format(c1name))
    return head + '.c2.' + tail

# quick check: show one matched c1 name and, on the next line, its derived c2 name
print(filelist[22], c1toc2(filelist[22]), sep='\n')

1602496801.c1.timex.jpg
1602496801.c2.timex.jpg


In [7]:
# Loop through filelist and, for each c1/c2 pair, calculate image time, brightness,
# sharpness, and contrast. If the camera-1 brightness exceeds BRIGHTNESS_MIN, copy
# both obliques to the local dir and append one row of statistics to the CSV.

# Plain (non-raw) literal so every '\\' is a single backslash. The earlier raw
# string kept both characters and put doubled separators in every local path.
localdir = "C:\\crs\\proj\\2019_CACO_CoastCam\\random_obliques\\"

# Minimum average brightness of the camera-1 image for a pair to be kept.
BRIGHTNESS_MIN = 40.

stats_to_csv = True
if stats_to_csv:
    # set up file for output ('w' truncates the file left by any earlier run)
    # NOTE(review): the header spells 'filname' and has a space before 'time (UTC)';
    # left byte-identical because other code may read these column names — confirm and fix.
    # NOTE(review): the header says UTC but filetime2timestr is called with
    # timezone='eastern' below — confirm which one the time column really holds.
    with open('random_stats.csv','w') as csvfile:
        csvfile.write('filname, time (UTC),avg0,s0,c0,avg1,s1,c1'+'\n')

icount = 0
for fn0 in filelist:

    # second path is same but for camera 2
    fn1 = c1toc2(fn0)

    impaths = [imdir+fn0, imdir+fn1]

    # statistics are computed for both cameras of the pair
    s0, c0 = estimate_sharpness(impaths[0],fs)
    rgb0, avg0 = average_color(impaths[0],fs)
    s1, c1 = estimate_sharpness(impaths[1],fs)
    rgb1, avg1 = average_color(impaths[1],fs)

    ftime, e = filetime2timestr(fn0, timezone='eastern')

    # only the camera-1 brightness decides whether the pair is kept
    if avg0 > BRIGHTNESS_MIN:

        # copy to local dir
        fs.download(impaths[0],localdir+fn0)
        fs.download(impaths[1],localdir+fn1)

        icount += 1

        if stats_to_csv:
            ostring = '{}.jpg,{},{:.0f},{:3.1f},{:5.1f},{:.0f},{:3.1f},{:5.1f}'.format(e,ftime,avg0,s0,c0,avg1,s1,c1)
            # reopened in append mode per row, so rows already written are on disk
            # if a later image fails
            with open('random_stats.csv', 'a') as csvfile:
                csvfile.write(ostring+'\n')

        # progress report every 20 copied pairs
        if not(icount % 20):
            print(icount)

# icount counts the pairs that passed the brightness test and were copied,
# which can be fewer than the number of pairs examined.
print(icount, 'image pairs copied out of', len(filelist), 'checked.')

20
40
60
80
100
120
140
151  images processed.
