In [None]:
import pandas as pd
import numpy as np
import requests
import os
import time
from ratelimit import limits, sleep_and_retry

# The main.py file is the main code body for the PyBuoy application.
#note: captain_seemore is available as a repo name on GitHub

# Helper functions
@sleep_and_retry
def get_drifting_buoy_data(buoy_id):
    # Get buoy data from NOAA # https://www.ndbc.noaa.gov/data/realtime2/{buoy_id}.drift
    url = f'https://www.ndbc.noaa.gov/data/realtime2/{buoy_id}.drift' # f-string url
    r = requests.get(url) # Get the data from the URL
    with open('data.csv', 'wb') as f: # Write the data to a file
        f.write(r.content) # Write the data to a file
    df = pd.read_csv('data.csv', header=1, parse_dates=True, delimiter = '\s+') # Read in the data
    return df # return the buoy data

@sleep_and_retry
def get_stationary_buoy_data(buoy_id):
    # https://www.ndbc.noaa.gov/data/realtime2/21415.dart
    # example buoy data pull for water column height (capture the latest one)
    url = f'https://www.ndbc.noaa.gov/data/realtime2/{buoy_id}.dart' # f-string url
    r = requests.get(url)
    with open('data.csv', 'wb') as f:
        f.write(r.content)
    df = pd.read_csv('data.csv', header=1, parse_dates=True, delimiter = '\s+')
    df.head()
    return df


# Buoy Cam Data
def get_buoy_cam(buoy_id):
    """
    NDBC operates BuoyCAMs at several stations. These BuoyCAMs typically take photos only during daylight hours.

    To view the most recent BuoyCAM image from an NDBC station, use this URL:

        https://www.ndbc.noaa.gov/buoycam.php?station=xxxxx

    where xxxxx is the desired station ID. To see which stations are currently reporting BuoyCAM images, check the BuoyCAMs map.

    If the server encounters any difficulties in processing your request, you will receive one of these error messages:

        No station specified

        Modify your URL to use the station parameter to specify a valid station with a BuoyCAM (station=xxxxx where xxxxx is the station ID). Review the BuoyCAMs map to see which stations have a BuoyCAM.
        Invalid station specified

        The station on the URL is not a valid station. Review the BuoyCAMs map to see which stations have a BuoyCAM.
        This station has no BuoyCAM

        The station on the URL is valid but has no BuoyCAM installed. Look at the BuoyCAMs map to see which stations have a BuoyCAM.
        BuoyCAM photo for this station is older than 16 hours

        The BuoyCAM on the specified station has not reported in the past 16 hours, hence there is no image to display.
        Unable to access BuoyCAMs at this time

        There is an issue preventing the BuoyCAM process from functioning properly. Recommend waiting at least 30 minutes and trying again, if the problem persist contact the NDBC webmaster with the URL used and the date/time the error was received."""

    # https://www.ndbc.noaa.gov/buoycam.php?station=21415
    url = f'https://www.ndbc.noaa.gov/buoycam.php?station={buoy_id}' # f-string url
    r = requests.get(url) # Get the data from the URL
    r_text = r.text # get the text from the request
    print(r_text)

    with open('data.csv', 'wb') as f:
        f.write(r.content) # this writes in latin-1 encoding
    df = pd.read_csv('data.csv', header=1, parse_dates=True, delimiter = '\s+') # Read in the data
    df.head() # this is a pandas dataframe
    return df # Main code body



def get_available_buoy_ids():

    # source for this data: https://www.ndbc.noaa.gov/to_station.shtml

    # task 1 -  use regex to extract all numbers like 44004 from stations.txt file and save to a list.
    # task 2 - use regex to also identify any identifiers like ALXN6 or ALXN7 and append them to the list as well.
    import re
    stations_text = open('data/stations.txt', 'r').read()
    task_one_stations_list = re.findall(r' \d+ ', stations_text) # find all numbers in the file
    task_two_stations_list = re.findall(r' \w+\d ', stations_text) # find all identifiers in the file
    # combine by adding the second list to the first with .extend()
    task_one_stations_list.extend(task_two_stations_list)
    # remove the spaces from the list
    stations_list = [x.strip() for x in task_one_stations_list] # remove whitespace
    # strip the whitespace from the list
    stations_list = [x.strip() for x in stations_list] # remove whitespace
    # remove duplicates
    stations_list = [x for x in task_one_stations_list if x != ''] # remove empty strings
    stations_list = list(dict.fromkeys(stations_list))
    # remove the first element

    # remove all nonalpha characters from the elements in the list
    stations_list = [re.sub(r'\W+', '', x) for x in stations_list]
    print(len(stations_list), ' stations were identified')
    # print(stations_list)


    return stations_list



In [None]:
def main():
    # First we need to get the list of available buoy ids
    # Then we need to ask the question, "Does this buoy transmit photos? (BuoyCam) Y/N"
    # If it does then we want to get the latest photo and save it to the data folder with the buoy id as the file name (every time we run this script we want to overwrite the existing photo with the latest one.)

    buoy_ids = get_available_buoy_ids()
    #* Now we have a list of buoy ids that we can use to get photos from (potentially).
    #? Question: How do we know if a buoy has a camera? (BuoyCam)
    # https://www.ndbc.noaa.gov/buoycam.php?station=XXXXX

    # We have to check the buoy id to see if it has a BuoyCam if it does not then we will see an error message: "This station has no BuoyCAM"
    # The errors that merit dropping the buoy id are:
    # an error that contains: "There is an issue preventing the BuoyCAM process"
    # an error that contains: "URL is valid but has no BuoyCAM installed"
    # an error that contains: "BuoyCAM photo for this station is older than 16 hours"

    # other errors just mean that the buoy is not transmitting data at the moment and we can try again later.

    # Process Flow:
    # 1. Get the list of buoy ids
    # 2. For each buoy id:
    # 3. Check if the buoy has a BuoyCam
    # 4. If it does...
    # 4a. The response from the request has an encoding that will need to be converted to utf-8.
        #then get the latest photo and save it to the data folder with the buoy id as the file name (every time we run this script we want to overwrite the existing photo with the latest one.)



main()

# Variant update rates for buoys in the dataset
Now going through the list of buoy ids I want to check the buoy_update_rates_dict.csv file for the update rate (how often the image changes).
The key is the buoy id and the value is a tuple [update rate, last update datetime in seconds].
Before I pull another photo I want to check if the update rate has passed since the last update. If it has, I pull the photo and update the last update datetime.
# Get the buoy update rates
```python
buoy_update_rates = pd.read_csv('buoy_update_rates_dict.csv', index_col=0)
buoy_update_rates.head()
```



In [2]:
import pandas as pd

In [1]:
buoy_update_rates = pd.read_csv('buoy_update_rates_dict.csv', index_col=0)
buoy_update_rates.head()

NameError: name 'pd' is not defined

In [3]:
import requests
import json
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import re
import os
from tqdm import tqdm
import time
# the list of buoys to check for cameras are in the file data/buoy_ids.csv
with open('data/buoy_ids.csv', 'r') as f:
    buoy_ids = f.read().splitlines()
import datetime
import cv2

In [4]:
import cv2
import numpy as np
import urllib.request
import time

# Image Processing Functions
# we have the images in the images/buoys directory already downloaded.

def divide_into_panels(buoy_id, image_file):
    # divide the image into six panels, as dictated in the image processing pipeline for NOAA Buoy Cameras Comments above.

    # read the image
    img = cv2.imread(image_file)
    # get the name of the image file
    image_name = image_file.split('/')[-1]

    # get the dimensions of the image
    height, width, channels = img.shape

    # Before dividing into panels, crop the image to remove 30 pixels from the bottom of the image.
    # This is to remove the "Buoy Camera" text from the image.
    # img = img[0:height-30, 0:width]

    # divide the image into six panels named: image_name_panel_#.jpg
    image_name = image_name.replace('.jpg', '') # remove the .jpg extension for now
    panel_1 = img[0:height-30, 0:int(width/6)]
    cv2.imwrite('images/panels/{}/{}_panel_1.png'.format(buoy_id, image_name), panel_1)
    panel_2 = img[0:height-30, int(width/6):int(width/3)]
    cv2.imwrite('images/panels/{}/{}_panel_2.png'.format(buoy_id, image_name), panel_2)
    panel_3 = img[0:height-30, int(width/3):int(width/2)]
    cv2.imwrite('images/panels/{}/{}_panel_3.png'.format(buoy_id, image_name), panel_3)
    panel_4 = img[0:height-30, int(width/2):int(width*2/3)]
    cv2.imwrite('images/panels/{}/{}_panel_4.png'.format(buoy_id, image_name), panel_4)
    panel_5 = img[0:height-30, int(width*2/3):int(width*5/6)]
    cv2.imwrite('images/panels/{}/{}_panel_5.png'.format(buoy_id, image_name), panel_5)
    panel_6 = img[0:height-30, int(width*5/6):width]
    cv2.imwrite('images/panels/{}/{}_panel_6.png'.format(buoy_id, image_name), panel_6)
    return panel_1, panel_2, panel_3, panel_4, panel_5, panel_6



in a while loop

    continually check the buoy_update_rates_dictionary by pinging it every 5 seconds. Once any of the buoys are due for an update, pull the image and update the last update datetime for that buoy. Then go back to checking the buoy_update_rates_dictionary every 5 seconds.



In [6]:
# read cam urls from file
with open('data/camera_urls.csv', 'r') as f:
    cam_urls = f.read().splitlines()


buoy_update_rates_dict = {} # blank dictionary to store the update rates for each buoy (i.e. how often the buoy takes a picture (measured in seconds))
# fill the dictionary with blank update rate arrays for each buoy.
# these arrays will be averaged to get the average update rate for each buoy in real time.

for buoy_id in buoy_ids:
    buoy_update_rates_dict[buoy_id] = 600 # set the initial update rate to 600 seconds (10 minutes)
    

In [7]:
def trigger_buoy_update(list_of_buoys):
    for buoy_id in tqdm(list_of_buoys):
        cam_url = 'https://www.ndbc.noaa.gov/buoycam.php?station={}'.format(buoy_id)
        cam_urls.append(cam_url) # add the camera url to the list of camera urls

    start_time = datetime.datetime.now() # use this to calculate the next time to download images (every ten minutes)
    print('I can still see things! Downloading images...')
    for cam_url in tqdm(cam_urls): #note: temporarily limiting to 5 cameras
        # get the buoy id from the camera url
        buoy_id = re.search('station=(.*)', cam_url).group(1)
        # get the current time
        now = datetime.datetime.now()
        # create a directory for the buoy id if it doesn't already exist
        if not os.path.exists('images/buoys/{}'.format(buoy_id)):
            os.makedirs('images/buoys/{}'.format(buoy_id))
        # get the image
        if 'images/buoys/{}/{}_{}_{}_{}_{}.jpg'.format(buoy_id, now.year, now.month, now.day, now.hour, now.minute) not in os.listdir('images/buoys/{}'.format(buoy_id)): # if the image has not already been downloaded
            time.sleep(0.25) # wait 0.25 seconds to avoid getting blocked by the server
            img = requests.get(cam_url) # get the image
            # load the last image downloaded for this buoy
            if len(os.listdir('images/buoys/{}'.format(buoy_id))) > 0:
                last_image = 'images/buoys/{}/{}'.format(buoy_id, os.listdir('images/buoys/{}'.format(buoy_id))[-1])
                last_image = cv2.imread(last_image)
            else: # if this is the first image downloaded for this buoy
                last_image = None # note: need this to be shown in function above
            
            # check if the current image is different from the last image downloaded for this buoy

            # img needs to be converted to a numpy array for cv2 to work with it
            img = np.asarray(bytearray(img.content), dtype="uint8")
            img = cv2.imdecode(img, cv2.IMREAD_COLOR) # convert the image to a numpy array
            if image_has_changed(img, last_image) == False and last_image is not None: # if the image has not changed from the last image downloaded
                # if the images are the same, then don't save the image, and update the buoy_update_rates_dict with the time delta between now and when we last downloaded an image for this buoy.
                buoy_update_rates_dict[buoy_id] =  [(datetime.datetime.now() - start_time).seconds, datetime.datetime.now()] # update the buoy_update_rates_dict with the time delta between now and when we last downloaded an image for this buoy.
            else:
                # if the images are different, then save the image, and update the buoy_update_rates_dict with the time delta between now and when we last downloaded an image for this buoy.
                # buoy_update_rates_dict[buoy_id] = (now - buoy_update_rates_dict[buoy_id]).seconds
                # make the buoy_update_rates_dict value for buoy_id the current time in seconds minus the last time we downloaded an image for this buoy in seconds (i.e. the time delta between now and when we last downloaded an image for this buoy)
                buoy_update_rates_dict[buoy_id] = [(datetime.datetime.now() - start_time).seconds, datetime.datetime.now()]

                # save the image
                with open('images/buoys/{}/{}_{}_{}_{}_{}.jpg'.format(buoy_id, now.year, now.month, now.day, now.hour, now.minute), 'wb+') as f:
                    f.write(img) # write the image to the file
        else:
            pass # if the image already exists, don't download it again
    # wait ten minutes

    # Save the panels to the images/panels directory

    list_of_buoys = os.listdir('images/buoys') # get the list of buoy ids by their directory names

    for buoy_id in list_of_buoys:
        # get the list of images for the buoy
        if buoy_id != '.DS_Store':
            images = os.listdir('images/buoys/{}'.format(buoy_id))
            # if the image has not already been used to create panels, create the panels and save them to the images/panels directory
            for image in images:
                try:
                    # if the image is not None
                    if image == '.DS_Store' or image != 'None':
                        continue
                    # If the panels directory for the buoy doesn't exist, create it.
                    if not os.path.exists('images/panels/{}'.format(buoy_id)):
                        os.makedirs('images/panels/{}'.format(buoy_id))
                    if 'images/buoys/{}/{}'.format(buoy_id, image) in os.listdir('images/panels/{}'.format(buoy_id)):
                        print('This image has already been used to create panels.')
                        continue
                    if image == '.DS_Store' and buoy_id != '.DS_Store':
                        continue # skip the .DS_Store file
                    #print('Processing image: {}'.format(image))

                    # get the panels
                    panel_1, panel_2, panel_3, panel_4, panel_5, panel_6 = divide_into_panels(buoy_id, 'images/buoys/{}/{}'.format(buoy_id, image))
                    # save the panels to the images/panels directory
                except Exception as e:
                    print('Error: {}'.format(e))
                    pass
    # Stage 3: Create a video from the images (later)
    # Stage 4: save buoy_update_rates_dict to a csv file
    buoy_update_rates_dict_df = pd.DataFrame.from_dict(buoy_update_rates_dict, orient='index')
    buoy_update_rates_dict_df.to_csv('data/buoy_update_rates_dict.csv')


In [8]:
def check_for_updates(buoy_update_rates_dictionary):
    # Check the buoy_update_rates_dictionary to see if any of the buoys satistfy the update rate requirements:
    # Requirements: the current time minus the last time we downloaded an image for this buoy must be greater than the update rate for this buoy. If it is, then we will add the buoy id to the list of buoys that need to be updated and return it to the main function.
    # If the buoy_update_rates_dictionary is empty, then we will return an empty list.
    # If the buoy_update_rates_dictionary is not empty, then we will check the update rates for each buoy and return a list of the buoy ids that need to be updated.

    if len(buoy_update_rates_dictionary) == 0:
        return []
    else:
        buoys_to_update = []
        for buoy_id in buoy_update_rates_dictionary:
            if (datetime.datetime.now() - buoy_update_rates_dictionary[buoy_id][1]).seconds > buoy_update_rates_dictionary[buoy_id][0]:
                buoys_to_update.append(buoy_id)
        return buoys_to_update # return a list of the buoy ids that need to be updated


In [10]:
type(buoy_update_rates_dict)

dict

In [9]:
while True:

    # The Buoy IDs

    # continually check the buoy_update_rates_dictionary by pinging it every 5 seconds. Once any of the buoys are due for an update, pull the image and update the last update datetime for that buoy. Then go back to checking the buoy_update_rates_dictionary every 5 seconds.

    # check the buoy_update_rates_dictionary to see if any of the buoys satistfy the update rate requirements:
    updates_due = check_for_updates(buoy_update_rates_dict) # check the buoy_update_rates_dictionary to see if any of the buoys satistfy the update rate requirements:
    
    if len(updates_due) > 0:
        # if there are any buoys that need to be updated, then update them
        print('Updates due: {}'.format(updates_due))
        trigger_buoy_update(updates_due)
    else:
        # if there are no buoys that need to be updated, then wait 5 seconds and check again
        time.sleep(5)

TypeError: 'int' object is not subscriptable

In [31]:
# Go into each panel directory and sort the images into folders by the date in their filename (if they haven't already been sorted)
# example unsorted directory: 'images/panels/46078/2022_11_5_15_44_panel_1.png'
# example sorted directory: images/panels/51000/2022_11_5_15_44/panel_1.png
import shutil

for buoy_id in os.listdir('images/panels'):
    if buoy_id != '.DS_Store' and '.' not in buoy_id:
        for image in os.listdir('images/panels/{}'.format(buoy_id)):
            if image != '.DS_Store' and '.' not in image:
                try:
                    # find the 2022_11_5_15_44 (#_#_#_#_#) part of the filename and make a new folder with that name.
                    # move the image that contain the matching pattern in their filename into the new folder
                    # if the folder already exists, then move the image into the existing folder
                    # if the folder doesn't exist, then create the folder and move the image into the new folder
                    # if the image has already been moved into a folder, then skip it
                    # if the image is the .DS_Store file, then skip it
                    # if the image is None, then skip it
                
                    pattern = re.compile(r'\d{4}_\d{1,2}_\d{1,2}_\d{1,2}_\d{1,2}') # create a pattern to match the date in the filename
                    match = pattern.search(image) # search the filename for the date
                    if match:
                        date = match.group() # get the date from the match
                        if not os.path.exists('images/panels/{}/{}'.format(buoy_id, date)): # if the folder doesn't exist, then create it
                            os.makedirs('images/panels/{}/{}'.format(buoy_id, date)) # create the folder
                        shutil.move('images/panels/{}/{}'.format(buoy_id, image), 'images/panels/{}/{}/{}'.format(buoy_id, date, image)) # move the image into the new folder
                    else:
                        pass
                except Exception as e:
                    pass
        else:
            pass
    else:
        pass

Error: Cannot move a directory 'images/panels/45007/2022_11_5_15_50' into itself 'images/panels/45007/2022_11_5_15_50/2022_11_5_15_50'.

https://helios.earth/developers/api/cameras/#live-video

In [27]:
# Panorama Generator with Python and OpenCV
import cv2
import os

images_folder = 'images/buoys/41001' # the folder containing the images to be used to create the panorama

mainFolder = 'images/panels' # the main folder containing the images folder

myFolders = os.listdir(mainFolder) # get the list of folders in the main folder

#print(myFolders)

# for folder in myFolders:
#     path = os.path.join(mainFolder, folder) # get the path to the folder
#     images = []
#     if '.' not in path:
#         myList = os.listdir(path) # get the list of images in the folder
#         # retrieving the names of the images
#         # the images in this folder are segmented by time stamp and have six panels each
#         # create a list of the images in the folder (for each set) 
        
#         for imgN in myList:

#             curImg = cv2.imread(f'{path}/{imgN}')
#             # resize the images
#             curImg = cv2.resize(curImg, (0,0), None, 0.2, 0.2)
#             images.append(curImg)

#         # using the stitcher function to create the panorama
#         stitcher = cv2.Stitcher.create() # create the stitcher object
#         (status, result) = stitcher.stitch(images) # status = 0 if the panorama was created successfully
#         if (status == cv2.STITCHER_OK):
#             cv2.imshow(folder, result)
#             cv2.waitKey(1)
#             cv2.imwrite(f'images/panoramas/{folder}_{str(datetime.datetime.now())}.jpg', result)
#         else:
#             print('Panorama was not created successfully.')


    

Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created successfully.
Panorama was not created 

In [5]:
# Import required packages
import cv2
import pytesseract

def get_text_from_image(image_path):
    pytesseract.pytesseract.tesseract_cmd = '/Users/grahamwaters/opt/anaconda3/envs/EmersonWriter/lib/python3.9/site-packages/pytesseract/pytesseract.py'

    #read image
    img = cv2.imread(image_path)
    # get grayscale image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #noise removal
    noise=cv2.medianBlur(gray,3)
    # thresholding# converting it to binary image by Thresholding
    # this step is require if you have colored image because if you skip this part
    # then tesseract won’t able to detect text correctly and this will give incorrect #result
    thresh = cv2.threshold(noise, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    #Configuration
    config = ('-l eng — oem 3 — psm 3')
    # pytessercat
    text = pytesseract.image_to_string(thresh,config=config)
    print(text)








In [7]:
# Testing this on W03A_2022_11_06_0010.jpg
get_text_from_image('images/panels/41001/2022_11_6_0_10/panel_1.png')

[ WARN:0@162.049] global /Users/runner/work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('images/panels/41001/2022_11_6_0_10/panel_1.png'): can't open/read file: check file path/integrity


error: OpenCV(4.6.0) /Users/runner/work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
