### MAIN GOAL: Combine multimedia and occurrence data, make it upload-ready with available data
#### Primary Data: 
 - concept: Scientific Name, Class
   - Scientific name preferred
 - url: Image URL
 - x: Bounding box X Coord
 - y: Bounding box Y Coord
 - width: Bounding Box Width (px)
 - height: Bounding Box Height (px)


#### Optional Data
 - altitude
   - m
 - altconcept: Secondary name for localization
 - depth
   - m
 - groupof: If box contains multiple or singular entity
   - TRUE/FALSE
 - imagingtype: Any specialized imaging systems
 - latitude
   - Btwn -90S to 90N
 - longitude
   - Btwn -180W to 180E
 - observer: Name of person or program who created the localization
 - occluded: If object is occluded by another in an image
   - TRUE/FALSE
 - oxygen: O2 concentration
   - ml/L
 - pressure
   - dbar
 - salinity
 - temperature
   - Celsius
 - timestamp
   - ISO 8601 format
 - truncated: If object extends beyond boundary of image
   - TRUE/FALSE
 - uderdefinedkey: UUID
 - Any additional columns are added as key-value tags
 


### 2nd GOAL: Integrate Pythia into code, can try and upload directly from link in multimedia CSV, read bounding box results from website and put the box coordinates into csv
- For larger image sets, use integrated models to create bounding boxes
- https://colab.research.google.com/github/fathomnet/fathomnet-py/blob/main/tutorial.ipynb#scrollTo=jrTNntfuGRLQ (use as referece)

In [35]:
#to run pythia js scripts in python


#!pip install pandas fathomnet datetime
#!pip install openpyxl
#!pip install simplejson
#!pip install selenium
#!pip install wget
#!pip install pathlib
#!pip install webdriver-manager

In [36]:
import pandas as pd
import fathomnet.api
import os
from datetime import datetime

In [37]:
#Using GBIF Dataset Layout
def initGBIFDataset(path):
    occdf = pd.read_excel(path, 'occurrence')
    occdf = occdf.dropna(how='all', axis = 1)
    mediadf = pd.read_excel(path, 'multimedia')
    mediadf = mediadf.dropna(how='all', axis = 1)

    totaldf = pd.merge(occdf, mediadf, on='gbifID')

    cols = ['concept', 'url', 'x', 'y', 'width', 'height', 'acc_score']
    inputdf = pd.DataFrame(columns=cols)
    inputdf['concept'] = totaldf['verbatimScientificName']
    inputdf['url'] = totaldf['identifier_y']
    # inputdf['depth'] = totaldf['verbatimDepth']
    inputdf['latitude'] = totaldf['decimalLatitude']
    inputdf['longitude'] = totaldf['decimalLongitude']
    inputdf['timestamp'] = totaldf['eventDate'].apply(lambda x: x.isoformat())
    inputdf['notes'] = totaldf['occurrenceRemarks']

    return inputdf

In [38]:
inputdf = initGBIFDataset(r"D:\Databases\Earth Guardian's Weekly Feed\Elasmobranchii\Elasmobranchii.xlsx")

In [39]:
import urllib.request
import wget
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
import time
from pathlib import Path
import math

#For manual downloading and json data importing
import os
import json
import glob

In [40]:
imgpath = Path(r"D:\Databases\Earth Guardian's Weekly Feed\Elasmobranchii\downloaded_images")
jsonpath = Path(r"D:\Databases\Earth Guardian's Weekly Feed\Elasmobranchii\downloaded_jsons")

In [41]:
#Empty img and json directories (TESTING ONLY)

def clearDirs(imgpath, jsonpath):
    if len(os.listdir(imgpath)) != 0:
        for f in os.listdir(imgpath):
            os.remove(os.path.join(imgpath, f))

    if len(os.listdir(jsonpath)) != 0:
        for f in os.listdir(jsonpath):
            os.remove(os.path.join(jsonpath, f))


In [42]:
d = DesiredCapabilities.CHROME
d['goog:loggingPrefs'] = {'browser':'ALL'}
chromeoptions = webdriver.ChromeOptions()
prefs = {"download.default_directory": str(jsonpath)}
chromeoptions.add_experimental_option("prefs", prefs)

def pythiaUpload(idxStart, idxEnd):
    
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chromeoptions, desired_capabilities=d)
    driver.get('http://fathomnet.org:7777/') #Pythia website

    for i in range(idxStart, idxEnd):
        imgname = "img" + str(i) + ".png"
        url = inputdf.iloc[i].url
        #Downloads img from URL (figure out how to do without downloading later)
        urllib.request.urlretrieve(url, str(imgpath/imgname))

        #print(imgname + ' successfully downloaded!')

        driver.find_element(By.XPATH, '//input[@id="file_upload"]').send_keys(str(imgpath/imgname))
        driver.find_element(By.XPATH, '//input[@id="runAlgoButton"]').click()

        downloadBut = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, '//button[@id="downloadResults"]')))
        downloadBut.click()
        # time.sleep(10)
        # #Downloads results as json (figure out how to do it without downloading later)
        # driver.find_element(By.XPATH, '//button[@id="downloadResults"]').click()
        
        #print("JSON Results for " + imgname + " downloaded!")
        time.sleep(2)

        #Most recent JSON file, for most recent downloaded image
        list_of_files = glob.glob(str(jsonpath / '*.json'))
        latest_file = max(list_of_files, key=os.path.getctime)
        data = json.load(open(latest_file))

        if (len(data['predictions']) > 0):
            boundingdata = max(data['predictions'], key=lambda x:x['scores'])
            inputdf.loc[i, 'x':'acc_score'] = [boundingdata['bbox'][0], boundingdata['bbox'][1], boundingdata['bbox'][2] - boundingdata['bbox'][0], boundingdata['bbox'][3] - boundingdata['bbox'][1], boundingdata['scores'][0]]
            # inputdf.at[i, 'x'] = boundingdata['bbox'][0]
            # inputdf.at[i, 'y'] = boundingdata['bbox'][1]
            # inputdf.at[i, 'width'] = boundingdata['bbox'][2] - boundingdata['bbox'][0]
            # inputdf.at[i, 'height'] = boundingdata['bbox'][3] - boundingdata['bbox'][1]
        else:
            #print("No bounding coordinates found!")
            continue
        print()


In [44]:
clearDirs(imgpath, jsonpath)
pythiaUpload(0, 100)































TimeoutException: Message: 


In [49]:
inputdf[0:100].to_csv(r"D:\Databases\Earth Guardian's Weekly Feed\Elasmobranchii\testsubset.csv", index=False)
inputdf[inputdf['x'].notnull()]
# for idx in inputdf[inputdf['x'].notnull()].index:
#     print(inputdf.iloc[idx].url)

Unnamed: 0,concept,url,x,y,width,height,acc_score,latitude,longitude,timestamp,notes
3,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,118.758899,94.757008,424.553535,356.528856,0.347837,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
7,Triaenodon obesus,https://images.ala.org.au/image/proxyImageThum...,174.093724,129.700239,336.083572,118.264889,0.223122,-16.932733,145.985687,2023-04-07T10:57:00,Swimming in the reef at Fitzroy island. White ...
14,Carcharhinus falciformis,https://images.ala.org.au/image/proxyImageThum...,2.961547,118.315623,649.356246,319.441054,0.234439,-16.7,146.1,2023-03-04T10:38:00,Released unharmed with hook removed
17,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,263.891287,235.725358,334.224403,119.160078,0.592687,-22.0,113.9,2021-11-29T13:31:00,"Smallish, diameter of about 40cm, in the shall..."
18,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,635.739408,436.042913,14.208778,24.811655,0.322416,-22.0,113.9,2021-11-29T13:31:00,"Smallish, diameter of about 40cm, in the shall..."
21,Trygonorrhina dumerilii,https://images.ala.org.au/image/proxyImageThum...,0.0,2.311138,639.211526,358.306414,0.37613,-33.7,115.2,2023-02-22T16:17:00,"Was swimming in the shallows, occasionally bur..."
22,Trygonorrhina dumerilii,https://images.ala.org.au/image/proxyImageThum...,0.0,0.0,636.573653,386.965422,0.253561,-33.7,115.2,2023-02-22T16:17:00,"Was swimming in the shallows, occasionally bur..."
25,Sphyrna,https://images.ala.org.au/image/proxyImageThum...,0.0,187.69942,80.390125,49.347948,0.224042,-16.6,145.9,2022-12-08T15:34:00,About 3.5 meters long. 2 of them circling the ...
30,Mobula alfredi,https://images.ala.org.au/image/proxyImageThum...,139.970144,71.340459,476.750486,272.259292,0.413743,-16.7,145.9,2023-03-12T10:01:28,"Very Large, maybe 2m across. It just went sail..."
31,Mobula alfredi,https://images.ala.org.au/image/proxyImageThum...,147.255474,57.610494,419.808538,235.17323,0.529895,-16.7,145.9,2023-03-12T06:09:00,
