In [49]:
import pandas as pd
import fathomnet.api
import os
from datetime import datetime

In [2]:
# Location of the Excel workbook that the following cells read.
# Set the ELASMOBRANCHII_XLSX environment variable to point at the workbook on
# another machine; when it is unset, the original local path is used unchanged.
path = os.environ.get(
    "ELASMOBRANCHII_XLSX",
    r"D:\Databases\Earth Guardian's Weekly Feed\Elasmobranchii\Elasmobranchii.xlsx",
)

In [3]:
# Read both sheets in a single pass: a list passed as sheet_name makes
# read_excel return a dict of DataFrames keyed by sheet name, so the workbook
# is opened and parsed once instead of once per sheet.
sheets = pd.read_excel(path, sheet_name=['occurrence', 'multimedia'])
occdf = sheets['occurrence']    # 'occurrence' sheet
mediadf = sheets['multimedia']  # 'multimedia' sheet

In [4]:
# Discard every column that holds no data at all (all values missing),
# applying the same clean-up to both frames.
occdf, mediadf = (
    frame.dropna(axis='columns', how='all') for frame in (occdf, mediadf)
)

In [5]:
# Join the occurrence rows to the multimedia rows on the shared gbifID key.
# This is an inner join (the pandas default); column names present in both
# frames receive the default suffixes: _x for occurrence, _y for multimedia.
totaldf = occdf.merge(mediadf, how='inner', on='gbifID', suffixes=('_x', '_y'))

In [6]:
# List the column names of the merged frame; names that existed in both
# input frames carry an _x / _y suffix from the merge.
totaldf.columns

Index(['gbifID', 'identifier_x', 'license_x', 'basisOfRecord',
       'dataGeneralizations', 'dynamicProperties', 'occurrenceID',
       'recordNumber', 'recordedBy', 'occurrenceStatus', 'occurrenceRemarks',
       'eventDate', 'year', 'month', 'day', 'continent', 'countryCode',
       'stateProvince', 'decimalLatitude', 'decimalLongitude',
       'coordinateUncertaintyInMeters', 'identifiedBy',
       'identificationVerificationStatus', 'scientificNameID',
       'taxonConceptID', 'scientificName', 'kingdom', 'phylum', 'class',
       'order', 'family', 'genus', 'genericName', 'specificEpithet',
       'taxonRank', 'vernacularName', 'taxonomicStatus', 'taxonRemarks',
       'datasetKey', 'publishingCountry', 'lastInterpreted', 'issue',
       'mediaType', 'hasCoordinate', 'hasGeospatialIssues', 'taxonKey',
       'acceptedTaxonKey', 'kingdomKey', 'phylumKey', 'classKey', 'orderKey',
       'familyKey', 'genusKey', 'speciesKey', 'species',
       'acceptedScientificName', 'verbatimScie

### MAIN GOAL: Combine multimedia and occurrence data, make it upload-ready with available data
- Primary Data: 
 - concept: Scientific Name, Class
   - Scientific name preferred
 - url: Image URL
 - x: Bounding box X Coord
 - y: Bounding box Y Coord
 - width: Bounding Box Width (px)
 - height: Bounding Box Height (px)


- Optional Data
 - altitude
   - m
 - altconcept: Secondary name for localization
 - depth
   - m
 - groupof: Whether the box contains multiple entities rather than a single entity
   - TRUE/FALSE
 - imagingtype: Any specialized imaging systems
 - latitude
   - Between -90 (S) and 90 (N)
 - longitude
   - Between -180 (W) and 180 (E)
 - observer: Name of person or program who created the localization
 - occluded: If object is occluded by another in an image
   - TRUE/FALSE
 - oxygen: O2 concentration
   - ml/L
 - pressure
   - dbar
 - salinity
 - temperature
   - Celsius
 - timestamp
   - ISO 8601 format
 - truncated: If object extends beyond boundary of image
   - TRUE/FALSE
 - userdefinedkey: UUID
 - Any additional columns are added as key-value tags
 


### 2nd GOAL: Integrate Pythia into the code: try uploading images directly from the links in the multimedia CSV, read the bounding-box results from the website, and write the box coordinates into the CSV
- For larger image sets, use integrated models to create bounding boxes
- https://colab.research.google.com/github/fathomnet/fathomnet-py/blob/main/tutorial.ipynb#scrollTo=jrTNntfuGRLQ (use as reference)

In [8]:
# Primary (required) columns of the upload table, in output order.
cols = [
    'concept',  # scientific name
    'url',      # image URL
    'x',        # bounding box x coordinate
    'y',        # bounding box y coordinate
    'width',    # bounding box width (px)
    'height',   # bounding box height (px)
]

In [9]:
# Build the upload table from the merged frame. It starts as an empty frame
# with the primary columns, so x, y, width and height exist but stay empty
# until bounding boxes are produced.
inputdf = pd.DataFrame(columns=cols)
inputdf['concept'] = totaldf['verbatimScientificName']
inputdf['url'] = totaldf['identifier_y']  # _y = identifier from the multimedia frame
# inputdf['depth'] = totaldf['verbatimDepth']
inputdf['latitude'] = totaldf['decimalLatitude']
inputdf['longitude'] = totaldf['decimalLongitude']

# ISO 8601 timestamps. Calling .isoformat() on every cell turns a missing date
# (NaT) into the literal string 'NaT' and raises AttributeError on cells that
# are not datetime objects. Coerce to datetimes first (unparseable values
# become NaT) and keep missing dates as missing values.
event_dates = pd.to_datetime(totaldf['eventDate'], errors='coerce')
inputdf['timestamp'] = event_dates.apply(
    lambda ts: ts.isoformat() if pd.notna(ts) else None
)
inputdf['notes'] = totaldf['occurrenceRemarks']

In [10]:
# Input dataset without bounding boxes: the x, y, width and height columns
# have not been filled in at this point.
inputdf

Unnamed: 0,concept,url,x,y,width,height,latitude,longitude,timestamp,notes
0,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
1,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
2,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
3,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
4,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
...,...,...,...,...,...,...,...,...,...,...
978,Manta,https://images.ala.org.au/image/proxyImageThum...,,,,,-27.436437,153.546137,2016-09-21T19:31:00,It was in deep waters in a gorge walk
979,Orectolobus,https://images.ala.org.au/image/proxyImageThum...,,,,,-27.398803,153.437653,2016-09-19T17:38:00,We think might be a banded wobbegong. It was s...
980,Orectolobus,https://images.ala.org.au/image/proxyImageThum...,,,,,-27.398803,153.437653,2016-09-19T17:38:00,We think might be a banded wobbegong. It was s...
981,HETERODONTIDAE,https://images.ala.org.au/image/proxyImageThum...,,,,,-33.926870,137.618758,2016-09-08T21:21:00,


In [1]:
# TODO: try different ML models and change the code accordingly

In [79]:
import torchvision                              # Library of datasets, models, and image transforms
import pickle                                   # Data serialization library
import json                                     # Data storage (JavaScript Object Notation)
import matplotlib.pyplot as plt                 # Plotting utilities
import torch                                    # Tensor library for manipulating large models and data
import requests                                 # Manages HTTP requests
import random                                   # Random number generator
import numpy as np                              # Array manipulations

# Import key functions & modules from detectron2
from detectron2 import model_zoo
from detectron2.data import Metadata
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import Visualizer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
import detectron2.data.transforms as T

# Import from pyplot and PIL for easy plotting
from matplotlib.pyplot import imshow
from PIL import Image

In [80]:
# Model files
# Training configuration file
CONFIG_FILE = "fathomnet_config_v2_1280.yaml"
# FathomNet model weights
WEIGHT_FILE = "model_final.pth"

# Detection thresholds
# Model score threshold: suppresses low-confidence annotations
SCORE_THRESH = 0.3
# NMS threshold: filters the boxes proposed by the model
NMS_THRESH = 0.45