In [1]:
import pandas as pd
import fathomnet.api
import os
from datetime import datetime
import urllib.request
from PIL import Image

In [2]:
# Location of the GBIF export workbook. The original machine-specific path stays
# as the default; set the ELASMOBRANCHII_XLSX environment variable to point the
# notebook at a copy stored elsewhere without editing this cell.
path = os.environ.get(
    "ELASMOBRANCHII_XLSX",
    r"D:\Databases\Earth Guardian's Weekly Feed\Elasmobranchii\Elasmobranchii.xlsx",
)

In [3]:
# Read the two worksheets of the workbook: occurrence records first, then the
# multimedia (image link) records.
occdf, mediadf = (
    pd.read_excel(path, sheet_name=sheet)
    for sheet in ('occurrence', 'multimedia')
)

In [4]:
# Optional cleanup, currently disabled: drop columns that are empty in every row.
# occdf = occdf.dropna(how='all', axis = 1)
# mediadf = mediadf.dropna(how='all', axis = 1)

In [5]:
# Inner-join occurrence rows to their multimedia rows on the shared GBIF id.
# Column names present in both sheets receive pandas' default _x (occurrence)
# and _y (multimedia) suffixes.
totaldf = occdf.merge(mediadf, how='inner', on='gbifID')

In [6]:
# List the merged column names to find the fields needed for the upload table.
totaldf.columns

Index(['gbifID', 'abstract', 'accessRights', 'accrualMethod',
       'accrualPeriodicity', 'accrualPolicy', 'alternative', 'audience_x',
       'available', 'bibliographicCitation',
       ...
       'title_y', 'description_y', 'source_y', 'audience_y', 'created_y',
       'creator_y', 'contributor_y', 'publisher_y', 'license_y',
       'rightsHolder_y'],
      dtype='object', length=273)

### MAIN GOAL: Combine multimedia and occurrence data, make it upload-ready with available data
- Primary Data: 
 - concept: Scientific Name, Class
   - Scientific name preferred
 - url: Image URL
 - x: Bounding box X Coord
 - y: Bounding box Y Coord
 - width: Bounding Box Width (px)
 - height: Bounding Box Height (px)


- Optional Data
 - altitude
   - m
 - altconcept: Secondary name for localization
 - depth
   - m
 - groupof: Whether the box contains multiple entities rather than a single one
   - TRUE/FALSE
 - imagingtype: Any specialized imaging systems
 - latitude
   - Between -90 (S) and 90 (N)
 - longitude
   - Between -180 (W) and 180 (E)
 - observer: Name of person or program who created the localization
 - occluded: If object is occluded by another in an image
   - TRUE/FALSE
 - oxygen: O2 concentration
   - ml/L
 - pressure
   - dbar
 - salinity
 - temperature
   - Celsius
 - timestamp
   - ISO 8601 format
 - truncated: If object extends beyond boundary of image
   - TRUE/FALSE
 - userdefinedkey: UUID
 - Any additional columns are added as key-value tags
 


### 2nd GOAL: Integrate Pythia into the code: try uploading images directly from the links in the multimedia CSV, read the bounding box results from the website, and write the box coordinates into the CSV
- For larger image sets, use integrated models to create bounding boxes
- https://colab.research.google.com/github/fathomnet/fathomnet-py/blob/main/tutorial.ipynb#scrollTo=jrTNntfuGRLQ (use as reference)

In [7]:
# Primary upload fields: label, image link, then the bounding box geometry.
cols = [
    'concept',
    'url',
    'x', 'y',
    'width', 'height',
]

In [8]:
# Build the upload table: start from the primary columns (the bounding box
# fields stay empty until boxes are generated) and copy the available
# occurrence/multimedia fields across from the merged frame.
inputdf = pd.DataFrame(columns=cols)
inputdf['concept'] = totaldf['verbatimScientificName']
inputdf['url'] = totaldf['identifier_y']
# inputdf['depth'] = totaldf['verbatimDepth']
inputdf['latitude'] = totaldf['decimalLatitude']
inputdf['longitude'] = totaldf['decimalLongitude']
# Timestamps are written in ISO 8601. Missing event dates are kept as None:
# calling .isoformat() on NaT yields the literal string 'NaT', and on NaN/None
# it raises AttributeError, neither of which is a usable timestamp.
inputdf['timestamp'] = totaldf['eventDate'].apply(
    lambda event_time: event_time.isoformat() if pd.notna(event_time) else None
)
inputdf['notes'] = totaldf['occurrenceRemarks']

In [9]:
# Preview the assembled upload table.
inputdf

Unnamed: 0,concept,url,x,y,width,height,latitude,longitude,timestamp,notes
0,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
1,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
2,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
3,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
4,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
...,...,...,...,...,...,...,...,...,...,...
978,Manta,https://images.ala.org.au/image/proxyImageThum...,,,,,-27.436437,153.546137,2016-09-21T19:31:00,It was in deep waters in a gorge walk
979,Orectolobus,https://images.ala.org.au/image/proxyImageThum...,,,,,-27.398803,153.437653,2016-09-19T17:38:00,We think might be a banded wobbegong. It was s...
980,Orectolobus,https://images.ala.org.au/image/proxyImageThum...,,,,,-27.398803,153.437653,2016-09-19T17:38:00,We think might be a banded wobbegong. It was s...
981,HETERODONTIDAE,https://images.ala.org.au/image/proxyImageThum...,,,,,-33.926870,137.618758,2016-09-08T21:21:00,


### Pythia Test

In [10]:
# Work on the first ten rows only. Take an explicit copy so that filling in
# columns on the test subset later neither raises SettingWithCopyWarning nor
# leaves it ambiguous whether inputdf is modified too.
testdf = inputdf.iloc[:10].copy()

In [11]:
# Show the ten-row test subset.
testdf

Unnamed: 0,concept,url,x,y,width,height,latitude,longitude,timestamp,notes
0,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
1,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
2,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
3,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
4,Taeniura lymma,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932188,145.987183,2023-04-05T13:18:00,In the shallows of Fitzroy Island FNQ
5,Myliobatis,https://images.ala.org.au/image/proxyImageThum...,,,,,-32.43339,152.536282,2023-04-01T13:47:25,"Large, perhaps 1 m across"
6,Triaenodon obesus,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932733,145.985687,2023-04-07T10:57:00,Swimming in the reef at Fitzroy island. White ...
7,Triaenodon obesus,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932733,145.985687,2023-04-07T10:57:00,Swimming in the reef at Fitzroy island. White ...
8,Triaenodon obesus,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932733,145.985687,2023-04-07T10:57:00,Swimming in the reef at Fitzroy island. White ...
9,Triaenodon obesus,https://images.ala.org.au/image/proxyImageThum...,,,,,-16.932733,145.985687,2023-04-07T10:57:00,Swimming in the reef at Fitzroy island. White ...


In [12]:
# Spot-check the full image URL stored for the sixth test row.
testdf['url'].iloc[5]

'https://images.ala.org.au/image/proxyImageThumbnailLarge?imageId=e87dacb9-a0d7-458c-a18a-ae14cf3294c1'

In [13]:
# Download the image behind the first test row and open it with Pillow.
testurl = testdf.iloc[0].url
urllib.request.urlretrieve(testurl, 'test.jpg')
img = Image.open('test.jpg')
# Image.open is lazy and keeps 'test.jpg' open until the pixels are read.
# Loading right away lets Pillow close the file handle, so the file can be
# replaced when this cell is re-run.
img.load()
# img.show()

# pythiaurl = 'http://fathomnet.org:7777/'

### ML Generated Bounding Boxes

In [26]:
!pip install ruamel-yaml pathlib

Collecting ruamel-yaml
  Downloading ruamel.yaml-0.17.21-py3-none-any.whl (109 kB)
     -------------------------------------- 109.5/109.5 kB 2.1 MB/s eta 0:00:00
Collecting pathlib
  Downloading pathlib-1.0.1-py3-none-any.whl (14 kB)
Collecting ruamel.yaml.clib>=0.2.6 (from ruamel-yaml)
  Downloading ruamel.yaml.clib-0.2.7-cp39-cp39-win_amd64.whl (118 kB)
     -------------------------------------- 118.4/118.4 kB 3.4 MB/s eta 0:00:00
Installing collected packages: pathlib, ruamel.yaml.clib, ruamel-yaml
Successfully installed pathlib-1.0.1 ruamel-yaml-0.17.21 ruamel.yaml.clib-0.2.7


In [15]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 --user

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.0.0%2Bcu118-cp39-cp39-win_amd64.whl (2611.4 MB)
     ---------------------------------------- 2.6/2.6 GB 1.7 MB/s eta 0:00:00
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.15.1%2Bcu118-cp39-cp39-win_amd64.whl (4.9 MB)
     ---------------------------------------- 4.9/4.9 MB 7.3 MB/s eta 0:00:00
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.0.1%2Bcu118-cp39-cp39-win_amd64.whl (2.5 MB)
     ---------------------------------------- 2.5/2.5 MB 6.0 MB/s eta 0:00:00
Installing collected packages: torch, torchvision, torchaudio
Successfully installed torch-2.0.0+cu118 torchaudio-2.0.1+cu118 torchvision-0.15.1+cu118


In [27]:
!pip install pyyaml==5.4.1 git+https://github.com/facebookresearch/detectron2.git --user

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to c:\users\tushar\appdata\local\temp\pip-req-build-jo5la9ij
  Resolved https://github.com/facebookresearch/detectron2.git to commit e020497c85873c2b811ac87dd2e4a34a806e4c2b
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git 'C:\Users\Tushar\AppData\Local\Temp\pip-req-build-jo5la9ij'


In [30]:
!pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.7.0.72-cp37-abi3-win_amd64.whl (38.2 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.7.0.72


In [34]:
!pip install wget



In [31]:
import torchvision                              # Library of datasets, models, and image transforms
import pickle                                   # Data serialization library
import json                                     # Data storage (JavaScript Object Notation)
import matplotlib.pyplot as plt                 # Plotting utilities
import torch                                    # Tensor library for manipulating large models and data
import requests                                 # Manages HTTP requests
import random                                   # Random number generator
import numpy as np                              # Array manipulations

# Import key functions & modules from detectron2
from detectron2 import model_zoo
from detectron2.data import Metadata
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import Visualizer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
import detectron2.data.transforms as T

# Import from pyplot and PIL for easy plotting
from matplotlib.pyplot import imshow
from PIL import Image

In [37]:
!wget -nc https://zenodo.org/record/5571043/files/model_final.pth

--2023-05-01 11:57:07--  https://zenodo.org/record/5571043/files/model_final.pth
Resolving zenodo.org (zenodo.org)... 188.185.124.72
Connecting to zenodo.org (zenodo.org)|188.185.124.72|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 303011247 (289M) [application/octet-stream]
Saving to: 'model_final.pth'

     0K .......... .......... .......... .......... ..........  0%  248K 19m51s
    50K .......... .......... .......... .......... ..........  0%  255K 19m34s
   100K .......... .......... .......... .......... ..........  0% 3.96M 13m27s
   150K .......... .......... .......... .......... ..........  0% 7.21M 10m15s
   200K .......... .......... .......... .......... ..........  0%  269K 11m52s
   250K .......... .......... .......... .......... ..........  0% 11.6M 9m57s
   300K .......... .......... .......... .......... ..........  0%  244K 11m25s
   350K .......... .......... .......... .......... ..........  0% 6.79M 10m5s
   400K .......... .........

 49650K .......... .......... .......... .......... .......... 16% 8.40M 49s
 49700K .......... .......... .......... .......... .......... 16% 9.14M 49s
 49750K .......... .......... .......... .......... .......... 16% 13.4M 49s
 49800K .......... .......... .......... .......... .......... 16% 20.2M 49s
 49850K .......... .......... .......... .......... .......... 16% 10.9M 49s
 49900K .......... .......... .......... .......... .......... 16% 21.9M 49s
 49950K .......... .......... .......... .......... .......... 16% 15.4M 49s
 50000K .......... .......... .......... .......... .......... 16% 9.34M 49s
 50050K .......... .......... .......... .......... .......... 16% 7.83M 49s
 50100K .......... .......... .......... .......... .......... 16% 21.3M 49s
 50150K .......... .......... .......... .......... .......... 16% 16.3M 49s
 50200K .......... .......... .......... .......... .......... 16% 19.3M 49s
 50250K .......... .......... .......... .......... .......... 16% 9.76M 49s

174600K .......... .......... .......... .......... .......... 59% 8.44M 22s
174650K .......... .......... .......... .......... .......... 59% 13.2M 22s
174700K .......... .......... .......... .......... .......... 59% 17.5M 22s
174750K .......... .......... .......... .......... .......... 59% 15.1M 22s
174800K .......... .......... .......... .......... .......... 59% 14.3M 21s
174850K .......... .......... .......... .......... .......... 59% 9.81M 21s
174900K .......... .......... .......... .......... .......... 59% 11.2M 21s
174950K .......... .......... .......... .......... .......... 59% 13.9M 21s
175000K .......... .......... .......... .......... .......... 59% 5.48M 21s
175050K .......... .......... .......... .......... .......... 59% 84.3M 21s
175100K .......... .......... .......... .......... .......... 59% 6.30M 21s
175150K .......... .......... .......... .......... .......... 59% 15.6M 21s
175200K .......... .......... .......... .......... .......... 59%  889K 21s

224600K .......... .......... .......... .......... .......... 75% 7.69M 13s
224650K .......... .......... .......... .......... .......... 75% 10.7M 13s
224700K .......... .......... .......... .......... .......... 75% 2.41M 13s
224750K .......... .......... .......... .......... .......... 75% 16.5M 13s
224800K .......... .......... .......... .......... .......... 75% 8.52M 13s
224850K .......... .......... .......... .......... .......... 76% 12.4M 13s
224900K .......... .......... .......... .......... .......... 76% 19.9M 13s
224950K .......... .......... .......... .......... .......... 76% 15.0M 13s
225000K .......... .......... .......... .......... .......... 76% 8.57M 13s
225050K .......... .......... .......... .......... .......... 76%  465M 13s
225100K .......... .......... .......... .......... .......... 76% 19.5M 13s
225150K .......... .......... .......... .......... .......... 76% 8.28M 13s
225200K .......... .......... .......... .......... .......... 76% 7.11M 13s

In [38]:
!wget -nc https://zenodo.org/record/5571043/files/fathomnet_config_v2_1280.yaml

--2023-05-02 14:04:08--  https://zenodo.org/record/5571043/files/fathomnet_config_v2_1280.yaml
Resolving zenodo.org (zenodo.org)... 188.185.124.72
Connecting to zenodo.org (zenodo.org)|188.185.124.72|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1112 (1.1K) [application/octet-stream]
Saving to: 'fathomnet_config_v2_1280.yaml'

     0K .                                                     100% 1.15G=0s

2023-05-02 14:04:09 (1.15 GB/s) - 'fathomnet_config_v2_1280.yaml' saved [1112/1112]



In [39]:
# --- Inference thresholds ---
SCORE_THRESH = 0.3   # model score threshold; lower-confidence annotations are suppressed
NMS_THRESH = 0.45    # NMS threshold used to filter the boxes proposed by the model

# --- Model artefacts (relative file names) ---
WEIGHT_FILE = "model_final.pth"                  # FathomNet model weights
CONFIG_FILE = "fathomnet_config_v2_1280.yaml"    # training configuration file