# Notebook to Access and Extract DataSets from the DynamoDB JGE Catalog

## Load Libraries

In [1]:
import pandas as pd
# Note: The Notebook must use the environment from ArcGIS Pro
#       In particular to be able to use the ArcPy library
from catalogs import jge_Catalog as jge
from pprint import pprint

## Display Settings

In [2]:
pd.set_option("display.max_colwidth", 90)
pd.set_option("display.max_columns", 10)
pd.set_option('expand_frame_repr', True)
pd.set_option('display.width', 240)

## Instantiate the Catalog

In [19]:
catalog = jge()  ## it loads by default the table 'jge-catalog-test-03'
print(catalog.getCatalogName())

jge-catalog-test-03


## List all existent Features in the Catalog

In [20]:
# retrieve current features in the catalog
df = catalog.getFeatures()
df.head()

Unnamed: 0,feature-name,s3-versionID,feature-type,s3-file-gdb-zip-location
0,GEOL_PorphyryCu_deposits_JGEM,4la81_Iu4yia9He79nGSVUNnR371kNAR,File GeoDatabase Feature Class,s3://jge-catalog-lake/Approved_Data/Cu_Geology/JGE_Template_20201022.gdb.zip
1,GEOL_Jurassic_Early_Cretaceous_vol_intru_fault_dist_JGEM,UJXoAHMzd2cUfa8dfQY8hAp0dDagIlyP,File GeoDatabase Raster Dataset,s3://jge-catalog-lake/Approved_Data/Pet_Reservoir/JGE_Template_20201026.gdb.zip
2,GEOL_Carbonate_Reservoir_Prediction_Enhanced_Feature_All_Ages_Mean_JGEM,MvGUP_VSKZGtva5whWafs9gTnt5aO6Sz,File GeoDatabase Raster Dataset,s3://jge-catalog-lake/Approved_Data/Pet_Reservoir/Pet_Reservoir.gdb.zip


## Dump the Catalog as a JSON document

In [5]:
# catalog_entries = catalog.dumps()
# for entry in catalog_entries:
#     print(120*'-')
#     pprint(entry)

## Test whether the object exist with that key in the Catalog

In [6]:
# test -- not registered as a Raster Dataset
featureName = 'GEOL_PorphyryCu_deposits_JGEM'
versionID = 'i-am-not-the-right-key-btw' # 'NHfZ8v08o5btxcRBAmoroWYi01OVciqC'  # 'dasdfjfhkjsdhflkajhdjlaksh'
md = catalog.getMetaData(featureName, versionID)
if len(md) != 0:
    pprint(md, indent=1)
else:
    print(f'{featureName} does not exist under the key: {versionID}')

GEOL_PorphyryCu_deposits_JGEM does not exist under the key: i-am-not-the-right-key-btw


In [21]:
versionID = '4la81_Iu4yia9He79nGSVUNnR371kNAR'
md = catalog.getMetaData(featureName, versionID)
if len(md) != 0:
    pprint(md[0], indent=1)
else:
    print(f'{featureName} does not exist under the key: {versionID}')

GEOL_Carbonate_Reservoir_Prediction_Enhanced_Feature_All_Ages_Mean_JGEM does not exist under the key: 4la81_Iu4yia9He79nGSVUNnR371kNAR


In [22]:
featureName = \
    'GEOL_Carbonate_Reservoir_Prediction_Enhanced_Feature_All_Ages_Mean_JGEM'
versionID = 'MvGUP_VSKZGtva5whWafs9gTnt5aO6Sz'
md = catalog.getMetaData(featureName, versionID)
print(f"Feature: {featureName}\nVersionID: {versionID}")
pprint(md[0], indent=1)

Feature: GEOL_Carbonate_Reservoir_Prediction_Enhanced_Feature_All_Ages_Mean_JGEM
VersionID: MvGUP_VSKZGtva5whWafs9gTnt5aO6Sz
{'NodataValue': '-10000000000',
 'commodity-type': '',
 'critical-process-stage': '',
 'feature-group-name': '',
 'feature-units': '',
 'label-variable-type': '',
 'max-abs-age': '',
 'min-abs-age': '',
 'null-data-reassignment-value': '',
 'rotation': '',
 'timescale': ''}


## Retrieve a Dataset from the Catalog using a versionID (s3-versionId)

In [23]:
ds = catalog.retrieveDataset('4la81_Iu4yia9He79nGSVUNnR371kNAR')
print(ds[0].keys())

dict_keys(['GEOL_Jurassic_Early_Cretaceous_vol_intru_fault_dist_JGEM', 'GEOL_Carbonate_Reservoir_Prediction_Enhanced_Feature_All_Ages_Mean_JGEM', 'GEOL_PorphyryCu_deposits_JGEM', 'RESO_FISHNET_IHS_Rystad_Discovered_JGEM'])


### Get the s3-versionId of the object inside the GDB zipped file on the S3 Bucket

In [10]:
print(ds[0]['GEOL_Jurassic_Early_Cretaceous_vol_intru_fault_dist_JGEM']['s3-versionID'])

4la81_Iu4yia9He79nGSVUNnR371kNAR


### Get the dictionary & dataset for the individual TIF or SHP

In [11]:
ds[0]['GEOL_Jurassic_Early_Cretaceous_vol_intru_fault_dist_JGEM']

{'s3-versionID': '4la81_Iu4yia9He79nGSVUNnR371kNAR',
 'feature-type': 'Raster Dataset',
 'shape': (447, 799),
 'columns': [],
 'data': [array([[ 5789.29219969,  5789.29219969,  5789.29219969, ...,
              0.        ,     0.        ,     0.        ],
         [ 5789.29219969,  5789.29219969,  5789.29219969, ...,
              0.        ,     0.        ,     0.        ],
         [ 5789.29219969,  5789.29219969,  5789.29219969, ...,
              0.        ,     0.        ,     0.        ],
         ...,
         [15022.75839471, 15022.75839471, 15022.75839471, ...,
              0.        ,     0.        ,     0.        ],
         [15022.75839471, 15022.75839471, 15022.75839471, ...,
              0.        ,     0.        ,     0.        ],
         [15022.75839471, 15022.75839471, 15022.75839471, ...,
              0.        ,     0.        ,     0.        ]])]}

In [12]:
ds[0]['GEOL_PorphyryCu_deposits_JGEM'].keys()

dict_keys(['s3-versionID', 'feature-type', 'shape', 'columns', 'data'])

In [24]:
ds[0]['GEOL_PorphyryCu_deposits_JGEM']['shape']

(747,)

In [14]:
columns = ds[0]['GEOL_PorphyryCu_deposits_JGEM']['columns']
columns[:7]

['OBJECTID',
 'Shape',
 'Date_Last__Published',
 'ID_No',
 'Deposit_Name',
 'Alternative_Name',
 'Tier']

In [15]:
ds[0]['GEOL_PorphyryCu_deposits_JGEM']['data'][0][-3:]

array([(745, [ 111.6537,   22.174 ], '2017-09-09T00:00:00.000000', 48560, 'SHILU', '', 4,  22.174 ,  111.6537, '<100m', 'Skarn', '', 'Skarn', 'Moderate', 'Cu,Mo,Ag', 'Copper', 'BASE METAL', '11.343805', '0.916799962622771', '1.39763477875563', 'CHINA', 'China', 'Closed Mine', 'Open Pit', 1961, '1979', '1987', 'Lower Cretaceous', 'Cretaceous', 'Mesozoic', 'Mesozoic', 'Phanerozoic', 105.3, 107.9, '106.6', 'High', '', 28026000., '-', '-', '-', '-', 28026000., 0., 0., 0., 20312879., '-', '-', '-', '-', 11343805., '-', '104000', '23821.9905', '-', '158544.96391422', '1.39763477875563', '-', 104000., '23821.9905', '-'),
       (746, [-118.6036,   48.9961], '2018-08-13T00:00:00.000000', 31100, 'LONE STAR', 'Danville', 4,  48.9961, -118.6036, '<100m', 'Porphyry', '', 'Porphyry', 'Moderate', 'Cu,Ag', 'Copper', 'BASE METAL', '7.4', '1.4', '1.4', 'NTH AM', 'United States', 'Undeveloped Deposit', 'Both Open Pit & Underground', 1897, '', '', 'Eocene', 'Paleogene', 'Cenozoic', 'Cenozoic', 'Phanerozo

In [16]:
data = list(ds[0]['GEOL_PorphyryCu_deposits_JGEM']['data'][0][-5:])
data

[(743, [126.066,   7.48 ], '2009-07-27T00:00:00.000000', 405, 'SABENA-TAGPURA', 'Also see nearby Sabena-Batoto, also nearby Maangob, Kalmatan / Kalamantan porphyries', 4, 7.48, 126.066, '1-5km', 'Porphyry', '', 'Porphyry', 'Moderate', 'Cu,Au,Ag', 'Copper', 'BASE METAL', '28.026', '0.387005', '0.528995649271795', 'PAC / SEA', 'Philippines', 'Closed Mine', 'Open Pit', 1969, '1979', '1981', '', 'Neogene', 'Cenozoic', 'Cenozoic', 'Phanerozoic', 2.58, 23.03, '12.805', 'Low', '', 28026000., '187068.980793838', '108462.0213', '-', '-', 28026000., 187076., 108462., 0., 20312879., '-', '-', '-', '-', 3500000., '-', '-', '-', '-', '148256.320664913', '0.528995649271795', '187068.980793838', 108462.0213, '-', '-'),
 (744, [-126.7142,   57.2122], '2013-10-28T00:00:00.000000', 45871, 'PINE', 'Fin Prospect', 4, 57.2122, -126.7142, '100-500m', 'Porphyry', '', 'Porphyry', 'Major', 'Au,Cu,Zn', 'Gold', 'GOLD', '70', '0.15', '0.664809497502489', 'NTH AM', 'Canada', 'Undeveloped Deposit', 'Open Pit', 1968

In [17]:
# create DataFrame using data 
df = pd.DataFrame.from_records(data, columns=columns)
df.head()

Unnamed: 0,OBJECTID,Shape,Date_Last__Published,ID_No,Deposit_Name,...,Pre_Mine_GRADE_Cu_eq_t,Pre_Mine_Gold_oz,Pre_Mine_Copper_t,Pre_Mine_Moly_t,Pre_Mine_Cobalt_t
0,743,"[126.06600000000003, 7.480000000000075]",2009-07-27,405,SABENA-TAGPURA,...,0.528995649271795,187068.980793838,108462.0213,-,-
1,744,"[-126.71419999999995, 57.21220000000005]",2013-10-28,45871,PINE,...,0.664809497502489,1282768.46511448,105000.0,-,-
2,745,"[111.65370000000007, 22.174000000000035]",2017-09-09,48560,SHILU,...,1.39763477875563,-,104000.0,23821.9905,-
3,746,"[-118.60359999999997, 48.99610000000007]",2018-08-13,31100,LONE STAR,...,1.4,-,103600.0,-,-
4,747,"[151.8510000000001, -25.640999999999963]",2016-01-22,753,COALSTOUN,...,0.382267311988086,-,102677.0,-,-
