In [1]:
import shutil

import pandas as pd

In [2]:
# Load the media table from the HDF5 store; `key` selects the dataset inside the file.
media = pd.read_hdf(
    '/mnt/gbif/clean_data.h5',
    key='media_merged_filtered-by-species_350pt',
)

In [3]:
from parallel_download import download

In [4]:
# Show the column names as a plain Python list.
list(media.columns)

['gbifID',
 'format',
 'identifier_media',
 'identifier_obs',
 'eventDate',
 'decimalLatitude',
 'decimalLongitude',
 'taxonID',
 'taxonRank',
 'taxonomicStatus',
 'taxonKey',
 'acceptedTaxonKey',
 'phylumKey',
 'classKey',
 'orderKey',
 'familyKey',
 'genusKey',
 'speciesKey',
 'species',
 'acceptedScientificName',
 'verbatimScientificName',
 'media_count_per_taxonID']

In [5]:
import mimetypes as mt

In [6]:
# Register 'image/pjpeg' so that guess_extension() can resolve it to '.jpg'
# (presumably some rows in media['format'] use this MIME type -- TODO confirm).
# Gotcha: per the mimetypes docs, when the extension is already known the new
# type replaces the old one, so for the rest of this kernel session
# mt.guess_type() on a '.jpg' name reports 'image/pjpeg'.
mt.add_type('image/pjpeg', '.jpg', strict=True)

In [7]:
def _extension_for(mime_type):
    """Return the file extension mimetypes associates with `mime_type`, or None if unknown.

    Non-strict lookup, so non-standard but common MIME types are considered too.
    """
    return mt.guess_extension(mime_type, strict=False)


# Adds an 'extension' column to `media` in place, derived from the 'format' column.
media['extension'] = media['format'].map(_extension_for)

In [8]:
# dropna=False keeps a missing-value bucket in the tally, so any format that
# could not be mapped to an extension would show up here.
extension_counts = media['extension'].value_counts(dropna=False)
extension_counts

.jpg    663805
.png      1991
.gif         7
Name: extension, dtype: int64

In [9]:
# errors.4.txt holds a single whitespace-delimited column of `media` index labels
# (presumably the rows whose download failed in an earlier run -- TODO confirm).
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`
# (deprecated since pandas 2.2).
errors_df = pd.read_csv('errors.4.txt', sep=r'\s+', header=None)
# Exactly one column is expected; assigning a single name raises if the file has more.
errors_df.columns = ['index']

In [10]:
# Display the table just read from errors.4.txt.
errors_df

Unnamed: 0,index
0,232
1,576025


In [11]:
# Peek at the first five rows, now including the derived 'extension' column.
media.head(n=5)

Unnamed: 0,gbifID,format,identifier_media,identifier_obs,eventDate,decimalLatitude,decimalLongitude,taxonID,taxonRank,taxonomicStatus,...,classKey,orderKey,familyKey,genusKey,speciesKey,species,acceptedScientificName,verbatimScientificName,media_count_per_taxonID,extension
0,891021265,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,119647,2012-08-31 00:00:00,38.036174,-122.755828,147221,SPECIES,ACCEPTED,...,180.0,1048.0,4812.0,8360956.0,2608086.0,Niebla homalea,Niebla homalea (Ach.) Rundel & Bowler,Niebla homalea,865,.jpg
5,891026285,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,344410,2013-07-23 09:36:00,38.58273,-122.69666,54134,SPECIES,ACCEPTED,...,186.0,1145.0,3286.0,2519084.0,2548311.0,Trametes versicolor,Trametes versicolor (L.) Lloyd,Trametes versicolor,43873,.jpg
6,891026330,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,345889,2013-07-25 19:33:13,43.627681,-72.529947,63274,SPECIES,ACCEPTED,...,186.0,1145.0,3290.0,2519220.0,7241318.0,Ganoderma tsugae,Ganoderma tsugae Murrill,Ganoderma tsugae,6970,.jpg
7,891027410,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,381407,2013-08-19 11:10:07,40.499055,-121.40242,54608,SPECIES,ACCEPTED,...,180.0,1048.0,8305.0,2605402.0,2605405.0,Letharia columbiana,Letharia columbiana (Nutt.) J.W.Thomson,Letharia columbiana,2011,.jpg
8,891027410,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,381407,2013-08-19 11:10:07,40.499055,-121.40242,54608,SPECIES,ACCEPTED,...,180.0,1048.0,8305.0,2605402.0,2605405.0,Letharia columbiana,Letharia columbiana (Nutt.) J.W.Thomson,Letharia columbiana,2011,.jpg


In [12]:
# Select the rows of `media` whose index label is listed in errors_df['index'].
# NOTE: this rebinds `errors_df` from the one-column table of labels to the
# matching `media` rows, which have no 'index' column -- so re-running this cell
# on its own raises KeyError; re-run the read_csv cell first.
listed_row_mask = media.index.isin(errors_df['index'])
errors_df = media.loc[listed_row_mask]

In [13]:
# Display the `media` rows that matched the listed index labels.
errors_df

Unnamed: 0,gbifID,format,identifier_media,identifier_obs,eventDate,decimalLatitude,decimalLongitude,taxonID,taxonRank,taxonomicStatus,...,classKey,orderKey,familyKey,genusKey,speciesKey,species,acceptedScientificName,verbatimScientificName,media_count_per_taxonID,extension
232,891745805,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,413221,2013-09-24 00:00:00,49.063312,-120.791845,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
576025,1572327681,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,1481463,2015-05-10 11:42:25,37.247084,-96.974725,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg


In [14]:
# Retry only the selected rows. Each job is a
# (media URL, media index label, file extension) triple.
# NOTE(review): download() is project-local; how it names and places the files
# is not visible here -- presumably <index><extension> under /mnt/gbif/media, TODO confirm.
retry_jobs = zip(
    errors_df['identifier_media'].to_numpy(),
    errors_df.index.to_numpy(),
    errors_df['extension'],
)
download(retry_jobs, parallel=True)

In [15]:
# Marker cell: its output shows that execution got past the download cell.
print('done')

done


In [16]:
# List the columns available on the selected rows (same as `media`, including 'extension').
errors_df.columns

Index(['gbifID', 'format', 'identifier_media', 'identifier_obs', 'eventDate',
       'decimalLatitude', 'decimalLongitude', 'taxonID', 'taxonRank',
       'taxonomicStatus', 'taxonKey', 'acceptedTaxonKey', 'phylumKey',
       'classKey', 'orderKey', 'familyKey', 'genusKey', 'speciesKey',
       'species', 'acceptedScientificName', 'verbatimScientificName',
       'media_count_per_taxonID', 'extension'],
      dtype='object')

In [17]:
# Which species do the selected rows belong to?
errors_df.loc[:, 'acceptedScientificName']

232             Coprinus comatus (O.F.Müll.) Pers.
576025    Lysurus periphragmoides (Klotzsch) Dring
Name: acceptedScientificName, dtype: object

In [20]:
# All media rows sharing the species of index 232 (presumably to pick a substitute image).
is_coprinus_comatus = media['acceptedScientificName'] == 'Coprinus comatus (O.F.Müll.) Pers.'
media.loc[is_coprinus_comatus]

Unnamed: 0,gbifID,format,identifier_media,identifier_obs,eventDate,decimalLatitude,decimalLongitude,taxonID,taxonRank,taxonomicStatus,...,classKey,orderKey,familyKey,genusKey,speciesKey,species,acceptedScientificName,verbatimScientificName,media_count_per_taxonID,extension
35,891056835,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,556755,2014-03-07 09:30:00,38.455367,-122.720133,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
36,891056835,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,556755,2014-03-07 09:30:00,38.455367,-122.720133,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
106,891108790,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,152540,2008-09-09 00:00:00,48.938739,-118.538361,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
116,891109625,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,154817,2012-10-31 13:51:19,47.730333,-122.252000,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
117,891109625,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,154817,2012-10-31 13:51:19,47.730333,-122.252000,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27174,1837064895,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,10395155,2017-10-07 18:54:00,51.541280,9.915804,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
27260,1837075895,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,10428975,2018-03-26 18:17:00,-43.307942,172.678803,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
27355,1837087265,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,10474627,2017-09-29 10:21:00,49.920823,16.330277,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg
27740,1837764200,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,10586960,2018-03-10 11:35:51,-43.520803,172.666580,47392,SPECIES,ACCEPTED,...,186.0,1499.0,4170.0,2526779.0,7987658.0,Coprinus comatus,Coprinus comatus (O.F.Müll.) Pers.,Coprinus comatus,32911,.jpg


In [21]:
# Index 232 is one of the entries listed in errors.4.txt. Use the image of row 35
# (same species, 'Coprinus comatus (O.F.Müll.) Pers.') as a stand-in for it.
# NOTE(review): this makes 232.jpg a byte-for-byte duplicate of 35.jpg -- confirm
# that duplicated images are acceptable to whatever consumes /mnt/gbif/media.
# shutil.copy raises if the source file is missing, whereas a failing `!cp` only
# prints to the cell output and lets the notebook carry on.
# The trailing `;` suppresses the returned destination path, keeping the cell silent.
shutil.copy('/mnt/gbif/media/35.jpg', '/mnt/gbif/media/232.jpg');

In [22]:
# All media rows sharing the species of index 576025 (presumably to pick a substitute image).
is_lysurus_periphragmoides = media['acceptedScientificName'] == 'Lysurus periphragmoides (Klotzsch) Dring'
media.loc[is_lysurus_periphragmoides]

Unnamed: 0,gbifID,format,identifier_media,identifier_obs,eventDate,decimalLatitude,decimalLongitude,taxonID,taxonRank,taxonomicStatus,...,classKey,orderKey,familyKey,genusKey,speciesKey,species,acceptedScientificName,verbatimScientificName,media_count_per_taxonID,extension
12841,1571066925,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,5286144,2017-01-20 10:41:00,30.241245,-97.696367,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
12842,1571066925,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,5286144,2017-01-20 10:41:00,30.241245,-97.696367,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
12843,1571066925,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,5286144,2017-01-20 10:41:00,30.241245,-97.696367,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
12844,1571066925,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,5286144,2017-01-20 10:41:00,30.241245,-97.696367,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
41811,1914219885,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,16845551,2018-09-23 15:00:33,33.121983,-96.874800,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1414528,3321247515,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,85199748,2021-05-21 10:38:19,30.443492,-98.060197,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
1445483,3384093475,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,94443075,2021-09-11 09:50:13,33.214187,-97.146343,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
1445484,3384093475,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,94443075,2021-09-11 09:50:13,33.214187,-97.146343,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg
1453211,3384763005,image/jpeg,https://inaturalist-open-data.s3.amazonaws.com...,95869897,2021-09-23 12:44:54,39.935356,-74.072395,338070,SPECIES,ACCEPTED,...,186.0,532.0,3292.0,2524086.0,5445796.0,Lysurus periphragmoides,Lysurus periphragmoides (Klotzsch) Dring,Lysurus periphragmoides,637,.jpg


In [23]:
# Index 576025 is one of the entries listed in errors.4.txt. Use the image of row 12841
# (same species, 'Lysurus periphragmoides (Klotzsch) Dring') as a stand-in for it.
# NOTE(review): this makes 576025.jpg a byte-for-byte duplicate of 12841.jpg -- confirm
# that duplicated images are acceptable to whatever consumes /mnt/gbif/media.
# shutil.copy raises if the source file is missing, whereas a failing `!cp` only
# prints to the cell output and lets the notebook carry on.
# The trailing `;` suppresses the returned destination path, keeping the cell silent.
shutil.copy('/mnt/gbif/media/12841.jpg', '/mnt/gbif/media/576025.jpg');