#### 與iNaturalist 資料比對的Ebird驗證資料集
- 按照iNaturalist 物種名錄(num_sp=1486)
    - 每種最多選10張


In [2]:
import sys 
import os
import pandas as pd
import numpy as np
from pathlib import Path 
from PIL import Image
import shutil
import time
import datetime


print(pd.__version__)
print(np.__version__)
!python -V
!pwd

dir_meta = Path('meta')
dir_meta.mkdir(exist_ok=True, parents=True)


1.4.1
1.22.2
Python 3.9.7
/home/esslab/AI_projects/shared/eBird/download_ebird


## work pipiline
1. 根據iNaturalist 2021 dayaset取得鳥類物種名錄與流水號
2. 產出`ebird_1486_selected/val` 目錄 
3. 根據物種名，至ebird meta 資料取得對應得物種名
4. 依據該物種名索引，參照`Average Community Rating`與`Number of Ratings`取top10
5. 照檔案路徑取得資料後複製到所屬的資料夾

### 1. 根據iNaturalist 2021 dayaset取得鳥類物種名錄與流水號


In [None]:
!ls downloaded/ebird_1486_sel/val -l


In [89]:
dir_iNat = Path('../../../shared/iNaturalist_2021') 
iNat_val = dir_iNat/'val'
dir_name_iNatVal  = [dir_.name for dir_ in iNat_val.iterdir()] 
print(len(dir_name_iNatVal))

# get Specie & Family list 
sp_list = [file.split('_')[-2] + ' ' + file.split('_')[-1]  
           for file in dir_name_iNatVal]

family_list = [file.split('_')[-3] for file in dir_name_iNatVal]

df_iNat_val =  pd.concat([pd.DataFrame(dir_name_iNatVal),
                          pd.DataFrame(family_list),
                          pd.DataFrame(sp_list)],
              axis='columns').set_axis(['Dir','Family','Specie'],axis='columns')
df_iNat_val

1486


Unnamed: 0,Dir,Family,Specie
0,03908_Animalia_Chordata_Aves_Passeriformes_Lan...,Laniidae,Lanius collurio
1,04326_Animalia_Chordata_Aves_Passeriformes_Tyr...,Tyrannidae,Todirostrum cinereum
2,04026_Animalia_Chordata_Aves_Passeriformes_Par...,Paridae,Baeolophus atricristatus
3,03284_Animalia_Chordata_Aves_Bucerotiformes_Ph...,Phoeniculidae,Phoeniculus purpureus
4,03111_Animalia_Chordata_Aves_Accipitriformes_A...,Accipitridae,Accipiter badius
...,...,...,...
1481,03388_Animalia_Chordata_Aves_Charadriiformes_J...,Jacanidae,Hydrophasianus chirurgus
1482,03859_Animalia_Chordata_Aves_Passeriformes_Hir...,Hirundinidae,Riparia riparia
1483,03362_Animalia_Chordata_Aves_Charadriiformes_C...,Charadriidae,Charadrius tricollaris
1484,04556_Animalia_Chordata_Aves_Strigiformes_Stri...,Strigidae,Ninox novaeseelandiae


### 2. 產出ebird_1486_selected/val 目錄
- 物種目錄參照iNaturalist 

In [None]:
# mkdir ebird_1486_selected/val
ebird_val = dir_iNat/'val_ebird'

for dir_ in  dir_name_iNatVal:
    dir_ = ebird_val/dir_
    dir_.mkdir(exist_ok=True, parents=True)
    print(f'{dir_} maked')

### 3. 根據物種名，至ebird meta 資料取得對應得物種名

#### load ebird meta

In [111]:
%%time
meta_file = dir_meta/'jpg_top100_meta3.csv'
df_jpg100_meta = pd.read_csv(meta_file, index_col=0, 
                            #  dtype=column_dtypes
                             )
df_jpg100_meta.info()



<class 'pandas.core.frame.DataFrame'>
Int64Index: 896555 entries, 0 to 896554
Data columns (total 62 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   ML Catalog Number            896555 non-null  int64  
 1   Dir_FName                    896555 non-null  object 
 2   Parent_Dir                   896555 non-null  object 
 3   Size                         896555 non-null  float64
 4   Sci_N                        896555 non-null  object 
 5   Macaulay_public              37 non-null      float64
 6   Format                       896521 non-null  object 
 7   Scientific Name              896535 non-null  object 
 8   Common Name                  896521 non-null  object 
 9   Background Species           1807 non-null    object 
 10  Recordist                    896478 non-null  object 
 11  Date                         884239 non-null  object 
 12  Year                         884239 non-null  float64
 13 

In [123]:
col_basic =  ['ML Catalog Number','Dir_FName', 'Parent_Dir', 'Sci_N',]
# col_geo = ['County', 'Locality', 'Latitude','Longitude']
col_taxon = ['Scientific Name',  'Parent Species', 'Taxon Category']
col_rating = ['Number of Ratings', 'Average Community Rating',]

In [127]:
df_jpg100_meta_sel = df_jpg100_meta[col_basic + col_taxon + col_rating]
df_jpg100_meta_sel

Unnamed: 0,ML Catalog Number,Dir_FName,Parent_Dir,Sci_N,Scientific Name,Parent Species,Taxon Category,Number of Ratings,Average Community Rating
0,114289801,Acanthisittidae/Acanthisitta_chloris_0_114289801,Acanthisittidae,Acanthisitta_chloris_0,Acanthisitta chloris,Acanthisitta chloris,Species,2.0,4.000
1,114289821,Acanthisittidae/Acanthisitta_chloris_0_114289821,Acanthisittidae,Acanthisitta_chloris_0,Acanthisitta chloris,Acanthisitta chloris,Species,2.0,4.000
2,114404941,Acanthisittidae/Acanthisitta_chloris_0_114404941,Acanthisittidae,Acanthisitta_chloris_0,Acanthisitta chloris,Acanthisitta chloris,Species,2.0,4.500
3,114404951,Acanthisittidae/Acanthisitta_chloris_0_114404951,Acanthisittidae,Acanthisitta_chloris_0,Acanthisitta chloris,Acanthisitta chloris,Species,2.0,4.000
4,115495451,Acanthisittidae/Acanthisitta_chloris_0_115495451,Acanthisittidae,Acanthisitta_chloris_0,Acanthisitta chloris,Acanthisitta chloris,Species,21.0,4.906
...,...,...,...,...,...,...,...,...,...
896550,59630711,Zosteropidae/Zosterornis_whiteheadi_0_59630711,Zosteropidae,Zosterornis_whiteheadi_0,Zosterornis whiteheadi,Zosterornis whiteheadi,Species,1.0,3.000
896551,59630721,Zosteropidae/Zosterornis_whiteheadi_0_59630721,Zosteropidae,Zosterornis_whiteheadi_0,Zosterornis whiteheadi,Zosterornis whiteheadi,Species,0.0,0.000
896552,63554951,Zosteropidae/Zosterornis_whiteheadi_0_63554951,Zosteropidae,Zosterornis_whiteheadi_0,Zosterornis whiteheadi,Zosterornis whiteheadi,Species,1.0,3.000
896553,79738531,Zosteropidae/Zosterornis_whiteheadi_0_79738531,Zosteropidae,Zosterornis_whiteheadi_0,Zosterornis whiteheadi,Zosterornis whiteheadi,Species,4.0,3.000


#### checking sp list

In [139]:
sp_iNat =  df_iNat_val.Specie.unique()

sp_ebird = df_jpg100_meta_sel['Parent Species'].unique()

set_diff =  set(sp_iNat) - set(sp_ebird)
print(f'#_sp in iNat  : {len(sp_iNat)}')
print(f'#_sp in ebird : {len(sp_ebird)}')
print(f'#_sp_lack in ebird : {len(set_diff):,d}, {set_diff}') 

assert len(set_diff)==0

#_sp in iNat  : 1486
#_sp in ebird : 11215
#_sp_lack in ebird : 0, set()


### 4. 依據該物種名索引取得檔案路徑
- 參照`Average Community Rating`與`Number of Ratings`取top10

In [160]:
# sp_ = 'Lanius collurio' # 'Accipiter badius'
sp_ = 'Hydrophasianus chirurgus'

def get_path(df_:pd.DataFrame, sp_:str)->tuple:
    mask = df_['Parent Species'] == sp_
    df_ = (df_[mask]
           .sort_values(by=col_rating, ascending=False)
           .head(10))
    
    return df_['Dir_FName'].values
    # return df_ 

# print(sp_)
paths = get_path(df_jpg100_meta_sel, sp_)
paths

array(['Jacanidae/Hydrophasianus_chirurgus_0_714944',
       'Jacanidae/Hydrophasianus_chirurgus_0_205014781',
       'Jacanidae/Hydrophasianus_chirurgus_0_206055751',
       'Jacanidae/Hydrophasianus_chirurgus_0_205593231',
       'Jacanidae/Hydrophasianus_chirurgus_0_108262961',
       'Jacanidae/Hydrophasianus_chirurgus_0_115911651',
       'Jacanidae/Hydrophasianus_chirurgus_0_126261371',
       'Jacanidae/Hydrophasianus_chirurgus_0_139014661',
       'Jacanidae/Hydrophasianus_chirurgus_0_243700561',
       'Jacanidae/Hydrophasianus_chirurgus_0_26123191'], dtype=object)

### 5. 照檔案路徑取得資料後複製到所屬的資料夾

In [150]:
dir_ebird = Path('downloaded/jpg_top100')

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
encoder.to(device)

start_time = time.time()
for i, (images, *_) in enumerate(data_loader):
    images = images.to(device)
    
    with torch.no_grad():
        embedding_ = encoder.forward_features(images)
        # embedding_ = embedding_.detach().cpu().numpy()

    if i == 0 :
        embeddings = embedding_
    else:
        embeddings = torch.cat((embeddings, embedding_), axis=0)    
    
    passing_time = str(datetime.timedelta(seconds=int(time.time() - start_time )))
    info = f'Progress:{i+1:4d}, {100*(i+1)/len(data_loader):3.2f}%. ' 
    info += f'| Feature_shape: {embeddings.shape}.' 
    info += f'| Time: {passing_time}. '
    print(info, end='\r')


In [206]:
start_time = time.time()
for idx, rows in df_iNat_val.iterrows():
    dir_, family_, sp_ = rows
    
    paths = get_path(df_jpg100_meta_sel, sp_)
    for idx_p, path in enumerate(paths):
        src = dir_ebird.joinpath(path  + '.jpg')
        dst = ebird_val.joinpath(dir_, Path(path).name + '.jpg')
        shutil.copyfile(src, dst)
        # print(f'\t{idx_p:2d}, Copy imgs : {Path(path).name}')
    
    passing_time = str(datetime.timedelta(seconds=int(time.time() - start_time )))
    # print(f'{idx:4d}, {family_:10s}, {sp_:15s}, data dst : {dir_}')
    info = f'Progress : {idx:4d}, Time: {passing_time} '
    info += f'| Taxon: {family_:15s}, {sp_:25s}, n:{len(paths)}  '
    info += f'| Data dst : {dir_:<40s}'
    print(info, end='\r')
    # break

Progress : 1485, Time: 0:01:20 | Taxon: Alcedinidae    , Megaceryle alcyon        , n:10  | Data dst : 03557_Animalia_Chordata_Aves_Coraciiformes_Alcedinidae_Megaceryle_alcyoneollaristelisrissisuss

## 建立 meta data
－'AI_projects/shared/iNaturalist_2021/meta ebird_val.csv'

In [126]:
%%time
meta_file =  'meta/jpg_top100_meta4.csv'
df_jpg100_meta =pd.read_csv(meta_file, index_col=0)
df_jpg100_meta



CPU times: user 6.8 s, sys: 1.33 s, total: 8.13 s
Wall time: 8.14 s


Unnamed: 0,ML_Catalog_Number,Dir_FName,Parent_Dir,Size,Sci_N,Macaulay_public,Format,Sci_Name,Common_Name,Background_Species,...,Original_Image_Height,Original_Image_Width,Specimen_Page_URL,eBird_Checklist_URL,ebird_Species_URL,isInternalUser,mediaDownloadUrl,report_as,Genus,SCI_NAME_2021
0,114289801,Acanthisittidae/Acanthisitta_chloris_0_114289801,Acanthisittidae,287.80,Acanthisitta_chloris_0,,Photo,Acanthisitta chloris,Rifleman,,...,2836.0,4120.0,https://macaulaylibrary.org/asset/114289801,https://ebird.org/view/checklist/S26591349,https://ebird.org/species/riflem1,False,https://cdn.download.ams.birds.cornell.edu/api...,riflem1,Acanthisitta,Acanthisitta chloris
1,114289821,Acanthisittidae/Acanthisitta_chloris_0_114289821,Acanthisittidae,205.43,Acanthisitta_chloris_0,,Photo,Acanthisitta chloris,Rifleman,,...,4000.0,6000.0,https://macaulaylibrary.org/asset/114289821,https://ebird.org/view/checklist/S26591349,https://ebird.org/species/riflem1,False,https://cdn.download.ams.birds.cornell.edu/api...,riflem1,Acanthisitta,Acanthisitta chloris
2,114404941,Acanthisittidae/Acanthisitta_chloris_0_114404941,Acanthisittidae,335.58,Acanthisitta_chloris_0,,Photo,Acanthisitta chloris,Rifleman,,...,1665.0,2500.0,https://macaulaylibrary.org/asset/114404941,https://ebird.org/view/checklist/S48312775,https://ebird.org/species/riflem1,False,https://cdn.download.ams.birds.cornell.edu/api...,riflem1,Acanthisitta,Acanthisitta chloris
3,114404951,Acanthisittidae/Acanthisitta_chloris_0_114404951,Acanthisittidae,261.37,Acanthisitta_chloris_0,,Photo,Acanthisitta chloris,Rifleman,,...,1669.0,2500.0,https://macaulaylibrary.org/asset/114404951,https://ebird.org/view/checklist/S48312775,https://ebird.org/species/riflem1,False,https://cdn.download.ams.birds.cornell.edu/api...,riflem1,Acanthisitta,Acanthisitta chloris
4,115495451,Acanthisittidae/Acanthisitta_chloris_0_115495451,Acanthisittidae,97.31,Acanthisitta_chloris_0,,Photo,Acanthisitta chloris,Rifleman,,...,1565.0,2400.0,https://macaulaylibrary.org/asset/115495451,https://ebird.org/view/checklist/S48500276,https://ebird.org/species/riflem1,False,https://cdn.download.ams.birds.cornell.edu/api...,riflem1,Acanthisitta,Acanthisitta chloris
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
896550,59630711,Zosteropidae/Zosterornis_whiteheadi_0_59630711,Zosteropidae,211.91,Zosterornis_whiteheadi_0,,Photo,Zosterornis whiteheadi,Chestnut-faced Babbler,,...,1983.0,2976.0,https://macaulaylibrary.org/asset/59630711,https://ebird.org/view/checklist/S35659495,https://ebird.org/species/chfbab1,False,https://cdn.download.ams.birds.cornell.edu/api...,chfbab1,Zosterornis,Zosterornis whiteheadi
896551,59630721,Zosteropidae/Zosterornis_whiteheadi_0_59630721,Zosteropidae,194.77,Zosterornis_whiteheadi_0,,Photo,Zosterornis whiteheadi,Chestnut-faced Babbler,,...,1848.0,2772.0,https://macaulaylibrary.org/asset/59630721,https://ebird.org/view/checklist/S35659495,https://ebird.org/species/chfbab1,False,https://cdn.download.ams.birds.cornell.edu/api...,chfbab1,Zosterornis,Zosterornis whiteheadi
896552,63554951,Zosteropidae/Zosterornis_whiteheadi_0_63554951,Zosteropidae,254.81,Zosterornis_whiteheadi_0,,Photo,Zosterornis whiteheadi,Chestnut-faced Babbler,,...,841.0,1500.0,https://macaulaylibrary.org/asset/63554951,https://ebird.org/view/checklist/S38159117,https://ebird.org/species/chfbab1,False,https://cdn.download.ams.birds.cornell.edu/api...,chfbab1,Zosterornis,Zosterornis whiteheadi
896553,79738531,Zosteropidae/Zosterornis_whiteheadi_0_79738531,Zosteropidae,251.04,Zosterornis_whiteheadi_0,,Photo,Zosterornis whiteheadi,Chestnut-faced Babbler,,...,829.0,1500.0,https://macaulaylibrary.org/asset/79738531,https://ebird.org/view/checklist/S41419807,https://ebird.org/species/chfbab1,False,https://cdn.download.ams.birds.cornell.edu/api...,chfbab1,Zosterornis,Zosterornis whiteheadi


In [198]:
df_meta_iNat = pd.read_csv('../../iNaturalist_2021/meta/val.csv', index_col=0)
df_meta_iNat.set_axis(df_meta_iNat.columns.str.capitalize(), axis='columns', inplace=True)
df_meta_iNat.columns

Index(['Id', 'Category_id', 'Width', 'Height', 'File_name', 'License',
       'Rights_holder', 'Date', 'Latitude', 'Longitude',
       'Location_uncertainty', 'Name', 'Common_name', 'Supercategory',
       'Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus',
       'Specific_epithet', 'Image_dir_name'],
      dtype='object')

In [141]:
data_path = '../../iNaturalist_2021'
val_data = 'val_ebird'
    
imgs_path = Path( data_path, val_data)

!find  {str(imgs_path)} -type f |grep -c jpg
# delete .ipynb files
!find  {str(imgs_path)} -type f |grep .ipynb  | xargs rm -v  

files_path_ = [f for f in imgs_path.glob('**/*') if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
files_path_.sort()
# exlude .ipynb files
files_path_ = [f for f in files_path_ if not f.name.startswith('.')] 

files_path_ = [('/').join(f.parts[3:]) for f in files_path_]

print(len(files_path_))

14860
rm: missing operand
Try 'rm --help' for more information.
14860


In [143]:
print(df_meta_iNat.File_name[0])
df_meta_iNat.Image_dir_name[0]

val/03938_Animalia_Chordata_Aves_Passeriformes_Meliphagidae_Ptilotula_penicillata/df8edd4c-fbb4-4886-8600-a429e5efac23.jpg


'03938_Animalia_Chordata_Aves_Passeriformes_Meliphagidae_Ptilotula_penicillata'

In [144]:
df_meta_iNat.sort_values(by=['Image_dir_name'])

Unnamed: 0,Id,Category_id,Width,Height,File_name,License,Rights_holder,Date,Latitude,Longitude,...,Common_name,Supercategory,Kingdom,Phylum,Class,Order,Family,Genus,Specific_epithet,Image_dir_name
8854,2742608,3111,500,374,val/03111_Animalia_Chordata_Aves_Accipitriform...,1,rguinness,2019-04-22 10:29:00+00:00,-15.78316,34.99626,...,Shikra,Birds,Animalia,Chordata,Aves,Accipitriformes,Accipitridae,Accipiter,badius,03111_Animalia_Chordata_Aves_Accipitriformes_A...
8859,2783801,3111,500,500,val/03111_Animalia_Chordata_Aves_Accipitriform...,0,Renju,2019-06-29 12:16:14+00:00,9.79733,76.28338,...,Shikra,Birds,Animalia,Chordata,Aves,Accipitriformes,Accipitridae,Accipiter,badius,03111_Animalia_Chordata_Aves_Accipitriformes_A...
8858,2782750,3111,500,327,val/03111_Animalia_Chordata_Aves_Accipitriform...,1,Bart Wursten,2018-01-24 09:43:00+00:00,-19.07145,16.58461,...,Shikra,Birds,Animalia,Chordata,Aves,Accipitriformes,Accipitridae,Accipiter,badius,03111_Animalia_Chordata_Aves_Accipitriformes_A...
8857,2776272,3111,500,375,val/03111_Animalia_Chordata_Aves_Accipitriform...,1,Abubakar S. Ringim,2019-03-14 06:34:00+00:00,11.09605,8.62213,...,Shikra,Birds,Animalia,Chordata,Aves,Accipitriformes,Accipitridae,Accipiter,badius,03111_Animalia_Chordata_Aves_Accipitriformes_A...
8856,2759352,3111,500,369,val/03111_Animalia_Chordata_Aves_Accipitriform...,1,wildchroma,2012-01-06 17:47:00+00:00,13.43873,-15.80738,...,Shikra,Birds,Animalia,Chordata,Aves,Accipitriformes,Accipitridae,Accipiter,badius,03111_Animalia_Chordata_Aves_Accipitriformes_A...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5621,2695404,4596,500,333,val/04596_Animalia_Chordata_Aves_Trogoniformes...,1,jonyboy20,2016-08-12 13:58:46+00:00,8.37735,-83.29238,...,Black-throated Trogon,Birds,Animalia,Chordata,Aves,Trogoniformes,Trogonidae,Trogon,rufus,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...
5620,2691084,4596,500,375,val/04596_Animalia_Chordata_Aves_Trogoniformes...,0,Annika Lindqvist,2015-07-18 14:49:00+00:00,10.43535,-84.03313,...,Black-throated Trogon,Birds,Animalia,Chordata,Aves,Trogoniformes,Trogonidae,Trogon,rufus,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...
5627,2757534,4596,332,500,val/04596_Animalia_Chordata_Aves_Trogoniformes...,1,sebastianlescano,2014-10-15 14:17:30+00:00,-25.58923,-54.07087,...,Black-throated Trogon,Birds,Animalia,Chordata,Aves,Trogoniformes,Trogonidae,Trogon,rufus,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...
5628,2758135,4596,500,375,val/04596_Animalia_Chordata_Aves_Trogoniformes...,1,Zack Graham,2019-06-23 05:02:00+00:00,9.12571,-79.71493,...,Black-throated Trogon,Birds,Animalia,Chordata,Aves,Trogoniformes,Trogonidae,Trogon,rufus,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...


In [177]:
df_val_ebird = pd.DataFrame(files_path_, columns=['File_name']) 
df_val_ebird['Image_dir_name'] = df_val_ebird.File_name.apply(lambda x: Path(x).parent.stem)
df_val_ebird['ML_Catalog_Number'] = df_val_ebird.File_name.apply(lambda x: Path(x).stem.split('_')[-1]).astype(np.int64)
df_val_ebird

Unnamed: 0,File_name,Image_dir_name,ML_Catalog_Number
0,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204131931
1,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204342841
2,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204383371
3,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204389521
4,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204440111
...,...,...,...
14855,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,204831231
14856,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,204831241
14857,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,204846361
14858,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,205024431


- merge with df_meta_iNat

In [200]:

col = ['Category_id', 'Order', 'Family', 'Genus', 'Name', 'Specific_epithet']
df_meta_iNat.sort_values(by=['Image_dir_name'], inplace=True)
df_val_ebird = pd.concat([df_val_ebird , 
                          df_meta_iNat[col].reset_index(drop=True) 
                         ], axis=1, )

In [202]:
%%time
col = ['ML_Catalog_Number', 'Date', 'Latitude', 'Longitude', 'Elevation_(m)','Year','Month', 'Day']
df_val_ebird = df_val_ebird.merge(df_jpg100_meta[col], on='ML_Catalog_Number', how='left')

df_val_ebird 

CPU times: user 168 ms, sys: 36.4 ms, total: 204 ms
Wall time: 203 ms


Unnamed: 0,File_name,Image_dir_name,ML_Catalog_Number,Category_id,Order,Family,Genus,Name,Specific_epithet,Date,Latitude,Longitude,Elevation_(m),Year,Month,Day
0,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204131931,3111,Accipitriformes,Accipitridae,Accipiter,Accipiter badius,badius,10/29/2015,23.2538,69.6619,,2015.0,10.0,29.0
1,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204342841,3111,Accipitriformes,Accipitridae,Accipiter,Accipiter badius,badius,5/9/2012,0.1339,38.2256,,2012.0,5.0,9.0
2,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204383371,3111,Accipitriformes,Accipitridae,Accipiter,Accipiter badius,badius,1/19/2012,22.7815,72.2067,,2012.0,1.0,19.0
3,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204389521,3111,Accipitriformes,Accipitridae,Accipiter,Accipiter badius,badius,3/1/2019,28.5526,77.1506,,2019.0,3.0,1.0
4,val_ebird/03111_Animalia_Chordata_Aves_Accipit...,03111_Animalia_Chordata_Aves_Accipitriformes_A...,204440111,3111,Accipitriformes,Accipitridae,Accipiter,Accipiter badius,badius,5/28/2011,45.0203,75.0274,,2011.0,5.0,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14855,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,204831231,4596,Trogoniformes,Trogonidae,Trogon,Trogon rufus,rufus,2/24/2019,8.9943,-79.5135,,2019.0,2.0,24.0
14856,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,204831241,4596,Trogoniformes,Trogonidae,Trogon,Trogon rufus,rufus,2/24/2019,8.9943,-79.5135,,2019.0,2.0,24.0
14857,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,204846361,4596,Trogoniformes,Trogonidae,Trogon,Trogon rufus,rufus,12/13/2011,8.6390,-83.1621,,2011.0,12.0,13.0
14858,val_ebird/04596_Animalia_Chordata_Aves_Trogoni...,04596_Animalia_Chordata_Aves_Trogoniformes_Tro...,205024431,4596,Trogoniformes,Trogonidae,Trogon,Trogon rufus,rufus,12/13/2010,0.3732,-79.7034,,2010.0,12.0,13.0


In [203]:
df_val_ebird.to_csv('../../iNaturalist_2021/meta/val_ebird.csv')

In [140]:
df_val_ebird.File_name.values [:10]

array(['val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_badius_0_204131931.jpg',
       'val_ebird/03111_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Accipiter_badius/Accipiter_b