In [1]:
import numpy as np
import pandas as pd
from pathlib import Path


#

### 目標:
 以物種層級，從台灣的TT跟RS資料列出需求名錄，清點每個物種資料取得現況，包括：有無照片 ~~照片出處 做個清單~~
#### 檔案來源:
- metadata: 'MC-SJ-CA_ThermalTolerance-Range Size  - MCTT&RS.csv'
- 目前的影像 : MCTT影像資料夾(576筆)

#### 方法:
- 分別整理出影像資料與 metadata(分為TT與RS)的物種名錄
- 以metadata資料為本，比對目前擁有的影像數量(所有、~~依據性別~~)


In [3]:

dir_MCTT = Path('../data_resize_cropped\MCTT_cropped256_paddingbg')

files_MCTT =  [path.stem for path in dir_MCTT.glob("*.png")]
print(len(files_MCTT))
files_MCTT[:10]

576


['Aberrasine lichenshihi_male_FBG_10-IV-2015_SWu&WCChang_61_1_800_cropped',
 'Abraxas adilluminata Inoue, 1984_male_01_cropped',
 'Abraxas adilluminata_female_Biluxi_1-VII-2015_SSLu_13_1_cropped',
 'Abraxas adilluminata_female_Hewang_28-X-2015_dissected_TFRI_20170105_020_cropped',
 'Abraxas adilluminata_female_MTTS1558_1_cropped',
 'Abraxas adilluminata_female_MTTS302_1_cropped',
 'Abraxas adilluminata_female_MTTS302_dissected_01_cropped',
 'Abraxas adilluminata_female_MTTS690_dissected_01_cropped',
 'Abraxas adilluminata_male_Biluxi_1-VII-2015_SSLu_12_1_cropped',
 'Abraxas adilluminata_male_MTTS230_dissected_01_cropped']

In [4]:
# file_MCTT = 'MC-SJ-CA_ThermalTolerance-Range Size  - MCTT&RS.csv'
url_file_MCTT = 'https://raw.githubusercontent.com/YunghuiHsu/Moth_Project/main/Moth_thermal/data/MC-SJ-CA_ThermalTolerance-Range%20Size%20-%20MCTT%26RS.csv'
df_MC = pd.read_csv(url_file_MCTT)
df_MCTT = df_MC[df_MC.Event == 'MCTT']
df_MCRS = df_MC[df_MC.Event == 'MCRS']

In [5]:
species_list = df_MC[df_MC.Species.notnull()].Species.sort_values().unique()
print('Total species in "MC-SJ-CA_ThermalTolerance-Range Size  - MCTT&RS.csv" : ', len(species_list))

species_list = sorted([s.replace('\"','') for s in species_list])
species_list[:10]

Total species in "MC-SJ-CA_ThermalTolerance-Range Size  - MCTT&RS.csv" :  964


['Abaciscus costimacula',
 'Abaciscus tristis tristis',
 'Aberrasine lichenshihi',
 'Abraxas adilluminata',
 'Abraxas consputa',
 'Abraxas illuminata',
 'Abraxas persimplex',
 'Abraxas placata',
 'Abraxas submartiaria',
 'Abraxas suspecta']

In [96]:
species_list_count = {}
for idx, specie in enumerate(species_list):
    print(f'Searching {idx}, {specie}')
    
    for file in files_MCTT:
        c=0
        if file.startswith(specie):
            # species_list_count[specie] = file
            c+=1
            species_list_count[specie] = c
    
    # if idx ==100: break

Searching 0, Abaciscus costimacula
Searching 1, Abaciscus tristis tristis
Searching 2, Aberrasine lichenshihi
Searching 3, Abraxas adilluminata
Searching 4, Abraxas consputa
Searching 5, Abraxas illuminata
Searching 6, Abraxas persimplex
Searching 7, Abraxas placata
Searching 8, Abraxas submartiaria
Searching 9, Abraxas suspecta
Searching 10, Abraxas tenellula
Searching 11, Abraxas tenuisuffusa
Searching 12, Abraxas wilemani
Searching 13, Abrostola suisharyonis suisharyonis
Searching 14, Acasis viretata himalayica
Searching 15, Achrosis rufescens
Searching 16, Acolutha pictaria imbecilla
Searching 17, Acolutha pulchella semifulva
Searching 18, Acosmetia chinensis
Searching 19, Acronicta gigasa
Searching 20, Acronicta hercules
Searching 21, Acronicta intermedia
Searching 22, Acronicta pruinosa
Searching 23, Acronicta sp.
Searching 24, Actias ningpoana ningtaiwana
Searching 25, Adrapsa incertalis
Searching 26, Adrapsa sp.
Searching 27, Adrapsoides reticulatis
Searching 28, Aedia acronyct

In [143]:

df_sp_count_ = pd.DataFrame(species_list_count.items(), columns=['Species', 'Count'])
df_species_list = pd.DataFrame(species_list, columns=['Species'])
df_sp_count = pd.merge(df_species_list, df_sp_count_, how='left').fillna(0)



In [144]:

print('Total species in metadata("MC-SJ-CA_ThermalTolerance-Range Size  - MCTT&RS.csv") : ', len(species_list), end='\n\n')
print('How many imgs available for MCTT : ', len(files_MCTT), end='\n\n')
print('How many species name(Scientific name) matched between MCTT and metadata  : ', len(df_sp_count_), end='\n\n')
print(f'Count for numbers of imgs for each specie:')
df_sp_count.groupby(['Count']).count()

Total species in metadata("MC-SJ-CA_ThermalTolerance-Range Size  - MCTT&RS.csv") :  964

How many imgs available for MCTT :  576

How many species name(Scientific name) matched between MCTT and metadata  :  302

Count for numbers of imgs for each specie:


Unnamed: 0_level_0,Species
Count,Unnamed: 1_level_1
0.0,662
1.0,302


In [145]:
df_sp_count.to_csv('count_species_for_MC.csv')