In [1]:
import pandas as pd
import json
from tqdm.notebook import trange, tqdm
from datetime import date
import requests
import time
import numpy as np
import re
# Display setting: show every column of wide DataFrames instead of eliding them
pd.set_option('display.max_columns', None)
# Right-justify column headers in the rendered tables
pd.set_option('display.colheader_justify', 'right')


In [2]:
# Input files: the iNaturalist observations export and the local CSV cache of
# per-radius observation counts.
observations_path = 'data/observations-300445.csv'
radiuses_dataset_path = 'data/radiuses_dataset.csv'

# Reporting period. prepare_df() also accepts the strings 'min' / 'max' here,
# but the date formatting at the bottom of this cell needs a real `date`
# object, so the string alternatives would fail there.
# start_date = 'min'
start_date = date(2022, 8, 31)
# finish_date = 'max'
finish_date = date(2023, 2, 28)

# Search radii in km around (lat, lng); 0 means "no geographic filter"
# (the whole of iNaturalist).
radiuses = (20, 200, 2000, 0)
# Number of rows shown in each ranking table.
show_positions_raritets = 20
show_positions_afritets = 15
lat = '55.494403'
lng = '38.644662'
project_id = 'tsyurupy-i-ego-lesa'

# Russian month abbreviations used in the report column headers.
months_ru = {1:'Янв', 2:'Фев', 3:'Мар', 4:'Апр',5:'Май', 6:'Июн', 7:'Июл', 8:'Авг', 9:'Сен', 10:'Окт', 11:'Ноя', 12:'Дек'}
month_num = finish_date.month
# Build the label from the date attributes instead of embedding Cyrillic text
# in a strftime() format string, whose handling of non-ASCII characters
# depends on the platform and locale.
format_finish_date = f'{finish_date.day:02d} {months_ru.get(month_num)} {finish_date.year}'

# Maps the rank columns of the observations export to the Russian rank label
# shown in the report (see add_apply_formats).
# NOTE(review): 'taxon_subterclass_name' carries the same label as
# 'taxon_superclass_name', and 'taxon_genushybrid_name' is not translated —
# both look unintended; confirm the wanted labels before changing them.
ranks_enru = {
    'taxon_kingdom_name': 'Царство',
    'taxon_phylum_name': 'Тип',
    'taxon_subphylum_name': 'Подтип',
    'taxon_superclass_name': 'Надкласс',
    'taxon_class_name': 'Класс',
    'taxon_subclass_name': 'Подкласс',
    'taxon_infraclass_name': 'Инфракласс',
    'taxon_subterclass_name': 'Надкласс',
    'taxon_superorder_name': 'Надотряд',
    'taxon_order_name': 'Отряд',
    'taxon_suborder_name': 'Подотряд',
    'taxon_infraorder_name': 'Инфраотряд',
    'taxon_parvorder_name': 'Парвотряд',
    'taxon_zoosection_name': 'Зоосекция',
    'taxon_zoosubsection_name': 'Зооподсекция',
    'taxon_superfamily_name': 'Надсемейство',
    'taxon_epifamily_name': 'Эписемейство',
    'taxon_family_name': 'Семейство',
    'taxon_subfamily_name': 'Подсемейство',
    'taxon_supertribe_name': 'Надтриба',
    'taxon_tribe_name': 'Триба',
    'taxon_subtribe_name': 'Подтриба',
    'taxon_genus_name': 'Род',
    'taxon_genushybrid_name': 'Genus hybrid',
    'taxon_subgenus_name': 'Подрод',
    'taxon_section_name': 'Секция',
    'taxon_subsection_name': 'Подсекция',
    'taxon_complex_name': 'Комплекс',
    'taxon_species_name': 'Вид',
    'taxon_hybrid_name': 'Гибрид',
    'taxon_subspecies_name': 'Подвид',
    'taxon_variety_name': 'Разновидность',
    'taxon_form_name': 'Форма',
    'taxon_infrahybrid_name': 'Инфрагибрид',
}


In [3]:
def prepare_df(observations_path, start_date, finish_date):
    """Load the observations export and keep only the columns the report uses.

    Parameters
    ----------
    observations_path : str or path-like
        CSV file read with ``pd.read_csv``. It must contain ``created_at``,
        ``quality_grade``, ``common_name``, ``iconic_taxon_name`` and the
        contiguous run of columns from ``taxon_id`` to ``taxon_form_name``.
    start_date, finish_date : datetime.date or str
        Bounds of the reporting period. The strings ``'min'`` / ``'max'`` are
        replaced by the earliest / latest ``created_at`` date in the file;
        any other value is returned unchanged.

    Returns
    -------
    tuple
        ``(df, start_date, finish_date)``. ``df`` has the columns
        ``common_name, quality_grade, created_at, iconic_taxon_name`` followed
        by ``taxon_id`` .. ``taxon_form_name``. ``quality_grade`` is recoded to
        0 (``needs_id``) / 1 (``research``), other grades are kept as they
        are; ``created_at`` holds ``datetime.date`` objects.
    """
    df_full = pd.read_csv(observations_path)

    # Recode the grades on an object-typed copy and insert the finished column.
    # Assigning ints into the string column in place only works while pandas
    # silently upcasts that column.
    grade_flags = {'needs_id': 0, 'research': 1}
    quality_grade = df_full['quality_grade'].astype(object).map(
        lambda grade: grade_flags.get(grade, grade))

    df = df_full.loc[:, 'taxon_id':'taxon_form_name'].copy()
    # Each insert goes to position 0, so the final order is the reverse of
    # the order of these calls.
    df.insert(0, 'iconic_taxon_name', df_full['iconic_taxon_name'])
    df.insert(0, 'created_at', pd.to_datetime(df_full['created_at']).dt.date)
    df.insert(0, 'quality_grade', quality_grade)
    df.insert(0, 'common_name', df_full['common_name'])

    if start_date == 'min':
        start_date = min(df['created_at'])
    if finish_date == 'max':
        finish_date = max(df['created_at'])
    return df, start_date, finish_date


In [4]:
# Load the observations once; prepare_df also turns 'min' / 'max' into dates.
df_taxons, start_date, finish_date = prepare_df(
    observations_path, start_date, finish_date)

# Taxon left out of the whole report.
# NOTE(review): the reason for excluding this id is not recorded in the
# notebook — TODO document it.
EXCLUDED_TAXON_ID = 349797
df_taxons = df_taxons[df_taxons['taxon_id'] != EXCLUDED_TAXON_ID]
df_taxons

Unnamed: 0,common_name,quality_grade,created_at,iconic_taxon_name,taxon_id,taxon_kingdom_name,taxon_phylum_name,taxon_subphylum_name,taxon_superclass_name,taxon_class_name,taxon_subclass_name,taxon_superorder_name,taxon_order_name,taxon_suborder_name,taxon_superfamily_name,taxon_family_name,taxon_subfamily_name,taxon_supertribe_name,taxon_tribe_name,taxon_subtribe_name,taxon_genus_name,taxon_genushybrid_name,taxon_species_name,taxon_hybrid_name,taxon_subspecies_name,taxon_variety_name,taxon_form_name
0,Сморчок высокий,0,2018-09-24,Fungi,1062676,Fungi,Ascomycota,Pezizomycotina,,Pezizomycetes,,,Pezizales,,,Morchellaceae,,,,,Morchella,,,,,,
1,,0,2018-09-24,Arachnida,1070919,Animalia,Arthropoda,Chelicerata,,Arachnida,,,Araneae,Araneomorphae,Salticoidea,Salticidae,Salticinae,,Sitticini,,Attulus,,Attulus floricola,,,,
2,Малашки,0,2019-05-26,Insecta,373470,Animalia,Arthropoda,Hexapoda,,Insecta,Pterygota,,Coleoptera,Polyphaga,Cleroidea,Melyridae,Malachiinae,,,,,,,,,,
3,Ольха чёрная,1,2019-05-28,Plantae,966205,Plantae,Tracheophyta,Angiospermae,,Magnoliopsida,,,Fagales,,,Betulaceae,,,,,Alnus,,Alnus glutinosa,,,,
4,Вероника дубравная,1,2019-05-28,Plantae,51610,Plantae,Tracheophyta,Angiospermae,,Magnoliopsida,,,Lamiales,,,Plantaginaceae,,,Veroniceae,,Veronica,,Veronica chamaedrys,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
998,Бодяк обыкновенный,0,2023-02-19,Plantae,52989,Plantae,Tracheophyta,Angiospermae,,Magnoliopsida,,,Asterales,,,Asteraceae,Carduoideae,,Cardueae,Carduinae,Cirsium,,Cirsium vulgare,,,,
999,Стереум,0,2023-02-19,Fungi,55503,Fungi,Basidiomycota,Agaricomycotina,,Agaricomycetes,,,Russulales,,,Stereaceae,,,,,Stereum,,,,,,
1000,Зелёные водоросли,0,2023-02-19,Plantae,50863,Plantae,Chlorophyta,,,,,,,,,,,,,,,,,,,,
1001,Лось,0,2023-02-19,Mammalia,522193,Animalia,Chordata,Vertebrata,,Mammalia,Theria,Laurasiatheria,Artiodactyla,Ruminantia,,Cervidae,Capreolinae,,Alceini,,Alces,,Alces alces,,,,


In [5]:
# Observations created on or before the end of the reporting period.
# The resulting module-level `df` is read as a global inside get_radius_info
# (for the per-taxon research-grade totals), so this cell must run before it.
date_to = finish_date
df = df_taxons[df_taxons['created_at'] <= date_to].copy()

In [6]:
def get_taxons_df_to_date(df, date_to):
    """List the distinct taxa observed on or before ``date_to``.

    Every observation is assigned to its most specific filled column (the
    last non-null column of the row), and that column's value becomes the
    taxon name.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations as returned by ``prepare_df``: it must contain
        ``created_at``, ``taxon_id``, ``common_name``, ``iconic_taxon_name``
        and the ``taxon_*_name`` rank columns.
    date_to : datetime.date
        Inclusive upper bound for ``created_at``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``taxon_id`` (nullable ``Int64``) with the columns
        ``taxon_rang`` (name of the rank column), ``taxon_name``,
        ``common_name`` and ``iconic_taxon_name``; exact duplicate rows are
        removed.
    """
    result_columns = ['taxon_id', 'taxon_rang', 'taxon_name',
                      'common_name', 'iconic_taxon_name']

    # Filter the frame that was passed in (the earlier version silently read
    # the global `df_taxons` instead of its own argument).
    observed = df[df['created_at'] <= date_to].drop(columns='created_at')
    observed = observed.dropna(axis=1, how='all')

    # Name of the last filled column of every observation.
    last_levels = observed.apply(lambda row: row.last_valid_index(), axis=1)
    # Columns that are the deepest level of at least one observation, kept in
    # the column order of the frame.
    levels = observed.columns[
        observed.columns.isin(last_levels.unique())].to_list()

    pieces = []
    for level_name in levels:
        rows = observed[last_levels == level_name]
        pieces.append(pd.DataFrame({
            'taxon_id': rows['taxon_id'],
            'taxon_rang': level_name,
            'taxon_name': rows[level_name],
            'common_name': rows['common_name'],
            'iconic_taxon_name': rows['iconic_taxon_name'],
        }))

    # One concat instead of growing the frame inside the loop.
    if pieces:
        taxons_df = pd.concat(pieces, axis=0)
    else:
        taxons_df = pd.DataFrame(columns=result_columns)

    taxons_df = taxons_df.drop_duplicates()
    taxons_df['taxon_id'] = taxons_df['taxon_id'].astype('Int64')
    return taxons_df.set_index(keys='taxon_id', drop=True)


In [7]:
# Taxa observed up to the end of the period ...
taxons_df_finish = get_taxons_df_to_date(df_taxons, finish_date)
# ... and taxa already observed up to its start (used later to flag new ones).
taxons_df_start = get_taxons_df_to_date(df_taxons, start_date)

In [8]:
# Inspect the taxa list as of the finish date.
taxons_df_finish

Unnamed: 0_level_0,taxon_rang,taxon_name,common_name,iconic_taxon_name
taxon_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
47170,taxon_kingdom_name,Fungi,Грибы,Fungi
311249,taxon_phylum_name,Bryophyta,Мхи,Plantae
50863,taxon_phylum_name,Chlorophyta,Зелёные водоросли,Plantae
48372,taxon_class_name,Bryopsida,Листостебельные мхи,Plantae
50814,taxon_class_name,Agaricomycetes,Агарикомицеты,Fungi
...,...,...,...,...
55366,taxon_hybrid_name,Fragaria × ananassa,Земляника ананасная,Plantae
234770,taxon_subspecies_name,Calystegia sepium americana,Повой вздутый,Plantae
448080,taxon_subspecies_name,Centaurea phrygia pseudophrygia,Василёк ложнофригийский,Plantae
1397747,taxon_subspecies_name,Calystegia sepium spectabilis,Повой заметный,Plantae


In [9]:
def update_radius(df_tax_tocheck, radiuses, radiuses_dataset_path, date_to):
    """Make sure the counts cache CSV has a row for every requested key.

    A key is ``(taxon_id, radius, date)``. The taxon ids are taken from the
    index of ``df_tax_tocheck``, combined with every value of ``radiuses`` and
    with ``str(date_to)``. Keys missing from the CSV at
    ``radiuses_dataset_path`` are requested through ``fetch_radius`` and
    appended to that file. Progress and two sanity checks (duplicate keys in
    the CSV, full coverage of the requested keys) are printed; nothing is
    returned.
    """
    key_cols = ['taxon_id', 'radius', 'date']
    date_label = str(date_to)

    def load_cache():
        # The cache is always read with a fresh default index.
        return pd.read_csv(
            index_col=False, filepath_or_buffer=radiuses_dataset_path)

    def with_cached_counts(requested_keys, cache):
        # Left join: requested keys missing from the cache get a null count.
        return pd.merge(requested_keys, cache, how='left',
                        left_on=key_cols, right_on=key_cols)

    # One row per (taxon, radius) pair for the given date.
    per_radius = [
        pd.DataFrame({'taxon_id': df_tax_tocheck.index,
                      'radius': radius,
                      'date': date_label})
        for radius in radiuses
    ]
    requested = pd.concat([pd.DataFrame(columns=key_cols)] + per_radius)
    requested = requested.reset_index(drop=True)
    print(f'Going to check in csv: {requested.shape[0]} values')

    cache = load_cache()
    print(f'Total in csv: {cache.shape[0]} values ')

    merged = with_cached_counts(requested, cache)
    print(f"Already in csv: {merged['count'].notnull().sum()} values")

    missing = merged[merged['count'].isnull()].drop(columns='count')
    if missing.shape[0] == 0:
        print('No need to fetch from iNat')
    else:
        print(f'Ask for {missing.shape[0]} values from iNat')
        fetched = fetch_radius(missing, radiuses_dataset_path)
        print(f"Have fetched {fetched['count'].notnull().sum()} values from iNat")
        pd.concat([cache, fetched]).to_csv(
            path_or_buf=radiuses_dataset_path, index=False)

    # Re-read the file so that the checks below see what is really on disk.
    cache = load_cache()
    print(f'Total in csv: {cache.shape[0]} values ')

    repeated_keys = (cache.value_counts(subset=key_cols) > 1).any()
    print('! Somehow duplicates in CSV !' if repeated_keys
          else 'No duplicates in csv')

    # True when every requested key now has a count in the cache.
    recheck = with_cached_counts(requested, cache)
    print(bool(recheck['count'].notnull().all()))

In [10]:
def fetch_radius(havenoradiuses, radiuses_dataset_path):
    """Request observation counts from the iNaturalist API.

    Parameters
    ----------
    havenoradiuses : pandas.DataFrame
        One row per request; the first three columns are read by position as
        ``taxon_id``, ``radius`` and ``date`` (the date is expected to be a
        string, it is written to the backup file as is).
    radiuses_dataset_path : str
        Not used here; kept so that existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        Columns ``taxon_id``, ``radius``, ``date``, ``count``, one row per
        request, in request order.

    Raises
    ------
    Exception
        When the API answers with a status other than 200.

    Notes
    -----
    Every fetched row is also appended to a timestamped backup file under
    ``data/`` so that a run that fails half-way does not lose the counts
    already received. One request per second is sent.
    """
    current_date = str.replace(str(date.today()), '-', '_')
    current_time = time.strftime('%H_%M_%S', time.localtime())
    temporal_txt_path = 'data/temp_file_' + \
        current_date + '_' + current_time + '.csv'
    url = 'https://api.inaturalist.org/v1/observations'
    result_columns = ['taxon_id', 'radius', 'date', 'count']

    with open(temporal_txt_path, 'a') as temp_file:
        temp_file.write('taxon_id,radius,date,count\n')

    records = []
    for row_pos in trange(havenoradiuses.shape[0]):
        taxon_id = havenoradiuses.iloc[row_pos, 0]
        radius = havenoradiuses.iloc[row_pos, 1]
        date_to = havenoradiuses.iloc[row_pos, 2]
        if radius == 0:
            # Radius 0 means "no geographic filter": send empty location values.
            lat, lng, radius_param = '', '', ''
        else:
            lat = '55.494403'
            lng = '38.644662'
            radius_param = radius
        params = {
            'verifiable': 'true',
            'taxon_id': taxon_id,
            'd2': date_to,
            'lat': lat,
            'lng': lng,
            'radius': radius_param,
            'order': 'desc',
            'order_by': 'created_at',
            'only_id': 'true'
        }
        # A timeout keeps a stalled connection from blocking the notebook forever.
        response = requests.get(url=url, params=params, timeout=60)

        # Check the status before touching the body: an error response may not
        # contain 'total_results' (or may not be JSON at all).
        if response.status_code != 200:
            raise Exception('Oh response is not 200, it is ',
                            response.status_code)
        count = response.json()['total_results']
        records.append([taxon_id, radius, date_to, count])

        # Append to the backup file right away, one row per request.
        with open(temporal_txt_path, 'a') as temp_file:
            temp_file.write(
                ','.join([str(taxon_id), str(radius), date_to, str(count)])+'\n')

        time.sleep(1)
        print(
            f'Done loop {row_pos + 1}: r{radius}, date {date_to}, response {response.status_code}, count {count}, id {taxon_id}')

    return pd.DataFrame(records, columns=result_columns)


In [11]:
# Fill the counts cache for every taxon of the finish-date list, first as of
# the start of the period and then as of its end. Only keys that are missing
# from the CSV are requested from iNaturalist.
update_radius(df_tax_tocheck=taxons_df_finish, radiuses=radiuses,
              radiuses_dataset_path=radiuses_dataset_path, date_to=start_date)
update_radius(df_tax_tocheck=taxons_df_finish, radiuses=radiuses,
              radiuses_dataset_path=radiuses_dataset_path, date_to=finish_date)

Going to check in csv: 2164 values
Total in csv: 6352 values 
Already in csv: 2160 values
Ask for 4 values from iNat


  0%|          | 0/4 [00:00<?, ?it/s]

Done loop 1: r20, date 2022-08-31, response 200, count 1, id 55852
Done loop 2: r200, date 2022-08-31, response 200, count 12, id 55852
Done loop 3: r2000, date 2022-08-31, response 200, count 778, id 55852
Done loop 4: r0, date 2022-08-31, response 200, count 2391, id 55852
Have fetched 4 values from iNat
Total in csv: 6356 values 
No duplicates in csv
True
Going to check in csv: 2164 values
Total in csv: 6356 values 
Already in csv: 2164 values
No need to fetch from iNat
Total in csv: 6356 values 
No duplicates in csv
True


In [12]:
def get_radius_info(taxons_df_start, taxons_df_finish, radiuses_dataset_path, start_date, finish_date):
    """Build the per-taxon table of counts, count changes and rarity positions.

    Parameters
    ----------
    taxons_df_start, taxons_df_finish : pandas.DataFrame
        Taxa lists indexed by ``taxon_id``, as returned by
        ``get_taxons_df_to_date``. Taxa missing from ``taxons_df_start`` are
        flagged ``'new'``. ``taxons_df_finish`` must have exactly the four
        columns ``taxon_rang, taxon_name, common_name, iconic_taxon_name``.
    radiuses_dataset_path : str
        CSV cache with the columns ``taxon_id, radius, date, count``.
    start_date, finish_date : datetime.date or str
        Converted with ``str()`` and matched against the ``date`` column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``taxon_id``, ordered by finish-date position. Columns are
        two-level (group, radius): ``full_pos``, the finish-date counts,
        ``count_diff``, ``pos_finish``, ``ifnew``, ``research`` and the four
        taxon description columns.

    Notes
    -----
    Reads the module-level names ``radiuses`` and ``df`` (observations up to
    the finish date) and calls ``sort_index``. The arguments are not modified.
    """
    start_date = str(start_date)
    finish_date = str(finish_date)
    df_tax_csv = pd.read_csv(
        index_col='taxon_id', filepath_or_buffer=radiuses_dataset_path)
    sort_list = [('pos_finish', radius) for radius in radiuses]

    def get_radius(taxons_list, date_to):
        # Cached counts of the listed taxa for one date, one row per radius.
        df = pd.DataFrame()
        df.index = taxons_list.index
        df_tax_csv_todate = df_tax_csv[(df_tax_csv['date'] == date_to) & (
            df_tax_csv.index.isin(taxons_list.index))]
        df = df.merge(df_tax_csv_todate, how='left',
                      left_index=True, right_index=True)
        return df

    # Both dates are looked up for the finish-date taxa list, so the start
    # counts also exist for taxa that are new in the project.
    df_start = get_radius(taxons_df_finish, start_date)
    df_finish = get_radius(taxons_df_finish, finish_date)

    # Wide table: one row per taxon, columns (date, radius).
    df_compact = pd.concat([df_start, df_finish]).pivot(
        index=None, columns=['date', 'radius'], values='count').copy()

    def get_cool_indexes(column):
        # Dense rank by ascending count: equal counts share a position.
        series_sorted = column.sort_values()
        positions = series_sorted.ne(series_sorted.shift()).cumsum()
        positions = positions.align(column)[0]
        return positions

    df_diff = df_compact.loc[:, finish_date] - df_compact.loc[:, start_date]
    df_diff = pd.concat([df_diff], keys=['count_diff'], axis=1)

    # Start positions are computed only for taxa that have a start-date count
    # for the last radius; this group is dropped again before returning.
    df_pos_start = df_compact[df_compact[(start_date, radiuses[-1])].notnull()][start_date].apply(get_cool_indexes, axis=0)
    df_pos_start = pd.concat([df_pos_start], keys=['pos_start'], axis=1)

    df_pos_finish = df_compact[finish_date].apply(get_cool_indexes, axis=0)
    df_pos_finish = pd.concat([df_pos_finish], keys=['pos_finish'], axis=1)
    df_pos_finish = df_pos_finish.sort_values(by=sort_list)

    df_pos = pd.concat(
        [df_compact,  df_diff, df_pos_start, df_pos_finish], axis=1)
    # Row order follows the finish-date positions.
    df_pos = df_pos.reindex(index=df_pos_finish.index)
    df_pos = sort_index(df_pos)

    df_pos = df_pos.astype('Int64')
    df_pos.insert(0,('full_pos'), range(1,df_pos.shape[0]+1))

    df_pos.loc[~df_pos.index.isin(taxons_df_start.index), ('ifnew')] = 'new'
    # Research-grade observations per taxon (quality_grade is 0/1), taken from
    # the module-level `df`.
    df_pos[('research')] = df[['taxon_id', 'quality_grade']
                              ].groupby(by='taxon_id').sum()

    # Rename the columns on a copy: the caller's frame must keep its flat
    # column names.
    taxon_info = taxons_df_finish.copy()
    taxon_info.columns = (
        ('taxon_rang', ''), ('taxon_name', ''), ('common_name', ''), ('iconic_taxon_name', ''))
    df_pos = pd.concat([df_pos, taxon_info], axis=1)
    df_pos.drop([str(start_date), 'pos_start'], axis=1, inplace=True, level=0)

    return df_pos


def sort_index(df):
    """Return ``df`` with its two-level columns put into the report order.

    The first level follows the fixed list of column groups below, the second
    level follows the order of the module-level ``radiuses``. The module-level
    ``start_date`` and ``finish_date`` supply the names of the two count
    groups.
    """
    group_order = [
        'result_pos', 'result_name', 'result_count',
        str(start_date), str(finish_date),
        'full_pos', 'count_diff', 'pos_start', 'pos_finish', 'pos_diff',
        'ifnew', 'taxon_rang', 'taxon_name', 'iconic_taxon_name',
        'common_name', 'research',
    ]
    # Every label of either level gets its position in the combined list.
    rank_of = {label: position
               for position, label in enumerate(group_order + list(radiuses))}

    def by_rank(labels):
        return labels.map(rank_of)

    return df.sort_index(axis=1, level=[0, 1], key=by_rank)


In [13]:
def formatcount(count, count_diff=False):
    """Abbreviate an observation count for display.

    Parameters
    ----------
    count : int or missing value
        The count, or the change of a count when ``count_diff`` is true.
    count_diff : bool, default False
        Format the value as a signed difference.

    Returns
    -------
    The value itself when it is at most 1000 and ``count_diff`` is false;
    otherwise a string: ``'7.0K'`` above 1000, ``'29K'`` above 10000,
    ``'3.1M'`` above 1000000. Differences get a leading ``'+'`` or ``'-'``
    and a zero difference becomes ``''``. A missing value is returned
    unchanged for counts and as ``''`` for differences.
    """
    # Missing values cannot be compared; pass them through instead of raising.
    if pd.isna(count):
        return '' if count_diff else count

    # Abbreviate the magnitude of a negative difference and put the sign back
    # afterwards (the thresholds below only match positive numbers).
    negative = bool(count_diff) and count < 0
    value = -count if negative else count

    if value > 1000000:
        text = str(round(value/1000000, 1)) + 'M'
    elif value > 10000:
        text = str(int(value/1000)) + 'K'
    elif value > 1000:
        text = str(round(value/1000, 1)) + 'K'
    else:
        text = value

    if not count_diff:
        return text
    if not text:
        # A zero difference is shown as an empty cell.
        return ''
    return ('-' if negative else '+') + str(text)


def addresult_columns(df):
    """Append the empty ``result_name`` / ``result_pos`` / ``result_count`` groups.

    For each of the three groups one empty column per value of the
    module-level ``radiuses`` is added; they are filled later, row by row.
    The columns are then put into the report order with ``sort_index``.
    A new frame is returned, ``df`` itself is left as it was.
    """
    # The earlier version also tried to pre-fill the first radius through
    # chained indexing (frame[group][radius] = ...). That statement wrote to
    # a temporary object and never changed the result, so it is not kept.
    placeholders = [
        pd.DataFrame(
            columns=pd.MultiIndex.from_product(
                [[columnset], radiuses], names=['date', 'radius']),
            index=df.index)
        for columnset in ['result_name', 'result_pos', 'result_count']
    ]
    df = pd.concat([df] + placeholders, axis=1)

    return sort_index(df)


def add_apply_formats(df):
    """Translate the rank names and abbreviate the counts for display.

    ``taxon_rang`` is replaced by its label from ``ranks_enru`` (``None`` for
    unknown keys); the finish-date counts and ``count_diff`` are passed
    through ``formatcount`` cell by cell.

    The frame is modified in place and the same object is returned, so the
    function must not be applied twice to the same frame.
    """
    def format_cells(block, **kwargs):
        # DataFrame.applymap is deprecated in favour of DataFrame.map; use the
        # new name when this pandas version has it.
        elementwise = block.map if hasattr(block, 'map') else block.applymap
        return elementwise(formatcount, **kwargs)

    finish_key = str(finish_date)
    df['taxon_rang'] = df['taxon_rang'].apply(lambda x: ranks_enru.get(x))
    df[finish_key] = format_cells(df[finish_key])
    df['count_diff'] = format_cells(df['count_diff'], count_diff=True)

    return df


In [14]:
# Build the per-taxon table of counts, differences and positions.
# (get_radius_info converts the dates to strings itself, so the conversion
# below is not needed.)
# finish_date = str(finish_date)
df_pos = get_radius_info(taxons_df_start, taxons_df_finish,
                         radiuses_dataset_path, start_date, finish_date)
df_pos


date,full_pos,2023-02-28,2023-02-28,2023-02-28,2023-02-28,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,research,taxon_rang,taxon_name,common_name,iconic_taxon_name
radius,Unnamed: 1_level_1,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
900207,1,1,3,27,28,1,1,1,1,1,1,1,1,new,1,taxon_species_name,Alchemilla conglobata,Манжетка шариковидно-скученная,Plantae
1070919,2,1,3,69,256,0,0,2,12,1,1,3,5,,0,taxon_species_name,Attulus floricola,,Arachnida
556219,3,1,4,148,267,0,1,13,42,1,2,13,6,,1,taxon_species_name,Podosphaera aphanis,,Fungi
227391,4,1,8,97,322,1,2,21,43,1,4,5,9,new,0,taxon_species_name,Peltigera malacea,Пельтигера мягкая,Fungi
875126,5,1,9,316,405,1,2,64,96,1,5,26,17,new,0,taxon_species_name,Phellinopsis conchata,Феллинус раковинообразный,Fungi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094814,537,359,29016,286667,3148221,127,6993,74750,538487,90,446,519,532,new,0,taxon_subclass_name,Agaricomycetidae,Агарикомицетовые,Fungi
790553,538,374,22538,133843,665390,40,627,4667,31192,91,444,516,526,new,0,taxon_subfamily_name,Apioideae,Сельдерейные,Plantae
47434,539,560,27693,194467,1605209,104,1236,9562,169567,92,445,518,529,new,0,taxon_family_name,Poaceae,Мятликовые,Plantae
50814,540,623,49880,493777,5323789,226,10246,111551,851104,93,447,520,533,,0,taxon_class_name,Agaricomycetes,Агарикомицеты,Fungi


In [15]:
# add_apply_formats writes into the frame it receives and returns that same
# object, so after this cell `df_pos` itself holds the formatted values and
# `df_added_formats` is just another name for it.
# NOTE(review): because of that, run this cell only once per freshly built
# `df_pos` — a second run would format already formatted values.
df_added_formats = add_apply_formats(df_pos)
df_added_formats

date,full_pos,2023-02-28,2023-02-28,2023-02-28,2023-02-28,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,research,taxon_rang,taxon_name,common_name,iconic_taxon_name
radius,Unnamed: 1_level_1,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
900207,1,1,3,27,28,+1,+1,+1,+1,1,1,1,1,new,1,Вид,Alchemilla conglobata,Манжетка шариковидно-скученная,Plantae
1070919,2,1,3,69,256,,,+2,+12,1,1,3,5,,0,Вид,Attulus floricola,,Arachnida
556219,3,1,4,148,267,,+1,+13,+42,1,2,13,6,,1,Вид,Podosphaera aphanis,,Fungi
227391,4,1,8,97,322,+1,+2,+21,+43,1,4,5,9,new,0,Вид,Peltigera malacea,Пельтигера мягкая,Fungi
875126,5,1,9,316,405,+1,+2,+64,+96,1,5,26,17,new,0,Вид,Phellinopsis conchata,Феллинус раковинообразный,Fungi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094814,537,359,29K,286K,3.1M,+127,+7.0K,+74K,+538K,90,446,519,532,new,0,Подкласс,Agaricomycetidae,Агарикомицетовые,Fungi
790553,538,374,22K,133K,665K,+40,+627,+4.7K,+31K,91,444,516,526,new,0,Подсемейство,Apioideae,Сельдерейные,Plantae
47434,539,560,27K,194K,1.6M,+104,+1.2K,+9.6K,+169K,92,445,518,529,new,0,Семейство,Poaceae,Мятликовые,Plantae
50814,540,623,49K,493K,5.3M,+226,+10K,+111K,+851K,93,447,520,533,,0,Класс,Agaricomycetes,Агарикомицеты,Fungi


In [16]:
# `df_pos` already carries the display formatting applied in place by the
# previous cell; here the empty result_* column groups are added to it.
df_added_res_cols = addresult_columns(df_pos)
df_added_res_cols

date,result_pos,result_pos,result_pos,result_pos,result_name,result_name,result_name,result_name,result_count,result_count,result_count,result_count,2023-02-28,2023-02-28,2023-02-28,2023-02-28,full_pos,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,taxon_rang,taxon_name,iconic_taxon_name,common_name,research
radius,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 17_level_1,20,200,2000,0,20,200,2000,0,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2
900207,,,,,,,,,,,,,1,3,27,28,1,+1,+1,+1,+1,1,1,1,1,new,Вид,Alchemilla conglobata,Plantae,Манжетка шариковидно-скученная,1
1070919,,,,,,,,,,,,,1,3,69,256,2,,,+2,+12,1,1,3,5,,Вид,Attulus floricola,Arachnida,,0
556219,,,,,,,,,,,,,1,4,148,267,3,,+1,+13,+42,1,2,13,6,,Вид,Podosphaera aphanis,Fungi,,1
227391,,,,,,,,,,,,,1,8,97,322,4,+1,+2,+21,+43,1,4,5,9,new,Вид,Peltigera malacea,Fungi,Пельтигера мягкая,0
875126,,,,,,,,,,,,,1,9,316,405,5,+1,+2,+64,+96,1,5,26,17,new,Вид,Phellinopsis conchata,Fungi,Феллинус раковинообразный,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094814,,,,,,,,,,,,,359,29K,286K,3.1M,537,+127,+7.0K,+74K,+538K,90,446,519,532,new,Подкласс,Agaricomycetidae,Fungi,Агарикомицетовые,0
790553,,,,,,,,,,,,,374,22K,133K,665K,538,+40,+627,+4.7K,+31K,91,444,516,526,new,Подсемейство,Apioideae,Plantae,Сельдерейные,0
47434,,,,,,,,,,,,,560,27K,194K,1.6M,539,+104,+1.2K,+9.6K,+169K,92,445,518,529,new,Семейство,Poaceae,Plantae,Мятликовые,0
50814,,,,,,,,,,,,,623,49K,493K,5.3M,540,+226,+10K,+111K,+851K,93,447,520,533,,Класс,Agaricomycetes,Fungi,Агарикомицеты,0


In [17]:
def joininfo(row):
    """Fill the HTML cells of one taxon row.

    Meant for ``DataFrame.apply(joininfo, axis=1)`` on the table produced by
    ``addresult_columns``: ``row`` is a Series with a two-level index
    (column group, radius) whose name is the taxon id.

    The following groups of the row are overwritten and the row is returned:

    * ``research`` - link to the project observations of the taxon, shown as
      ``RG x N`` when the research-grade total is non-zero, otherwise as
      ``Need ID``;
    * ``result_name`` - bold linked common name (or scientific name when no
      common name exists), the rank label unless it is ``'Вид'``, and the
      scientific name in italics;
    * ``result_pos`` - a ``NEW`` badge for taxa flagged in ``ifnew``,
      otherwise an empty string;
    * ``iconic_taxon_name`` - ``<img>`` tag of the iconic taxon icon;
    * ``result_count`` - per radius, the linked formatted count followed by
      the formatted difference.

    Reads the module-level ``finish_date``, ``project_id``, ``lat`` and
    ``lng``.
    """
    taxon_id = row.name
    taxon_name = row['taxon_name'].item()
    common_name = row['common_name'].item()
    taxon_rang = row['taxon_rang'].item()
    research = row['research'].item()
    is_new = not pd.isnull(row['ifnew'].item())
    iconic_taxon_name = str(row['iconic_taxon_name'].item()).lower()
    # One entry per radius.
    count = row[str(finish_date)].astype('string')
    count_diff = row['count_diff'].astype('string')

    # --- taxon title ------------------------------------------------------
    taxon_link_open = f'<a href=https://www.inaturalist.org/taxa/{taxon_id} style="color:black">'
    if pd.isnull(common_name):
        bold = f'<b>{taxon_link_open}{str(taxon_name)}</a></b>'
        italic = '<br>'
    else:
        bold = f'<b>{taxon_link_open}{str(common_name).title()}</a></b>'
        italic = f'<br><i>{taxon_name}</i>'

    # The rank is spelled out only for taxa that are not species.
    rank_suffix = '' if taxon_rang == 'Вид' else f' {taxon_rang}'

    new_badge = '<br><b style="font-size:62%;color:green">NEW</b>' if is_new else ''

    # --- project status link ----------------------------------------------
    project_link_open = f'<a href=https://www.inaturalist.org/observations?'\
        f'&project_id={project_id}'\
        '&subview=map&nelat=55.526&nelng=38.85&swlat=55.423&swlng=38.536'\
        f'&taxon_id={taxon_id} '
    if not research:
        research_html = project_link_open + 'style="color:black">Need ID</a>'
    else:
        research_html = project_link_open + \
            f'style="color:green"><b>RG&#xD7;{research}</b></a>'

    row['research'] = research_html
    row['result_name'] = bold + rank_suffix + italic
    row['result_pos'] = new_badge

    row['iconic_taxon_name'] = f'<img src=https://www.inaturalist.org/assets/iconic_taxa/{iconic_taxon_name}-cccccc-20px.png alt={iconic_taxon_name}>'

    # 'radius=xxx' is a placeholder kept verbatim for later processing.
    row['result_count'] = f'<a href=https://www.inaturalist.org/observations?'\
        '&place_id=any'\
        f'&lat={lat}&lng={lng}&radius=xxx'\
        '&subview=table'\
        f'&taxon_id={taxon_id} style="color:black">' + \
        count + '</a> ' + count_diff

    return row


In [18]:
# Fill the HTML result cells row by row (one call of joininfo per taxon).
df_info = df_added_res_cols.apply(joininfo, axis=1)
df_info.head(5)

date,result_pos,result_pos,result_pos,result_pos,result_name,result_name,result_name,result_name,result_count,result_count,result_count,result_count,2023-02-28,2023-02-28,2023-02-28,2023-02-28,full_pos,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,taxon_rang,taxon_name,iconic_taxon_name,common_name,research
radius,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 17_level_1,20,200,2000,0,20,200,2000,0,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2
900207,"<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>",<b><a href=https://www.inaturalist.org/taxa/90...,<b><a href=https://www.inaturalist.org/taxa/90...,<b><a href=https://www.inaturalist.org/taxa/90...,<b><a href=https://www.inaturalist.org/taxa/90...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,1,3,27,28,1,1.0,1.0,1,1,1,1,1,1,new,Вид,Alchemilla conglobata,<img src=https://www.inaturalist.org/assets/ic...,Манжетка шариковидно-скученная,<a href=https://www.inaturalist.org/observatio...
1070919,,,,,<b><a href=https://www.inaturalist.org/taxa/10...,<b><a href=https://www.inaturalist.org/taxa/10...,<b><a href=https://www.inaturalist.org/taxa/10...,<b><a href=https://www.inaturalist.org/taxa/10...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,1,3,69,256,2,,,2,12,1,1,3,5,,Вид,Attulus floricola,<img src=https://www.inaturalist.org/assets/ic...,,<a href=https://www.inaturalist.org/observatio...
556219,,,,,<b><a href=https://www.inaturalist.org/taxa/55...,<b><a href=https://www.inaturalist.org/taxa/55...,<b><a href=https://www.inaturalist.org/taxa/55...,<b><a href=https://www.inaturalist.org/taxa/55...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,1,4,148,267,3,,1.0,13,42,1,2,13,6,,Вид,Podosphaera aphanis,<img src=https://www.inaturalist.org/assets/ic...,,<a href=https://www.inaturalist.org/observatio...
227391,"<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>",<b><a href=https://www.inaturalist.org/taxa/22...,<b><a href=https://www.inaturalist.org/taxa/22...,<b><a href=https://www.inaturalist.org/taxa/22...,<b><a href=https://www.inaturalist.org/taxa/22...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,1,8,97,322,4,1.0,2.0,21,43,1,4,5,9,new,Вид,Peltigera malacea,<img src=https://www.inaturalist.org/assets/ic...,Пельтигера мягкая,<a href=https://www.inaturalist.org/observatio...
875126,"<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>","<br><b style=""font-size:62%;color:green"">NEW</b>",<b><a href=https://www.inaturalist.org/taxa/87...,<b><a href=https://www.inaturalist.org/taxa/87...,<b><a href=https://www.inaturalist.org/taxa/87...,<b><a href=https://www.inaturalist.org/taxa/87...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,<a href=https://www.inaturalist.org/observatio...,1,9,316,405,5,1.0,2.0,64,96,1,5,26,17,new,Вид,Phellinopsis conchata,<img src=https://www.inaturalist.org/assets/ic...,Феллинус раковинообразный,<a href=https://www.inaturalist.org/observatio...


In [19]:
def sort_separate(df, raritets_sort):
    """Build one ranked, display-ready table per search radius.

    For every radius in the notebook-level ``radiuses`` the frame is ordered
    by its ``('pos_finish', radius)`` columns (the current radius first, the
    remaining radii as tie-breakers), each row receives a 1-based position,
    and the first rows are copied into a five-column table.

    Reads the notebook globals ``radiuses``, ``lat``, ``lng``,
    ``format_finish_date``, ``show_positions_raritets`` and
    ``show_positions_afritets``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. The code reads the columns ``taxon_rang``,
        ``research``, ``iconic_taxon_name`` and, per radius,
        ``('pos_finish', r)``, ``('result_pos', r)``, ``('result_name', r)``
        and ``('result_count', r)``. The caller's frame is not modified.
    raritets_sort : bool
        ``True``: keep every row, show positions in ascending order and cut
        at ``show_positions_raritets``. ``False``: keep only rows whose
        ``taxon_rang`` is one of the species-level ranks listed below, show
        positions in descending order and cut at ``show_positions_afritets``.

    Returns
    -------
    list of pandas.DataFrame
        One table per entry of ``radiuses``, in the same order.
    """
    afritet_rang = ['Вид','Гибрид','Подвид','Разновидность','Форма','Инфрагибрид']

    if raritets_sort:
        show_positions = show_positions_raritets
        taxonname_col_name = 'Название таксона'
    else:
        # Build the mask from the frame that was passed in; the original read
        # the notebook-global df_info here and ignored the argument.
        df = df[df['taxon_rang'].isin(afritet_rang)]
        show_positions = show_positions_afritets
        taxonname_col_name = 'Название вида<br>или подвида'

    # Query-string fragment that is stripped from the links of the
    # "whole iNat" (radius == 0) table.
    radius_pars = f'&lat={lat}&lng={lng}&radius=xxx'
    radiuse_array = np.asarray(radiuses)
    dataframes = []

    for shift, radius in enumerate(radiuses):
        # Rotate the radii so the current one is the primary sort key and the
        # others follow in their cyclic order.
        rotated = np.roll(radiuse_array, -shift)
        sort_list = [('pos_finish', r) for r in rotated]

        # First pass: always ascending, so position 1 is the smallest
        # 'pos_finish' value for this radius.
        df = df.sort_values(by=sort_list, ignore_index=True)

        full_pos_col = 'pos_' + str(radius)
        df.insert(0, full_pos_col, range(1, df.shape[0] + 1))
        # Stored as text because it is concatenated with 'result_pos' below.
        df[full_pos_col] = df[full_pos_col].astype('string')

        # Second pass: display order (reversed when raritets_sort is False).
        df = df.sort_values(by=sort_list, ignore_index=True,
                            ascending=raritets_sort)

        if radius:
            count_col_name = f'Количество наблюдений<br>в радиусе {radius} км на {format_finish_date}'
        else:
            count_col_name = 'Количество наблюдений<br>во всём iNat'
            df.loc[:, ('result_count', radius)] = df[('result_count', radius)].apply(
                lambda x: x.replace(radius_pars, ''))

        # Build the table directly from the aligned columns instead of
        # assigning into an empty frame through .iloc.
        df_sorted = pd.DataFrame({
            '#<br>по редкости': df[full_pos_col] + df[('result_pos', radius)],
            'Статус<br>в проекте': df['research'],
            ' ': df['iconic_taxon_name'],
            taxonname_col_name: df[('result_name', radius)],
            count_col_name: df[('result_count', radius)],
        })

        dataframes.append(df_sorted.iloc[0:show_positions, :])

    return dataframes


# Rank df_info twice: first in rarity mode (raritets_sort=True), then in the
# reversed, species-only mode (raritets_sort=False).
raritets, afritets = (
    sort_separate(df_info, raritets_sort=mode) for mode in (True, False)
)

In [20]:
def raritets_html(raritets, raritets_sort):
    """Write one styled HTML table per radius into ``output/``.

    Each frame is rendered with ``DataFrame.to_html`` (no index, no HTML
    escaping), then the markup is patched with plain string replacements and
    saved as ``output/<prefix>_<radius>.html``.

    Reads the notebook globals ``radiuses`` and ``format_finish_date``.

    Parameters
    ----------
    raritets : sequence of pandas.DataFrame
        Tables to export, paired positionally with ``radiuses``.
    raritets_sort : bool
        Selects the file-name prefix: ``'raritets'`` when true, otherwise
        ``'afritets'``.
    """
    prefix = 'raritets' if raritets_sort else 'afritets'

    for radius, df_to_export in zip(radiuses, raritets):
        htmlname = f'output/{prefix}_' + str(radius) + '.html'

        # Render to a string and write the file once at the end; this avoids
        # re-reading a UTF-8 file with the platform's default encoding.
        filedata = df_to_export.to_html(header=True, index=False,
                                        escape=False, justify='center', border=None)

        filedata = filedata.replace(' class="dataframe"', '')
        # Fill the radius placeholder left in the links.
        filedata = filedata.replace('radius=xxx', f'radius={radius}')
        filedata = filedata.replace(
            '<td><b style="color', '<td style="vertical-align:top;text-align: center"><b style="color')

        # Header cells: vertical alignment and column widths.
        filedata = filedata.replace(
            '<th>#', '<th  style="vertical-align:top" width="10%">#')
        # The real header text contains <br>, so match it with the tag.
        filedata = filedata.replace(
            '<th>Статус<br>в проекте', '<th  style="vertical-align:top" width="15%">Статус<br>в проекте')
        # The blank-named icon column.
        filedata = filedata.replace(
            '<th> </th>', '<th  style="vertical-align:top" width="5%"> </th>')
        filedata = filedata.replace(
            '<th>Название таксона', '<th  style="vertical-align:top" width="40%">Название таксона')
        filedata = filedata.replace('<th>Количество наблюдений<br>во всём iNat',
                                    f'<th  style="vertical-align:top" width="30%">Количество наблюдений<br>во всём iNat на {format_finish_date}')

        # '+new' must be replaced before the numeric '+N' markers below.
        filedata = filedata.replace(
            '+new', '<b style="font-size:62%;color:green">&nbsp;&nbsp;NEW</b>')
        filedata = filedata.replace('<tr>', '<tr height="50px">')
        filedata = filedata.replace(
            '<td><img', '<td align="center" style="text-align: center; vertical-align: middle;"><img')
        # Turn '+<number>' (optionally suffixed with K) into a green up-arrow
        # badge. Raw string so that '\+' is not an invalid string escape.
        filedata = re.sub(
            r'\+([0-9]*\.?[0-9]+K?)', r'<b style="font-size:62%;color:green">&nbsp;&nbsp;&#8593;\1</b>', filedata)

        with open(htmlname, 'w', encoding='utf-8') as file:
            file.write(filedata)


In [21]:
# Export both sets of tables; the flag only selects the output file prefix.
for tables, is_raritets in ((raritets, True), (afritets, False)):
    raritets_html(tables, raritets_sort=is_raritets)