In [1]:
import pandas as pd
import json
from tqdm.notebook import trange, tqdm
from datetime import date
import requests
import time
import numpy as np
import re
# Display settings: show every column of a DataFrame and right-align column headers.
pd.options.display.max_columns = None
pd.options.display.colheader_justify = 'right'


In [2]:
# --- Notebook configuration -------------------------------------------------
# Input CSV with the observations and the CSV used as a cache of fetched counts.
observations_path = 'data/observations-285432.csv'
radiuses_dataset_path = 'data/radiuses_dataset.csv'

# Reporting period. prepare_df() also accepts the strings 'min' / 'max' here,
# meaning "earliest / latest creation date found in the observations file".
start_date = date(2022, 8, 31)
finish_date = date(2022, 11, 30)

# Search radiuses in km; 0 means "no geographic restriction" (all of iNaturalist).
radiuses = (20, 200, 2000, 0)
# Number of rows kept in each exported ranking table.
show_positions = 25

# Taxonomy column name -> Russian label of the rank.
ranks_enru = {
    'taxon_kingdom_name': 'Царство',
    'taxon_phylum_name': 'Тип',
    'taxon_subphylum_name': 'Подтип',
    'taxon_superclass_name': 'Надкласс',
    'taxon_class_name': 'Класс',
    'taxon_subclass_name': 'Подкласс',
    'taxon_infraclass_name': 'Инфракласс',
    # Was 'Надкласс', which is the label of superclass, not subterclass.
    'taxon_subterclass_name': 'Подтеркласс',
    'taxon_superorder_name': 'Надотряд',
    'taxon_order_name': 'Отряд',
    'taxon_suborder_name': 'Подотряд',
    'taxon_infraorder_name': 'Инфраотряд',
    'taxon_parvorder_name': 'Парвотряд',
    'taxon_zoosection_name': 'Зоосекция',
    'taxon_zoosubsection_name': 'Зооподсекция',
    'taxon_superfamily_name': 'Надсемейство',
    'taxon_epifamily_name': 'Эписемейство',
    'taxon_family_name': 'Семейство',
    'taxon_subfamily_name': 'Подсемейство',
    'taxon_supertribe_name': 'Надтриба',
    'taxon_tribe_name': 'Триба',
    'taxon_subtribe_name': 'Подтриба',
    'taxon_genus_name': 'Род',
    'taxon_genushybrid_name': 'Genus hybrid',
    'taxon_subgenus_name': 'Подрод',
    'taxon_section_name': 'Секция',
    'taxon_subsection_name': 'Подсекция',
    'taxon_complex_name': 'Комплекс',
    'taxon_species_name': 'Вид',
    'taxon_hybrid_name': 'Гибрид',
    'taxon_subspecies_name': 'Подвид',
    'taxon_variety_name': 'Разновидность',
    'taxon_form_name': 'Форма',
    'taxon_infrahybrid_name': 'Инфрагибрид',
}


In [3]:
def prepare_df(observations_path, start_date, finish_date):
    """Load the observations file and keep only the columns used for taxon ranking.

    Parameters
    ----------
    observations_path : str or file-like
        CSV source passed straight to ``pd.read_csv``. It must contain the
        columns 'created_at', 'common_name' and the contiguous column range
        from 'taxon_id' to 'taxon_form_name'.
    start_date, finish_date
        Period boundaries. The string 'min' (for ``start_date``) or 'max'
        (for ``finish_date``) is replaced by the earliest / latest creation
        date in the file; any other value is returned unchanged.

    Returns
    -------
    tuple
        ``(df, start_date, finish_date)`` where ``df`` has the columns
        'common_name', 'created_at' (``datetime.date`` values) followed by
        the taxonomy columns.
    """
    observations = pd.read_csv(observations_path)

    # Taxonomy part of the export: everything from 'taxon_id' to 'taxon_form_name'.
    df = observations.loc[:, 'taxon_id':'taxon_form_name'].copy()
    # Both inserts go to position 0, so 'common_name' ends up before 'created_at'.
    df.insert(0, 'created_at', pd.to_datetime(observations['created_at']).dt.date)
    df.insert(0, 'common_name', observations['common_name'])

    if start_date == 'min':
        start_date = min(df['created_at'])
    if finish_date == 'max':
        finish_date = max(df['created_at'])

    return df, start_date, finish_date


In [4]:
df_taxons, start_date, finish_date = prepare_df(
    observations_path=observations_path,
    start_date=start_date,
    finish_date=finish_date,
)

# Observations identified as taxon 349797 are left out of every later calculation.
is_excluded_taxon = df_taxons['taxon_id'] == 349797
df_taxons = df_taxons[~is_excluded_taxon]
df_taxons

Unnamed: 0,common_name,created_at,taxon_id,taxon_kingdom_name,taxon_phylum_name,taxon_subphylum_name,taxon_superclass_name,taxon_class_name,taxon_subclass_name,taxon_superorder_name,taxon_order_name,taxon_suborder_name,taxon_superfamily_name,taxon_family_name,taxon_subfamily_name,taxon_supertribe_name,taxon_tribe_name,taxon_subtribe_name,taxon_genus_name,taxon_genushybrid_name,taxon_species_name,taxon_hybrid_name,taxon_subspecies_name,taxon_variety_name,taxon_form_name
0,Сморчок высокий,2018-09-24,1062676,Fungi,Ascomycota,Pezizomycotina,,Pezizomycetes,,,Pezizales,,,Morchellaceae,,,,,Morchella,,,,,,
1,,2018-09-24,1070919,Animalia,Arthropoda,Chelicerata,,Arachnida,,,Araneae,Araneomorphae,Salticoidea,Salticidae,Salticinae,,Sitticini,,Attulus,,Attulus floricola,,,,
2,Малашки,2019-05-26,373470,Animalia,Arthropoda,Hexapoda,,Insecta,Pterygota,,Coleoptera,Polyphaga,Cleroidea,Melyridae,Malachiinae,,,,,,,,,,
3,Ольха чёрная,2019-05-28,966205,Plantae,Tracheophyta,Angiospermae,,Magnoliopsida,,,Fagales,,,Betulaceae,,,,,Alnus,,Alnus glutinosa,,,,
4,Вероника дубравная,2019-05-28,51610,Plantae,Tracheophyta,Angiospermae,,Magnoliopsida,,,Lamiales,,,Plantaginaceae,,,Veroniceae,,Veronica,,Veronica chamaedrys,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
930,Млекопитающие,2022-12-15,40151,Animalia,Chordata,Vertebrata,,Mammalia,,,,,,,,,,,,,,,,,
931,Млекопитающие,2022-12-15,40151,Animalia,Chordata,Vertebrata,,Mammalia,,,,,,,,,,,,,,,,,
932,Млекопитающие,2022-12-15,40151,Animalia,Chordata,Vertebrata,,Mammalia,,,,,,,,,,,,,,,,,
933,Млекопитающие,2022-12-15,40151,Animalia,Chordata,Vertebrata,,Mammalia,,,,,,,,,,,,,,,,,


In [5]:
def get_taxons_df_to_date(df, date_to):
    """Return the distinct taxa observed on or before ``date_to``.

    Every observation is attributed to the last rank column that holds a
    value in its row (the columns are assumed to run from the broadest rank
    to the most specific one - confirm against the export format).

    Parameters
    ----------
    df : pandas.DataFrame
        Observations with the columns 'created_at', 'taxon_id',
        'common_name' and the 'taxon_*_name' rank columns. Not modified.
    date_to
        Upper bound (inclusive) compared against ``df['created_at']``.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'taxon_id' (nullable ``Int64``) with the columns
        'taxon_rang' (name of the rank column), 'taxon_name' and
        'common_name'; exact duplicate rows are removed. Empty when no
        observation falls within the period.
    """
    result_columns = ['taxon_id', 'taxon_rang', 'taxon_name', 'common_name']
    id_columns = ('taxon_id', 'common_name')

    # Filter the frame that was passed in; reading a global here would make
    # the `df` argument meaningless.
    observed = df[df['created_at'] <= date_to].drop(columns='created_at')
    # Ranks that are never filled in for the period carry no information.
    observed = observed.dropna(axis=1, how='all')

    frames = []
    if not observed.empty:
        # Name of the last non-null column of each row = rank of the observation.
        last_levels = observed.apply(lambda row: row.last_valid_index(), axis=1)
        deepest = set(last_levels.unique())
        # Keep the column order; rows with no rank at all (last value sits in
        # an identifier column) cannot be attributed to a taxon and are skipped.
        levels = [col for col in observed.columns
                  if col in deepest and col not in id_columns]

        for level_name in levels:
            rows = observed[last_levels == level_name]
            frames.append(pd.DataFrame({
                'taxon_id': rows['taxon_id'],
                'taxon_rang': level_name,
                'taxon_name': rows[level_name],
                'common_name': rows['common_name'],
            }))

    if frames:
        taxons_df = pd.concat(frames, axis=0)
    else:
        taxons_df = pd.DataFrame(columns=result_columns)

    taxons_df = taxons_df.drop_duplicates()
    taxons_df = taxons_df.assign(taxon_id=taxons_df['taxon_id'].astype('Int64'))
    return taxons_df.set_index('taxon_id')

In [6]:
# Taxa known by the start of the period and by its end.
taxons_df_start, taxons_df_finish = (
    get_taxons_df_to_date(df_taxons, cutoff_date)
    for cutoff_date in (start_date, finish_date)
)

In [7]:
def update_radius(df_tax_tocheck, radiuses, radiuses_dataset_path, date_to):
    """Make sure the CSV cache holds a count for every taxon/radius pair at ``date_to``.

    Builds the full list of (taxon_id, radius, date) combinations, looks them
    up in the CSV at ``radiuses_dataset_path``, asks ``fetch_radius`` for the
    combinations without a count, appends the fetched rows to the CSV and
    finally re-reads the file to verify the result.

    Parameters
    ----------
    df_tax_tocheck : pandas.DataFrame
        Only its index is used; it supplies the taxon ids.
    radiuses : iterable
        Radius values to combine with every taxon id.
    radiuses_dataset_path : str
        CSV file with the columns 'taxon_id', 'radius', 'date', 'count'.
        It is read, and rewritten when new counts were fetched.
    date_to
        Date of the snapshot; converted with ``str`` before matching.

    Returns
    -------
    None
        Progress and the final check (``True`` when every requested
        combination has a count) are reported with ``print`` only.
    """
    csb_col_set = ['taxon_id', 'radius', 'date']
    df_tax_asked = pd.DataFrame(columns=csb_col_set)
    date_to = str(date_to)
    # One block of rows per radius: every taxon id paired with that radius and the date.
    for radius in radiuses:
        df_tax_asked_r = pd.DataFrame()
        df_tax_asked_r['taxon_id'] = df_tax_tocheck.index
        df_tax_asked_r.insert(1, 'radius', radius)
        df_tax_asked_r.insert(2, 'date', date_to)
        df_tax_asked = pd.concat([df_tax_asked, df_tax_asked_r])
    df_tax_asked.reset_index(drop=True, inplace=True)
    print(f'Going to check in csv: {df_tax_asked.shape[0]} values')

    df_tax_csv = pd.read_csv(
        index_col=False, filepath_or_buffer=radiuses_dataset_path)
    print(f'Total in csv: {df_tax_csv.shape[0]} values ')

    # Left merge: requested combinations missing from the CSV get a null 'count'.
    check_radiuses = pd.merge(
        df_tax_asked, df_tax_csv, how='left', left_on=csb_col_set, right_on=csb_col_set)
    already_in_csv_sum = check_radiuses['count'].notnull().sum()
    print(f'Already in csv: {already_in_csv_sum} values')

    havenoradiuses = check_radiuses[check_radiuses['count'].isnull()].copy()

    if havenoradiuses.shape[0] > 0:
        print(f'Ask for {havenoradiuses.shape[0]} values from iNat')
        # fetch_radius reads the columns by position: taxon_id, radius, date.
        havenoradiuses.drop('count', axis=1, inplace=True)
        fetched = fetch_radius(havenoradiuses, radiuses_dataset_path)
        fetched_sum = fetched['count'].notnull().sum()
        print(f'Have fetched {fetched_sum} values from iNat')
        df_tax_csv = pd.concat([df_tax_csv, fetched])
        df_tax_csv.to_csv(path_or_buf=radiuses_dataset_path, index=False)
        del (df_tax_csv)
    else:
        print('No need to fetch from iNat')

    # Re-read the file so the checks below run against what is actually on disk.
    df_tax_csv = pd.read_csv(
        index_col=False, filepath_or_buffer=radiuses_dataset_path)
    print(f'Total in csv: {df_tax_csv.shape[0]} values ')

    if not (df_tax_csv.value_counts(subset=csb_col_set) > 1).any():
        print('No duplicates in csv')
    else:
        print('! Somehow duplicates in CSV !')

    # Final verification: every requested combination must now have a count.
    check_radiuses = pd.merge(
        df_tax_asked, df_tax_csv, how='left', left_on=csb_col_set, right_on=csb_col_set)
    if check_radiuses['count'].notnull().all():
        print(True)
    else:
        print(False)

In [8]:
def fetch_radius(havenoradiuses, radiuses_dataset_path, lat='55.494403', lng='38.644662'):
    """Request observation counts from the iNaturalist API, one call per row.

    Every fetched value is also appended to a timestamped backup file
    ``data/temp_file_<date>_<time>.csv`` right away, so values already
    received are not lost if a later request fails.

    Parameters
    ----------
    havenoradiuses : pandas.DataFrame
        Rows to fetch. Columns are read by position: taxon id, radius, date.
    radiuses_dataset_path : str
        Not used here; kept so existing callers keep working.
    lat, lng : str, optional
        Centre of the search circle, sent when the radius is non-zero.
        A radius of 0 sends empty location parameters instead.

    Returns
    -------
    pandas.DataFrame
        Columns 'taxon_id', 'radius', 'date', 'count', one row per input row.

    Raises
    ------
    Exception
        When the API answers with a status code other than 200.
    """
    current_date = str(date.today()).replace('-', '_')
    current_time = time.strftime('%H_%M_%S', time.localtime())
    temporal_txt_path = 'data/temp_file_' + current_date + '_' + current_time + '.csv'
    url = 'https://api.inaturalist.org/v1/observations'
    columns = ['taxon_id', 'radius', 'date', 'count']

    with open(temporal_txt_path, 'a') as temp_file:
        temp_file.write(','.join(columns) + '\n')

    records = []
    for i in trange(havenoradiuses.shape[0]):
        taxon_id = havenoradiuses.iloc[i, 0]
        radius = havenoradiuses.iloc[i, 1]
        date_to = havenoradiuses.iloc[i, 2]

        if radius == 0:
            # No geographic restriction: send empty location parameters.
            point_lat, point_lng, radius_param = '', '', ''
        else:
            point_lat, point_lng, radius_param = lat, lng, radius

        params = {
            'verifiable': 'true',
            'taxon_id': taxon_id,
            'd2': date_to,
            'lat': point_lat,
            'lng': point_lng,
            'radius': radius_param,
            'order': 'desc',
            'order_by': 'created_at',
            'only_id': 'true'
        }
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url=url, params=params, timeout=60)

        # Check the status before touching the body: an error response may not
        # be JSON or may lack 'total_results', which would hide the real cause.
        if response.status_code != 200:
            raise Exception(
                f'Oh response is not 200, it is {response.status_code}')

        count = response.json()['total_results']
        records.append([taxon_id, radius, date_to, count])

        # Reopened on every iteration so each value reaches the disk immediately.
        with open(temporal_txt_path, 'a') as temp_file:
            temp_file.write(','.join([str(taxon_id), str(radius), date_to, str(count)]) + '\n')

        time.sleep(1)  # pause between consecutive API requests
        print(
            f'Done loop {i + 1}: r{radius}, date {date_to}, response {response.status_code}, count {count}, id {taxon_id}')

    return pd.DataFrame(records, columns=columns)


In [9]:
# Counts are needed for the same taxa (those known at the end of the period)
# at both period boundaries.
for cutoff_date in (start_date, finish_date):
    update_radius(
        df_tax_tocheck=taxons_df_finish,
        radiuses=radiuses,
        radiuses_dataset_path=radiuses_dataset_path,
        date_to=cutoff_date,
    )

Going to check in csv: 1996 values
Total in csv: 4002 values 
Already in csv: 1996 values
No need to fetch from iNat
Total in csv: 4002 values 
No duplicates in csv
True
Going to check in csv: 1996 values
Total in csv: 4002 values 
Already in csv: 1996 values
No need to fetch from iNat
Total in csv: 4002 values 
No duplicates in csv
True


In [10]:
def get_radius_info(taxons_df_start, taxons_df_finish, radiuses_dataset_path, start_date, finish_date):
    """Combine cached counts into one table of counts, growth and rarity positions.

    Relies on the notebook-level ``radiuses`` tuple and ``sort_index`` helper.

    Parameters
    ----------
    taxons_df_start : pandas.DataFrame
        Taxa known at the start of the period; only its index is used, to
        mark taxa absent from it as 'new'.
    taxons_df_finish : pandas.DataFrame
        Taxa known at the end of the period, indexed by taxon id, with three
        columns (rank, name, common name). It is not modified.
    radiuses_dataset_path : str
        CSV with the columns 'taxon_id', 'radius', 'date', 'count'.
    start_date, finish_date
        Period boundaries; converted with ``str`` to match the 'date' column.

    Returns
    -------
    pandas.DataFrame
        Two-level columns: counts at ``finish_date``, 'count_diff'
        (finish minus start), 'pos_finish' (dense rank of the count, 1 = the
        smallest), each per radius, plus 'ifnew', 'taxon_rang', 'taxon_name'
        and 'common_name'. Rows are ordered by 'pos_finish' over the radiuses
        in their configured order.
    """
    start_date = str(start_date)
    finish_date = str(finish_date)
    df_tax_csv = pd.read_csv(
        index_col='taxon_id', filepath_or_buffer=radiuses_dataset_path)
    sort_list = [('pos_finish', radius) for radius in radiuses]

    def get_radius(taxons_list, date_to):
        # Cached rows for `date_to`, restricted to the requested taxa and
        # left-joined so every requested taxon is present in the result.
        df = pd.DataFrame(index=taxons_list.index)
        df_tax_csv_todate = df_tax_csv[
            (df_tax_csv['date'] == date_to)
            & (df_tax_csv.index.isin(taxons_list.index))]
        return df.merge(df_tax_csv_todate, how='left', left_index=True, right_index=True)

    def get_cool_indexes(column):
        # Dense ranking: equal counts share a position, the smallest count gets 1.
        series_sorted = column.sort_values()
        positions = series_sorted.ne(series_sorted.shift()).cumsum()
        # Bring the positions back to the original row order.
        return positions.align(column)[0]

    # Both snapshots are taken for the taxa known at the end of the period.
    df_start = get_radius(taxons_df_finish, start_date)
    df_finish = get_radius(taxons_df_finish, finish_date)

    # Wide layout: one row per taxon, columns keyed by (date, radius).
    # `index` is left at its default so the existing taxon_id index is used;
    # an explicit index=None is not accepted by every pandas version.
    df_compact = pd.concat([df_start, df_finish]).pivot(
        columns=['date', 'radius'], values='count').copy()

    df_diff = df_compact.loc[:, finish_date] - df_compact.loc[:, start_date]
    df_diff = pd.concat([df_diff], keys=['count_diff'], axis=1)

    # Start positions are computed only for taxa that have a start count for
    # the last configured radius.
    has_start_count = df_compact[(start_date, radiuses[-1])].notnull()
    df_pos_start = df_compact[has_start_count][start_date].apply(get_cool_indexes, axis=0)
    df_pos_start = pd.concat([df_pos_start], keys=['pos_start'], axis=1)

    df_pos_finish = df_compact[finish_date].apply(get_cool_indexes, axis=0)
    df_pos_finish = pd.concat([df_pos_finish], keys=['pos_finish'], axis=1)
    df_pos_finish = df_pos_finish.sort_values(by=sort_list)

    df_pos = pd.concat([df_compact, df_diff, df_pos_start, df_pos_finish], axis=1)
    df_pos = df_pos.reindex(index=df_pos_finish.index)
    df_pos = sort_index(df_pos)
    df_pos = df_pos.astype('Int64')

    # Taxa that were not known at the start of the period.
    df_pos.loc[~df_pos.index.isin(taxons_df_start.index), 'ifnew'] = 'new'

    # Relabel a copy: renaming the columns of the argument itself would
    # silently change the caller's DataFrame.
    taxons_info = taxons_df_finish.copy()
    taxons_info.columns = (('taxon_rang', ''), ('taxon_name', ''), ('common_name', ''))
    df_pos = pd.concat([df_pos, taxons_info], axis=1)

    # Start-of-period counts and positions were only needed for the computation.
    df_pos = df_pos.drop([start_date, 'pos_start'], axis=1, level=0)

    return df_pos

def sort_index(df):
    """Return ``df`` with its columns arranged in the notebook's fixed display order.

    Both column levels are sorted with the same lookup table: the group names
    (first level) and the radiuses (second level) are each replaced by their
    position in ``preferred_order`` before sorting. Reads the notebook-level
    ``start_date``, ``finish_date`` and ``radiuses``.
    """
    preferred_order = [
        'result_pos', 'result_name', 'result_count',
        str(start_date), str(finish_date),
        'count_diff', 'pos_start', 'pos_finish', 'pos_diff',
        'ifnew', 'taxon_rang', 'taxon_name', 'common_name',
        *radiuses,
    ]
    rank_of = {label: position for position, label in enumerate(preferred_order)}

    def by_rank(labels):
        return labels.map(rank_of)

    return df.sort_index(axis=1, level=[0, 1], key=by_rank)

In [11]:
def formatcount(count, count_diff=False):
    """Shorten a number for display and optionally render it as a signed change.

    Parameters
    ----------
    count : number
        Value to format. Missing values (NaN / ``pd.NA``) are accepted.
    count_diff : bool, optional
        When true the value is a difference: positive values get a leading
        '+', negative ones keep their '-', zero and missing become ''.

    Returns
    -------
    str or number
        Magnitudes from 1000 are abbreviated ('1.5K', '28K', '3.0M').
        Smaller values are returned unchanged when ``count_diff`` is false
        and as a signed string otherwise.
    """
    if pd.isna(count):
        # Nothing to format; comparing a missing value below would raise.
        return '' if count_diff else count

    # Abbreviate by magnitude so negative differences are shortened too.
    # Thresholds are inclusive: 1000000 is '1.0M' (not '1000K'), 10000 is '10K'.
    magnitude = abs(count)
    if magnitude >= 1000000:
        short = str(round(magnitude / 1000000, 1)) + 'M'
    elif magnitude >= 10000:
        short = str(int(magnitude / 1000)) + 'K'
    elif magnitude >= 1000:
        short = str(round(magnitude / 1000, 1)) + 'K'
    else:
        short = None

    if short is None:
        formatted = count
    elif count < 0:
        formatted = '-' + short
    else:
        formatted = short

    if not count_diff:
        return formatted
    if not count:
        return ''
    if count > 0:
        return '+' + str(formatted)
    # str() of a negative number (or the '-...' string) already carries the sign.
    return str(formatted)


def addresult_columns(df):
    """Append empty 'result_name', 'result_pos' and 'result_count' column groups.

    One column per configured radius is added to each group; the values are
    left missing here and are meant to be filled in by a later step. Reads
    the notebook-level ``radiuses`` and calls ``sort_index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with two-level columns. Not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus the placeholder columns, ordered by ``sort_index``.
    """
    # No values are pre-filled: assigning through two chained [] look-ups
    # only changes a temporary object and never reaches the frame itself.
    result_columns = pd.MultiIndex.from_product(
        [['result_name', 'result_pos', 'result_count'], radiuses],
        names=['date', 'radius'])
    placeholders = pd.DataFrame(columns=result_columns, index=df.index)

    return sort_index(pd.concat([df, placeholders], axis=1))

def add_apply_formats(df):
    """Turn rank keys and raw numbers in ``df`` into display values, in place.

    * 'taxon_rang' keys are replaced by their labels from ``ranks_enru``
      (``None`` for unknown keys);
    * counts at ``finish_date`` and the 'count_diff' values are passed
      through ``formatcount``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with two-level columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Columns are labelled with the date as a string; str() makes the lookup
    # work whether the notebook-level finish_date is a date or already a string.
    finish_key = str(finish_date)

    def elementwise(frame, func, **kwargs):
        # DataFrame.map is the newer name of DataFrame.applymap; use whichever
        # the installed pandas provides.
        if hasattr(type(frame), 'map'):
            return frame.map(func, **kwargs)
        return frame.applymap(func, **kwargs)

    df['taxon_rang'] = df['taxon_rang'].apply(lambda x: ranks_enru.get(x))
    df[finish_key] = elementwise(df[finish_key], formatcount)
    df['count_diff'] = elementwise(df['count_diff'], formatcount, count_diff=True)

    return df

In [12]:
# Column groups of the result are labelled with the date as a string, and
# later cells look them up by that label.
finish_date = str(finish_date)
df_pos = get_radius_info(
    taxons_df_start=taxons_df_start,
    taxons_df_finish=taxons_df_finish,
    radiuses_dataset_path=radiuses_dataset_path,
    start_date=start_date,
    finish_date=finish_date,
)
df_pos

date,2022-11-30,2022-11-30,2022-11-30,2022-11-30,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,taxon_rang,taxon_name,common_name
radius,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
900207,1,3,27,28,1,1,1,1,1,1,1,1,new,taxon_species_name,Alchemilla conglobata,Манжетка шариковидно-скученная
1070919,1,3,67,251,0,0,0,7,1,1,3,5,,taxon_species_name,Attulus floricola,
556219,1,4,145,258,0,1,10,33,1,2,12,6,,taxon_species_name,Podosphaera aphanis,
227391,1,7,91,311,1,1,15,32,1,4,4,8,new,taxon_species_name,Peltigera malacea,Пельтигера мягкая
210241,1,10,66,1115,1,2,24,331,1,5,2,46,new,taxon_genus_name,Chromelosporium,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094814,355,28651,279291,2996359,123,6628,67374,386625,81,426,487,495,new,taxon_subclass_name,Agaricomycetidae,Агарикомицетовые
790553,363,22340,132621,653432,29,429,3445,19234,82,424,485,492,new,taxon_subfamily_name,Apioideae,Сельдерейные
47434,507,27114,191714,1553788,51,657,6809,118146,83,425,486,494,new,taxon_family_name,Poaceae,Мятликовые
50814,605,49155,477589,5068911,208,9521,95363,596226,84,427,488,496,,taxon_class_name,Agaricomycetes,Агарикомицеты


In [13]:
# add_apply_formats edits the frame it receives and returns that same object.
df_added_formats = add_apply_formats(df=df_pos)
df_added_formats

date,2022-11-30,2022-11-30,2022-11-30,2022-11-30,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,taxon_rang,taxon_name,common_name
radius,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
900207,1,3,27,28,+1,+1,+1,+1,1,1,1,1,new,Вид,Alchemilla conglobata,Манжетка шариковидно-скученная
1070919,1,3,67,251,,,,+7,1,1,3,5,,Вид,Attulus floricola,
556219,1,4,145,258,,+1,+10,+33,1,2,12,6,,Вид,Podosphaera aphanis,
227391,1,7,91,311,+1,+1,+15,+32,1,4,4,8,new,Вид,Peltigera malacea,Пельтигера мягкая
210241,1,10,66,1.1K,+1,+2,+24,+331,1,5,2,46,new,Род,Chromelosporium,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094814,355,28K,279K,3.0M,+123,+6.6K,+67K,+386K,81,426,487,495,new,Подкласс,Agaricomycetidae,Агарикомицетовые
790553,363,22K,132K,653K,+29,+429,+3.4K,+19K,82,424,485,492,new,Подсемейство,Apioideae,Сельдерейные
47434,507,27K,191K,1.6M,+51,+657,+6.8K,+118K,83,425,486,494,new,Семейство,Poaceae,Мятликовые
50814,605,49K,477K,5.1M,+208,+9.5K,+95K,+596K,84,427,488,496,,Класс,Agaricomycetes,Агарикомицеты


In [14]:
# Continue from the formatted table explicitly instead of relying on df_pos
# having been changed in place by the previous step.
df_added_res_cols = addresult_columns(df_added_formats)
df_added_res_cols

date,result_pos,result_pos,result_pos,result_pos,result_name,result_name,result_name,result_name,result_count,result_count,result_count,result_count,2022-11-30,2022-11-30,2022-11-30,2022-11-30,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,taxon_rang,taxon_name,common_name
radius,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2
900207,,,,,,,,,,,,,1,3,27,28,+1,+1,+1,+1,1,1,1,1,new,Вид,Alchemilla conglobata,Манжетка шариковидно-скученная
1070919,,,,,,,,,,,,,1,3,67,251,,,,+7,1,1,3,5,,Вид,Attulus floricola,
556219,,,,,,,,,,,,,1,4,145,258,,+1,+10,+33,1,2,12,6,,Вид,Podosphaera aphanis,
227391,,,,,,,,,,,,,1,7,91,311,+1,+1,+15,+32,1,4,4,8,new,Вид,Peltigera malacea,Пельтигера мягкая
210241,,,,,,,,,,,,,1,10,66,1.1K,+1,+2,+24,+331,1,5,2,46,new,Род,Chromelosporium,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094814,,,,,,,,,,,,,355,28K,279K,3.0M,+123,+6.6K,+67K,+386K,81,426,487,495,new,Подкласс,Agaricomycetidae,Агарикомицетовые
790553,,,,,,,,,,,,,363,22K,132K,653K,+29,+429,+3.4K,+19K,82,424,485,492,new,Подсемейство,Apioideae,Сельдерейные
47434,,,,,,,,,,,,,507,27K,191K,1.6M,+51,+657,+6.8K,+118K,83,425,486,494,new,Семейство,Poaceae,Мятликовые
50814,,,,,,,,,,,,,605,49K,477K,5.1M,+208,+9.5K,+95K,+596K,84,427,488,496,,Класс,Agaricomycetes,Агарикомицеты


In [15]:
def joininfo(row):
    """Fill the 'result_*' cells of one table row with display strings.

    Meant for ``DataFrame.apply(..., axis=1)``: ``row`` is a Series with
    two-level labels and ``row.name`` is the taxon id. Reads the
    notebook-level ``finish_date`` as the label of the count columns.

    * 'result_name'  - bold link to the taxon page (common name when there
      is one, otherwise the taxon name), the rank label unless it is 'Вид',
      and the taxon name in italics when a common name exists;
    * 'result_pos'   - position per radius, with '+new' appended for rows
      whose 'ifnew' is set;
    * 'result_count' - count per radius, a space, and the count difference.

    Returns the modified row.
    """
    taxon_id = row.name
    latin_name = row['taxon_name'].item()
    vernacular_name = row['common_name'].item()
    new_flag = row['ifnew'].item()
    rank_label = row['taxon_rang'].item()

    # Per-radius values, converted to strings so they can be concatenated.
    positions = row['pos_finish'].astype('string')
    differences = row['count_diff'].astype('string')
    counts = row[finish_date].astype('string')

    link_template = '<a href=https://www.inaturalist.org/taxa/{}>{}</a>'
    if pd.isnull(vernacular_name):
        headline = '<b>' + link_template.format(taxon_id, str(latin_name)) + '</b>'
        latin_line = ''
    else:
        headline = '<b>' + link_template.format(taxon_id, str(vernacular_name)) + '</b>'
        latin_line = f'<br><i>{latin_name}</i>'

    new_suffix = '' if pd.isnull(new_flag) else '+new'
    # The rank is shown for everything except species.
    rank_suffix = '' if rank_label == 'Вид' else f' {rank_label}'

    row['result_name'] = headline + rank_suffix + latin_line
    row['result_pos'] = positions + new_suffix
    row['result_count'] = counts + ' ' + differences

    return row

In [16]:
# Build the display strings row by row.
df_info = df_added_res_cols.apply(joininfo, axis='columns')
df_info.head()

date,result_pos,result_pos,result_pos,result_pos,result_name,result_name,result_name,result_name,result_count,result_count,result_count,result_count,2022-11-30,2022-11-30,2022-11-30,2022-11-30,count_diff,count_diff,count_diff,count_diff,pos_finish,pos_finish,pos_finish,pos_finish,ifnew,taxon_rang,taxon_name,common_name
radius,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,20,200,2000,0,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
taxon_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2
900207,1+new,1+new,1+new,1+new,<b><a href=https://www.inaturalist.org/taxa/90...,<b><a href=https://www.inaturalist.org/taxa/90...,<b><a href=https://www.inaturalist.org/taxa/90...,<b><a href=https://www.inaturalist.org/taxa/90...,1 +1,3 +1,27 +1,28 +1,1,3,27,28,1.0,1.0,1.0,1,1,1,1,1,new,Вид,Alchemilla conglobata,Манжетка шариковидно-скученная
1070919,1,1,3,5,<b><a href=https://www.inaturalist.org/taxa/10...,<b><a href=https://www.inaturalist.org/taxa/10...,<b><a href=https://www.inaturalist.org/taxa/10...,<b><a href=https://www.inaturalist.org/taxa/10...,1,3,67,251 +7,1,3,67,251,,,,7,1,1,3,5,,Вид,Attulus floricola,
556219,1,2,12,6,<b><a href=https://www.inaturalist.org/taxa/55...,<b><a href=https://www.inaturalist.org/taxa/55...,<b><a href=https://www.inaturalist.org/taxa/55...,<b><a href=https://www.inaturalist.org/taxa/55...,1,4 +1,145 +10,258 +33,1,4,145,258,,1.0,10.0,33,1,2,12,6,,Вид,Podosphaera aphanis,
227391,1+new,4+new,4+new,8+new,<b><a href=https://www.inaturalist.org/taxa/22...,<b><a href=https://www.inaturalist.org/taxa/22...,<b><a href=https://www.inaturalist.org/taxa/22...,<b><a href=https://www.inaturalist.org/taxa/22...,1 +1,7 +1,91 +15,311 +32,1,7,91,311,1.0,1.0,15.0,32,1,4,4,8,new,Вид,Peltigera malacea,Пельтигера мягкая
210241,1+new,5+new,2+new,46+new,<b><a href=https://www.inaturalist.org/taxa/21...,<b><a href=https://www.inaturalist.org/taxa/21...,<b><a href=https://www.inaturalist.org/taxa/21...,<b><a href=https://www.inaturalist.org/taxa/21...,1 +1,10 +2,66 +24,1.1K +331,1,10,66,1.1K,1.0,2.0,24.0,331,1,5,2,46,new,Род,Chromelosporium,


In [17]:
def sort_separate(df, show_positions):
    """Build one ranking table per configured radius.

    For each radius the rows are sorted by 'pos_finish' of that radius first;
    the remaining radiuses follow in rotated order as tie-breakers. Reads the
    notebook-level ``radiuses``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'pos_finish', 'result_pos', 'result_name' and
        'result_count' column groups, one column per radius.
    show_positions : int
        Number of top rows kept in every table.

    Returns
    -------
    list of pandas.DataFrame
        One table per radius, in the order of ``radiuses``, with the columns
        '#', 'Название таксона' and a count column whose header names the radius.
    """
    radius_order = list(radiuses)
    dataframes = []

    for radius in radiuses:
        # `radius` is always the first element of the rotated order here.
        sort_list = [('pos_finish', tie_radius) for tie_radius in radius_order]
        df = df.sort_values(by=sort_list, ignore_index=True)

        if radius:
            count_col_name = f'Количество наблюдений<br>в радиусе {radius} км'
        else:
            count_col_name = 'Количество наблюдений<br>во всём iNat'

        # Construct the table directly from its columns; filling an empty
        # frame through positional assignment depends on the pandas version.
        df_sorted = pd.DataFrame({
            '#': df[('result_pos', radius)],
            'Название таксона': df[('result_name', radius)],
            count_col_name: df[('result_count', radius)],
        })
        dataframes.append(df_sorted.iloc[:show_positions])

        # Rotate left so the next radius becomes the primary sort key.
        radius_order = radius_order[1:] + radius_order[:1]

    return dataframes

raritets = sort_separate(df=df_info, show_positions=show_positions)

In [18]:
def raritets_html(raritets):
    """Write every ranking table to ``output/raritets_<radius>.html``.

    The table HTML is post-processed before it is saved: the default
    ``dataframe`` class is removed, header cells are top-aligned, '+new'
    markers become a small green 'NEW' badge and '+<number>' differences
    become a small green up-arrow followed by the number.

    Parameters
    ----------
    raritets : sequence of pandas.DataFrame
        Tables in the same order as the notebook-level ``radiuses``.
    """
    badge_style = '<b style="font-size:62%;color:green">&nbsp;&nbsp;'
    # Capture the whole abbreviated number ('331', '6.6K', '1.2M'); matching
    # the leading digits only would cut '+6.6K' after the '6'.
    diff_pattern = re.compile(r'\+([0-9]+(?:\.[0-9]+)?[KM]?)')

    for radius, df_to_export in zip(radiuses, raritets):
        htmlname = 'output/raritets_' + str(radius) + '.html'

        # Without a target buffer to_html returns the markup as a string,
        # so the file does not have to be written and read back.
        filedata = df_to_export.to_html(
            header=True, index=False, escape=False, justify='center', border=None)

        filedata = filedata.replace(' class="dataframe"', '')
        filedata = filedata.replace('<th>', '<th  style="vertical-align:top">')
        filedata = filedata.replace('+new', badge_style + 'NEW</b>')
        filedata = diff_pattern.sub(badge_style + r'&#8593;\1</b>', filedata)

        # The tables contain Cyrillic text; do not depend on the platform's
        # default encoding.
        with open(htmlname, 'w', encoding='utf-8') as file:
            file.write(filedata)

In [19]:
raritets_html(raritets=raritets)