In [1]:
import h3
import pandas as pd
import numpy as np

In [7]:
# Read and concatnate data into a single dataframe
df = pd.DataFrame({})

for year in range(2000, 2024, 2):
  df_year = pd.read_csv(f'../../data/birds/{year}-{year+1}.csv')
  df = pd.concat([df, df_year])

df.info()

Unnamed: 0,ScientificName,TaxonID,Date,Latitude,Longitude,Municipality
0,Strix aluco,http://tun.fi/MX.28998,2000-01-01 - 2000-12-31,60.3409,25.2425,Sipoo
1,Fringilla coelebs,http://tun.fi/MX.36237,2001-07-24,60.177,24.202,Siuntio
2,Certhia familiaris,http://tun.fi/MX.34616,2000-05-14,60.4421,24.519,Vihti
3,Cyanistes caeruleus,http://tun.fi/MX.34574,2000-04-25,61.593,27.3188,Mikkeli
4,Corvus monedula,http://tun.fi/MX.37142,2001-04-22,60.177,24.202,Siuntio


In [9]:
# Drop NAs from Lat and Lon. Otherwise h3 cannot work with these.
df.dropna(subset=['Latitude', 'Longitude'], inplace=True)
df.head()

Unnamed: 0,ScientificName,TaxonID,Date,Latitude,Longitude,Municipality
0,Strix aluco,http://tun.fi/MX.28998,2000-01-01 - 2000-12-31,60.3409,25.2425,Sipoo
1,Fringilla coelebs,http://tun.fi/MX.36237,2001-07-24,60.177,24.202,Siuntio
2,Certhia familiaris,http://tun.fi/MX.34616,2000-05-14,60.4421,24.519,Vihti
3,Cyanistes caeruleus,http://tun.fi/MX.34574,2000-04-25,61.593,27.3188,Mikkeli
4,Corvus monedula,http://tun.fi/MX.37142,2001-04-22,60.177,24.202,Siuntio


In [32]:
# Begin gridding using h3
def geo_to_h3(row):
  return h3.geo_to_h3(lat=row['Latitude'], lng=row['Longitude'], resolution = 5)

df['h3_cell'] = df.apply(geo_to_h3, axis=1)
df_h3grouped = df.reset_index(drop=False)
df_h3grouped

Unnamed: 0,index,ScientificName,TaxonID,Date,Latitude,Longitude,Municipality,h3_cell
0,0,Strix aluco,http://tun.fi/MX.28998,2000-01-01 - 2000-12-31,60.340900,25.242500,Sipoo,851126c7fffffff
1,1,Fringilla coelebs,http://tun.fi/MX.36237,2001-07-24,60.177000,24.202000,Siuntio,85089947fffffff
2,2,Certhia familiaris,http://tun.fi/MX.34616,2000-05-14,60.442100,24.519000,Vihti,85089977fffffff
3,3,Cyanistes caeruleus,http://tun.fi/MX.34574,2000-04-25,61.593000,27.318800,Mikkeli,8511202bfffffff
4,4,Corvus monedula,http://tun.fi/MX.37142,2001-04-22,60.177000,24.202000,Siuntio,85089947fffffff
...,...,...,...,...,...,...,...,...
11393007,987433,Picoides tridactylus,http://tun.fi/MX.30453,2022-06-07,68.925050,26.778422,Inari,8508cd47fffffff
11393008,987434,Picoides tridactylus,http://tun.fi/MX.30453,2022-06-21,69.018273,26.737819,Inari,8508cd47fffffff
11393009,987435,Motacilla flava,http://tun.fi/MX.32180,2022-05-19,61.226294,28.811891,Imatra,8511234ffffffff
11393010,987436,Delichon urbicum,http://tun.fi/MX.32163,2022-05-19,61.221876,28.797762,Imatra,8511234ffffffff


In [33]:
# Aggragate observations with similar h3 index into a list
df_agg = df_h3grouped.groupby('h3_cell').index.agg(list).to_frame("ids").reset_index()
df_agg['count'] = df_agg['ids'].apply(lambda row: len(row))
df_agg

Unnamed: 0,h3_cell,ids,count
0,85012603fffffff,"[83371, 83379, 83388, 83391, 83392, 83393, 833...",1450
1,85012613fffffff,"[83372, 83373, 83374, 83375, 83376, 83377, 833...",1165
2,85012617fffffff,"[1012023, 1106452, 1106453, 1106454, 1106455, ...",52
3,8501261bfffffff,"[4865, 4866, 5194, 5195, 653607, 2681, 4502, 4...",1365
4,85012643fffffff,"[709475, 709476, 709477, 709478, 709479, 70948...",447
...,...,...,...
2063,85112ed3fffffff,"[196349, 530183, 537374, 569130, 582496, 61454...",2895
2064,85112ed7fffffff,"[197417, 205764, 403088, 530176, 530177, 70028...",2062
2065,85112edbfffffff,"[196347, 196348, 268174, 268175, 268176, 66772...",3742
2066,851135a7fffffff,[647965],1


In [34]:
# Find neighbours using k-ring
df_agg['neighbors'] = df_agg['h3_cell'].apply(lambda h3_index: h3.k_ring(h3_index, 1))
df_agg

Unnamed: 0,h3_cell,ids,count,neighbors
0,85012603fffffff,"[83371, 83379, 83388, 83391, 83392, 83393, 833...",1450,"{8501260ffffffff, 8501261bfffffff, 85012617fff..."
1,85012613fffffff,"[83372, 83373, 83374, 83375, 83376, 83377, 833...",1165,"{8501261bfffffff, 85012617fffffff, 8501268bfff..."
2,85012617fffffff,"[1012023, 1106452, 1106453, 1106454, 1106455, ...",52,"{850126abfffffff, 85012617fffffff, 850126bbfff..."
3,8501261bfffffff,"[4865, 4866, 5194, 5195, 653607, 2681, 4502, 4...",1365,"{8501261bfffffff, 850126cffffffff, 850126c7fff..."
4,85012643fffffff,"[709475, 709476, 709477, 709478, 709479, 70948...",447,"{85012647fffffff, 8501264bfffffff, 8501264ffff..."
...,...,...,...,...
2063,85112ed3fffffff,"[196349, 530183, 537374, 569130, 582496, 61454...",2895,"{85112edbfffffff, 85112ec3fffffff, 85112ed7fff..."
2064,85112ed7fffffff,"[197417, 205764, 403088, 530176, 530177, 70028...",2062,"{85112e8bfffffff, 85112ec3fffffff, 85112e9bfff..."
2065,85112edbfffffff,"[196347, 196348, 268174, 268175, 268176, 66772...",3742,"{85112edbfffffff, 85112337fffffff, 85112ec3fff..."
2066,851135a7fffffff,[647965],1,"{851122dbfffffff, 851135b7fffffff, 851135affff..."
