# Bird dataset

In [1]:
import pandas as pd
import xarray as xr

In [2]:
import mgrs

## Birds joined file

In [3]:
bird_data_url = "https://drive.google.com/file/d/13eVxCsP7bjXcTigO_CE4DvN_1jMQi2eR/view?usp=sharing"
bird_data_download_url = 'https://drive.google.com/uc?id=' + bird_data_url.split('/')[-2]

bird_data_download_url

'https://drive.google.com/uc?id=13eVxCsP7bjXcTigO_CE4DvN_1jMQi2eR'

In [4]:
birds = pd.read_csv(bird_data_download_url,sep=',',encoding='latin-1',low_memory=False)

In [5]:
birds

Unnamed: 0,ID,STATION_CODE,westBoundingCoordinate,eastBoundingCoordinate,northBoundingCoordinate,southBoundingCoordinate,DATE,IM_CODE,IM_SPEC_NAME,GBIFscientificName,GBIFKey,Feld9,VALUE,UNIT,BEHAVIOUR_1,BEHAVIOUR_2,REMARKS,ACOUSTICS
0,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
1,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
2,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
3,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
4,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185659,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,
185660,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,
185661,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,
185662,2176,1718F01,14439388,14439388,47844173,47844173,20051022,160029,Glaucidium passerinum,Glaucidium passerinum (Linnaeus,5232162,,1.0,Number,s,r,"mehrmals rufend, 18.06",


In [6]:
birds['time'] = pd.to_datetime(birds['DATE'].astype(str), format='%Y%m%d')
birds

Unnamed: 0,ID,STATION_CODE,westBoundingCoordinate,eastBoundingCoordinate,northBoundingCoordinate,southBoundingCoordinate,DATE,IM_CODE,IM_SPEC_NAME,GBIFscientificName,GBIFKey,Feld9,VALUE,UNIT,BEHAVIOUR_1,BEHAVIOUR_2,REMARKS,ACOUSTICS,time
0,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21
1,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21
2,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21
3,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21
4,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185659,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,,2005-05-30
185660,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,,2005-05-30
185661,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,,2005-05-30
185662,2176,1718F01,14439388,14439388,47844173,47844173,20051022,160029,Glaucidium passerinum,Glaucidium passerinum (Linnaeus,5232162,,1.0,Number,s,r,"mehrmals rufend, 18.06",,2005-10-22


In [8]:
# Define a custom function to combine columns
def compute_lat_mean(row):
    return (float(str(row['westBoundingCoordinate']).replace(",",".")) + float(str(row['eastBoundingCoordinate']).replace(",",".")))/2.
def compute_lon_mean(row):
    return (float(str(row['northBoundingCoordinate']).replace(",",".")) + float(str(row['southBoundingCoordinate']).replace(",",".")))/2.

In [9]:
# Apply the custom function to create a new column 'latitude_mean'
birds['latitude_mean'] = birds.apply(compute_lat_mean, axis=1)

In [10]:
birds['longitude_mean'] = birds.apply(compute_lon_mean, axis=1)

## Save to Cube in native grid

In [11]:
dset = birds.rename(columns={"longitude_mean": "lon", "latitude_mean": "lat"}).groupby(['time', 'lat', 'lon', 'IM_CODE']).size().rename('count').to_xarray().to_dataset()
dset

In [12]:
dset.to_netcdf("birds_native.nc")

## MGRS 100m grid

In [13]:
pip install mgrs

Note: you may need to restart the kernel to use updated packages.


In [14]:
import mgrs

In [18]:
# Define a custom function to combine columns
# Precision	Size of grid side
# 5	1m
# 4	10m
# 3	100m
# 2	1000m (1km)
# 1	10000m (10km)
# 0	100000m (100km)
def to_mgrs_compute(row):
    return mgrs.MGRS().toMGRS(row['latitude_mean'], row['longitude_mean'],4)

def from_mgrs_compute_lat(row):
    return mgrs.MGRS().toLatLon(row['MGRS'])[0]
# Define a custom function to combine columns
def from_mgrs_compute_lon(row):
    return mgrs.MGRS().toLatLon(row['MGRS'])[1]

In [19]:
# Apply the custom function to create a new column 'MGRS'
birds['MGRS'] = birds.apply(to_mgrs_compute, axis=1)

In [20]:
birds['lon'] = birds.apply(from_mgrs_compute_lon, axis=1)
birds['lat'] = birds.apply(from_mgrs_compute_lat, axis=1)

In [21]:
birds

Unnamed: 0,ID,STATION_CODE,westBoundingCoordinate,eastBoundingCoordinate,northBoundingCoordinate,southBoundingCoordinate,DATE,IM_CODE,IM_SPEC_NAME,GBIFscientificName,...,BEHAVIOUR_1,BEHAVIOUR_2,REMARKS,ACOUSTICS,time,latitude_mean,longitude_mean,MGRS,lon,lat
0,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
1,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
2,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
3,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
4,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185659,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,...,s,,mehrmals,,2005-05-30,14.439388,47.844173,38PRA0666298220,47.844164,14.439384
185660,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,...,s,,mehrmals,,2005-05-30,14.439388,47.844173,38PRA0666298220,47.844164,14.439384
185661,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,...,s,,mehrmals,,2005-05-30,14.439388,47.844173,38PRA0666298220,47.844164,14.439384
185662,2176,1718F01,14439388,14439388,47844173,47844173,20051022,160029,Glaucidium passerinum,Glaucidium passerinum (Linnaeus,...,s,r,"mehrmals rufend, 18.06",,2005-10-22,14.439388,47.844173,38PRA0666298220,47.844164,14.439384


## Convert to Xarray to get datacube

In [22]:
dset = birds.groupby(['time', 'lat', 'lon', 'IM_CODE']).size().rename('count').to_xarray().to_dataset()

In [23]:
dset

## Save into netcdf to keep the datacube

In [24]:
dset.to_netcdf("birds_MGRS100m.nc")

In [25]:
import xarray as xr

In [26]:
dset = xr.open_dataset("birds_MGRS100m.nc")

In [27]:
dset