### Creation of Bird dataset

Packages used for this environment:

In [1]:
import pandas as pd  
import xarray as xr
import mgrs  

### Loading Bird data  
Load the birds csv file in alignment with GBIF (filtered by the year 2005)

In [2]:
bird_data_url = "https://drive.google.com/file/d/13eVxCsP7bjXcTigO_CE4DvN_1jMQi2eR/view?usp=sharing"
bird_data_download_url = 'https://drive.google.com/uc?id=' + bird_data_url.split('/')[-2]

bird_data_download_url

'https://drive.google.com/uc?id=13eVxCsP7bjXcTigO_CE4DvN_1jMQi2eR'

Create a dataframe 

In [3]:
birds = pd.read_csv(bird_data_download_url,sep=',',encoding='latin-1',low_memory=False)

In [4]:
birds  #print the dataframe

Unnamed: 0,ID,STATION_CODE,westBoundingCoordinate,eastBoundingCoordinate,northBoundingCoordinate,southBoundingCoordinate,DATE,IM_CODE,IM_SPEC_NAME,GBIFscientificName,GBIFKey,Feld9,VALUE,UNIT,BEHAVIOUR_1,BEHAVIOUR_2,REMARKS,ACOUSTICS
0,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
1,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
2,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
3,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
4,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185659,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,
185660,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,
185661,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,
185662,2176,1718F01,14439388,14439388,47844173,47844173,20051022,160029,Glaucidium passerinum,Glaucidium passerinum (Linnaeus,5232162,,1.0,Number,s,r,"mehrmals rufend, 18.06",


Add a new column. Provide or use 'Time' as the column name. You need to convert DATE column a datetime type and a specific format.

In [9]:
birds['time'] = pd.to_datetime(birds['DATE'].astype(str), format='%Y%m%d')
birds

Unnamed: 0,ID,STATION_CODE,westBoundingCoordinate,eastBoundingCoordinate,northBoundingCoordinate,southBoundingCoordinate,DATE,IM_CODE,IM_SPEC_NAME,GBIFscientificName,GBIFKey,Feld9,VALUE,UNIT,BEHAVIOUR_1,BEHAVIOUR_2,REMARKS,ACOUSTICS,time,latitude_mean
0,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21,14.452050
1,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21,14.452050
2,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21,14.452050
3,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21,14.452050
4,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,2494422,,2.0,Number,r,,,,2005-05-21,14.452050
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185659,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,,2005-05-30,14.439388
185660,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,,2005-05-30,14.439388
185661,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,5231438,,1.0,Number,s,,mehrmals,,2005-05-30,14.439388
185662,2176,1718F01,14439388,14439388,47844173,47844173,20051022,160029,Glaucidium passerinum,Glaucidium passerinum (Linnaeus,5232162,,1.0,Number,s,r,"mehrmals rufend, 18.06",,2005-10-22,14.439388


In [10]:
#to print the full summary of the dataframe
birds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185664 entries, 0 to 185663
Data columns (total 20 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   ID                       185664 non-null  int64         
 1   STATION_CODE             185664 non-null  object        
 2   westBoundingCoordinate   185664 non-null  object        
 3   eastBoundingCoordinate   185664 non-null  object        
 4   northBoundingCoordinate  185664 non-null  object        
 5   southBoundingCoordinate  185664 non-null  object        
 6   DATE                     185664 non-null  int64         
 7   IM_CODE                  185664 non-null  int64         
 8   IM_SPEC_NAME             185664 non-null  object        
 9   GBIFscientificName       185664 non-null  object        
 10  GBIFKey                  185664 non-null  object        
 11  Feld9                    0 non-null       float64       
 12  VALUE           

In [7]:
# Define a custom function to combine columns
def compute_lat_mean(row):
    return (float(str(row['westBoundingCoordinate']).replace(",",".")) + float(str(row['eastBoundingCoordinate']).replace(",",".")))/2.
def compute_lon_mean(row):
    return (float(str(row['northBoundingCoordinate']).replace(",",".")) + float(str(row['southBoundingCoordinate']).replace(",",".")))/2.

In [8]:
# Apply the custom function to create a new column called 'latitude_mean'for the latitude values
birds['latitude_mean'] = birds.apply(compute_lat_mean, axis=1)

In [11]:
# Apply the custom function to create a new column called 'longitude_mean'for the longitude values
birds['longitude_mean'] = birds.apply(compute_lon_mean, axis=1)

### Save to Cube in native grid

Create a dataset

In [12]:
dset = birds.rename(columns={"longitude_mean": "lon", "latitude_mean": "lat"}).groupby(['time','lat','lon','VALUE']).size().rename('count').to_xarray().to_dataset()
dset

Save the dataset in netCDF format

In [13]:
dset.to_netcdf("birds_native.nc")

### MGRS 100m grid

In [15]:
# Define a custom function to combine columns
# Precision	Size of grid side
# 5	1m
# 4	10m
# 3	100m
# 2	1000m (1km)
# 1	10000m (10km)
# 0	100000m (100km)
def to_mgrs_compute(row):
    return mgrs.MGRS().toMGRS(row['latitude_mean'], row['longitude_mean'],4)

def from_mgrs_compute_lat(row):
    return mgrs.MGRS().toLatLon(row['MGRS'])[0]
# Define a custom function to combine columns
def from_mgrs_compute_lon(row):
    return mgrs.MGRS().toLatLon(row['MGRS'])[1]

In [16]:
# Apply the custom function to create a new column 'MGRS'
birds['MGRS'] = birds.apply(to_mgrs_compute, axis=1)

Add the columns lat and lon derived from grid custom function to the birds dataframe.
lat/lon represent the centre of the grid point

In [20]:
birds['lon'] = birds.apply(from_mgrs_compute_lon, axis=1)
birds['lat'] = birds.apply(from_mgrs_compute_lat, axis=1)

In [21]:
birds

Unnamed: 0,ID,STATION_CODE,westBoundingCoordinate,eastBoundingCoordinate,northBoundingCoordinate,southBoundingCoordinate,DATE,IM_CODE,IM_SPEC_NAME,GBIFscientificName,...,BEHAVIOUR_1,BEHAVIOUR_2,REMARKS,ACOUSTICS,time,latitude_mean,longitude_mean,MGRS,lon,lat
0,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
1,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
2,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
3,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
4,1,0005F01,1445205,1445205,4784122,4784122,20050521,160027,Fringilla coelebs,Fringilla coelebs Linnaeus,...,r,,,,2005-05-21,14.452050,47.841220,38PRA0632699618,47.841211,14.452047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185659,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,...,s,,mehrmals,,2005-05-30,14.439388,47.844173,38PRA0666298220,47.844164,14.439384
185660,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,...,s,,mehrmals,,2005-05-30,14.439388,47.844173,38PRA0666298220,47.844164,14.439384
185661,2175,1718F01,14439388,14439388,47844173,47844173,20050530,160059,Troglodytes troglodytes,Troglodytes troglodytes (Linnaeus,...,s,,mehrmals,,2005-05-30,14.439388,47.844173,38PRA0666298220,47.844164,14.439384
185662,2176,1718F01,14439388,14439388,47844173,47844173,20051022,160029,Glaucidium passerinum,Glaucidium passerinum (Linnaeus,...,s,r,"mehrmals rufend, 18.06",,2005-10-22,14.439388,47.844173,38PRA0666298220,47.844164,14.439384


### Convert to Xarray to get datacube

In [22]:
#dset = birds.groupby(['time', 'lat', 'lon', 'IM_CODE']).size().rename('count').to_xarray().to_dataset()

Count the occurrence  using the VALUE column

In [23]:
dset = birds.groupby(['time', 'lat', 'lon', 'VALUE']).size().rename('count').to_xarray().to_dataset()

In [24]:
dset

Save into netCDF format to keep the datacube

In [25]:
dset.to_netcdf("birds_MGRS100m.nc")

Open the output dataset to verify the results

In [31]:
dset_mgrs = xr.open_dataset("birds_MGRS100m.nc")

In [32]:
dset_mgrs