In [1]:
import geopandas as gpd
from utils import get_strip_id

### Churchill

In [2]:
whale_shp_path = r'E:\2022\Churchill_050166593010 & 050169967010\Churchill_annotations\30cm_Final\Churchill_Annotations.shp'
tile_shp_path = r'E:\2022\Churchill_050166593010 & 050169967010\050169967010_01\GIS_FILES\050169967010_01_TILE_SHAPE.shp'

whale_gdf = gpd.read_file(whale_shp_path)
tiles_gdf = gpd.read_file(tile_shp_path)
print('Coordinate Reference System of the shp files:', whale_gdf.crs.to_string(), tiles_gdf.crs.to_string())
print(f'{len(tiles_gdf)} tiles')
print(whale_gdf['conf_detec'].value_counts())
whale_gdf['ID'] = range(1, len(whale_gdf) + 1)
whale_gdf['Photo-ID'] = None
print(f'{len(whale_gdf)} whales')

# add 'Species' field. if 'conf_detec' == 1, then 'Species' = 'uncertain whale'; if 'conf_detec' == 2, then 'Species' = 'certain whale'
whale_gdf['Species'] = whale_gdf['conf_detec'].apply(lambda x: 'certain whale' if x == 2 else 'uncertain whale')
print(whale_gdf['Species'].value_counts())
whale_gdf.head(2)

Coordinate Reference System of the shp files: EPSG:4326 EPSG:4326
30 tiles
conf_detec
2    378
1    113
Name: count, dtype: int64
491 whales
Species
certain whale      378
uncertain whale    113
Name: count, dtype: int64


Unnamed: 0,ORIG_FID,length,location,population,Year,date,conf_detec,conf_measu,conf_vecto,social_gro,Longitude,Latitude,Strip_ID,geometry,ID,Photo-ID,Species
0,1,2.830853,Churchill River,Western Hudson Bay,2022,31072022,2,3,1,A,-94.276302,58.853473,P001,POINT (-94.2763 58.85347),1,,certain whale
1,2,1.722982,Churchill River,Western Hudson Bay,2022,31072022,1,1,1,B,-94.245892,58.846111,P001,POINT (-94.24589 58.84611),2,,uncertain whale


In [3]:
tiles_gdf.head(2)

Unnamed: 0,tileName,fileName,prodDesc,volNum,geometry
0,R1C1,22JUL31174035-P3DS_R1C1-050169967010_01_P002.TIF,P002,Vol. 1,"POLYGON ((-94.24669 58.8611, -94.1615 58.8619,..."
1,R1C2,22JUL31174035-P3DS_R1C2-050169967010_01_P002.TIF,P002,Vol. 1,"POLYGON ((-94.16149 58.8619, -94.0763 58.86263..."


In [4]:
# Assign Photo-ID to each annotation
for index, annotation in whale_gdf.iterrows():
    point = annotation['geometry']
    for tile_index, tile in tiles_gdf.iterrows():
        if tile['geometry'].contains(point) and annotation['Strip_ID']==get_strip_id(tile['fileName']):
            whale_gdf.at[index, 'Photo-ID'] = tile['fileName'].rstrip('.TIF')
            break

whale_gdf.head(1)

Unnamed: 0,ORIG_FID,length,location,population,Year,date,conf_detec,conf_measu,conf_vecto,social_gro,Longitude,Latitude,Strip_ID,geometry,ID,Photo-ID,Species
0,1,2.830853,Churchill River,Western Hudson Bay,2022,31072022,2,3,1,A,-94.276302,58.853473,P001,POINT (-94.2763 58.85347),1,22JUL31174054-P3DS_R1C1-050169967010_01_P001,certain whale


In [5]:
whale_gdf.to_file(r'E:\annotations_fmt\Churchill_2022_fmt.shp')

### CW21

In [6]:
# 2021
pan_dir = r'D:\Whale_Data\2021\Clearwater\014413001050_01_'
whale_strip_shp_path = r'D:\Whale_Data\Clearwater\Clearwater_Annotations-strip\CW21_Whale_Specified.shp'
whale_shp_path = r'D:\Whale_Data\2021\Clearwater\Annotations_NAD83_UTM19N_Amao\CS_20210907_points_B_Project.shp'
tile_shp_path = r'E:\2021\Clearwater\GIS_FILES\014413001050_01_TILE_SHAPE.shp'

whale_gdf = gpd.read_file(whale_shp_path)
whale_strip_gdf = gpd.read_file(whale_strip_shp_path)
whale_gdf['Photo-ID'] = whale_strip_gdf['Photo-ID']
whale_gdf = whale_gdf.rename(columns={'Id': 'ID'})
whale_gdf['ID'] = range(1, len(whale_gdf) + 1)

print(f"The coordinate system is {whale_gdf.crs.to_string()}.")

# Format the Species column
whale_gdf.loc[(whale_gdf['BS_BCAgree'].isin(['N HARP SEALS', 'N - HARP'])) | 
       (whale_gdf['AgreeBS'] == 'N - harp seals'), 'Species'] = 'harp seal'
whale_gdf.loc[whale_gdf['Confidence'] == 1, 'Species'] = 'uncertain whale'
whale_gdf.loc[whale_gdf['Confidence'] == 2, 'Species'] = 'certain whale'
whale_gdf.loc[~whale_gdf['Species'].isin(['harp seal', 'uncertain whale', 'certain whale']), 'Species'] = 'background'

print(whale_gdf['Species'].value_counts(), '\n')
# print(whale_gdf['Confidence'].value_counts(), '\n')
# print(whale_gdf['BS_BCAgree'].value_counts(), '\n')
# print(whale_gdf['AgreeBS'].value_counts(), '\n')

whale_gdf.head(3)

The coordinate system is EPSG:26919.
Species
background         539
harp seal          526
uncertain whale    215
certain whale      196
Name: count, dtype: int64 



Unnamed: 0,ID,Sighting,Confidence,Cloud_Fog,BSS,TileFID,Technician,Comment,Tile_PID,Lat,Long,Agree,AgreeBS,BS_BCAgree,geometry,Photo-ID,Species
0,1,whale,0,0,0,6150,CC,,DM24,66.5194,-67.9284,N,N - changed to 0,,POINT (547642.263 7378215.012),21SEP07162638-P3DS_R03C1-014413001050_01_P003,background
1,2,ice,0,0,0,6269,CC,,DN35,66.5113,-67.8702,Y,N - change to 0,,POINT (550245.959 7377352.54),21SEP07162638-P3DS_R03C2-014413001050_01_P003,background
2,3,whale,2,0,0,6597,CC,,DQ15,66.5137,-67.9678,Y,Y,Y,POINT (545900.596 7377547.442),21SEP07162638-P3DS_R03C1-014413001050_01_P003,certain whale


In [7]:
whale_gdf.to_file(r'E:\annotations_fmt\Clearwater_2021_fmt.shp')

### CW22

In [8]:
# # 2022
pan_dir = r'E:\2022\Clearwater&Kangilo_015379185020_01\015379185020_01_P001_PAN_MOS'
whale_shp_path = r'E:\2022\Clearwater&Kangilo_015379185020_01\WS_Clearwater&Kangilo_015379185020_01_annotations\Clearwater&Kangilo_015379185020_01.shp\Whale_DB_Clearwater_Kangilo_2022-08-01_015379185020_01_final.shp'
whale_gdf = gpd.read_file(whale_shp_path)
whale_gdf['Species'].value_counts()
print(whale_gdf['Species'].value_counts())
# rename the beluga in column 'Species' to 'certain whale'
whale_gdf['Species'] = whale_gdf['Species'].apply(lambda x: 'certain whale' if x == 'beluga' else x)
whale_gdf['Species'] = whale_gdf['Species'].apply(lambda x: 'uncertain whale' if x == 'maybe' else x)
whale_gdf['Species'] = whale_gdf['Species'].apply(lambda x: 'harp seal' if x == 'seal' else x)
print('After formatting,', whale_gdf['Species'].value_counts())
whale_gdf['Species'].value_counts()
whale_gdf.head(3)

Species
beluga        103
maybe          79
harp seals     17
seal           16
Name: count, dtype: int64
After formatting, Species
certain whale      103
uncertain whale     79
harp seals          17
harp seal           16
Name: count, dtype: int64


Unnamed: 0,ID,Longitude,Latitude,Species,Duplicate,Beaufort,Comment,Photo-ID,geometry
0,1,-67.21713,66.110227,uncertain whale,no,2,,22AUG01161709-P3DM_R09C5-015379185020_01_P001,POINT (-67.21713 66.11023)
1,2,-67.172506,66.093857,certain whale,no,2,,22AUG01161709-P3DM_R09C5-015379185020_01_P001,POINT (-67.17251 66.09385)
2,3,-67.180719,66.089997,harp seals,no,2,likely harp seals,22AUG01161709-P3DM_R09C5-015379185020_01_P001,POINT (-67.18072 66.09)


In [9]:
whale_gdf.to_file(r'E:\annotations_fmt\Clearwater_2022_fmt.shp')