In [5]:
#import os
import math
import fiona 
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
from shapely.geometry import Polygon, Point, mapping
from explode import explode
from coord import coord

In [6]:
#SET COUNTRY/CRS <-------------------------------------------------------------------------------CHANGE
# n = ['TUR', 'EPSG:5637'] #turkey
# n = ['GRC', 'EPSG:2100'] #greece
n = ['ESP', 'EPSG:2062'] #spain
# n = ['HRV', 'EPSG:3765'] #croatia
# n = ['ITA', 'EPSG:7794'] #italy
# n = ['CYP', 'EPSG:6312'] #cyprus
# n = ['MLT', 'EPSG:3034'] #malta
# n = ['FRA', 'EPSG:2154'] #france
# n = ['ALB', 'EPSG:6962'] #albania
wgs84= {'init' :'EPSG:4326'}

#open file
f = '../targets_2_simrdwn/3_new/' + n[0] + '/'
file = f + 'predictions_' + n[0] + '.csv'
detections = pd.read_csv(file, delimiter=',', header=0)
print(len(detections), 'total SIMRDWN predictions')
#detections.head()

5260 total SIMRDWN predictions


In [7]:
#georeference prediction bounding boxes and convert to centroid
selected = detections

lon_list, lat_list, radius_list, diameter_list = [], [], [], []
for i in range(len(selected)):
    img = detections['img_file'].iloc[i]

    #extract bottom right coordinate from image name
    br_lon = float(img.split('_')[4] + '.' + img.split('_')[5]) #lon = x                                       
    br_lat = float(img.split('_')[2] + '.' + img.split('_')[3]) #lat = y
    
    #extract top left coordinate from image name
    tl_lon = float(img.split('_')[8] + '.' + (img.split('_')[9]).split('.')[0]) #lon = x
    tl_lat = float(img.split('_')[6] + '.' + img.split('_')[7]) #lat = y    
    
#     #FOR GREECE ONLY - extract bottom right coordinate from image name
#     br_lon = float(img.split('_')[8] + '.' + img.split('_')[9]) #lon = x                                       
#     br_lat = float(img.split('_')[6] + '.' + img.split('_')[7]) #lat = y
    
#     #extract top left coordinate from image name
#     tl_lon = float(img.split('_')[12] + '.' + (img.split('_')[13]).split('.')[0]) #lon = x
#     tl_lat = float(img.split('_')[10] + '.' + img.split('_')[11]) #lat = y
    
    #image dimensions
    img_w = detections['img_width'].iloc[i]
    img_h = detections['img_height'].iloc[i]
    width = abs(br_lon - tl_lon) 
    height = abs(tl_lat - br_lat)
    
    #decimal degrees per pixel
    res_x = width / img_w #image width
    res_y = height / img_h #image height
    
    #res
    lat = tl_lat * math.pi / 180
    res = 156543.04 * math.cos(lat) / (2 ** 18)
    
    #convert bounding box to centroid
    xmin = detections['xmin'].iloc[i]
    ymin = detections['ymin'].iloc[i]
    xmax = detections['xmax'].iloc[i]
    ymax = detections['ymax'].iloc[i]
    x = (xmin + xmax) / 2
    y = (ymin + ymax) / 2
    
    #convert centroid point to lat/lon   
    x_center = tl_lon + (x * res_x) 
    y_center = tl_lat - (y * res_y) 
    
    #estimate radius of detection
    w = xmax-xmin 
    h = ymax-ymin 
    radius = (((w+h)/2)/4)*1.1
    diameter = radius * 2
    lon_list.append(x_center)
    lat_list.append(y_center)
    radius_list.append(radius)
    diameter_list.append(diameter)

In [8]:
#add attributes to df
d2 = selected.reset_index(drop=True) #super important
d2 = d2.drop(columns=['label'])
d2['x']=lon_list
d2['y']=lat_list
d2['radius']=np.round(radius_list,2)
d2['diameter']=np.round(diameter_list,2)
print(len(d2), 'total SIMRDWN predictions')

#add geometry information to df 
geometry = [Point(i) for i in zip(lon_list, lat_list)]
d2 = gpd.GeoDataFrame(d2, geometry=geometry, crs=wgs84)
d2 = d2.to_crs({'init': n[1]})
print(d2.crs)
#d2.head()

5260 total SIMRDWN predictions
{'init': 'EPSG:2062'}


In [9]:
#select predictions within search area
# search_area = gpd.read_file('../0_search_areas/4_search_area/search_area_100m_' + n + '.shp')
# search_area = search_area.to_crs({'init': n[1]})

# d2 = sjoin(d2, search_area, how='inner', op='within')
# print(len(d2), 'predictions within search area')

#select predictions by diameter
d = 55
d2 = d2[d2['diameter'] <= d]
geometry = [Point(i) for i in zip(d2['x'], d2['y'])]

d2 = d2[['radius', 'geometry']]
print(len(d2), 'total SIMRDWN predictions less than or equal to', d, 'meters')
#d2.head()

1222 total SIMRDWN predictions less than or equal to 55 meters


In [10]:
#aggregate predictions 
buffer = gpd.GeoDataFrame(geometry = d2.buffer(10)) #buffer by 10 meters
buffer['Dissolve'] = 0
buffer_dis = buffer.dissolve(by='Dissolve')
buffer_exploded = explode(buffer_dis)    
print(len(d2), 'predictions aggregated to', len(buffer_exploded), 'predictions')

1222 predictions aggregated to 850 predictions


In [11]:
#calculate mean radius of aggregated predictions
d2_ag = gpd.sjoin(d2, buffer_exploded, how="inner", op='intersects')
centroids = gpd.GeoDataFrame(geometry = buffer_exploded.centroid, crs=wgs84)
centroids['radius']=d2_ag.groupby('index_right')['radius'].mean()
centroids.crs={'init' : n[1]}
print(len(centroids), 'predictions')
#centroids.head()

  warn('CRS of frames being joined does not match!')


850 predictions


In [12]:
#generate farmsite polygons
b = 100
buffer = gpd.GeoDataFrame(geometry = centroids.buffer(b)) #buffer by 100m
buffer['Dissolve'] = 0
buffer_dis = buffer.dissolve(by='Dissolve')
buffer_exploded = explode(buffer_dis)    
buffer_exploded.crs={'init' : n[1]}
buffer_exploded.shape

(527, 1)

In [13]:
#add number of predictions per farmsite to farmsite polygons
count=sjoin(centroids, buffer_exploded, how='inner', op='within')
buffer_exploded['cage count']=count.groupby('index_right')['index_right'].count()
buffer_exploded['cage count']=count.groupby('index_right')['index_right'].count()
buffer_exploded["farm ID"] = buffer_exploded.index + 1 #add farm ID

print(len(buffer_exploded), 'total farmsite predictions based on farmsites')
#buffer_exploded.head()

527 total farmsite predictions based on farmsites


In [14]:
#select farmsites by number of predictions
p = 4 #number of predictions per farmsite
farmsites = buffer_exploded[buffer_exploded['cage count'] >= p]
print(len(farmsites), 'farm site predictions containing', p, 'or more net pen predictions')
#farmsites.head()

31 farm site predictions containing 4 or more net pen predictions


In [15]:
#select predictions by farmsites
select = farmsites
select['predictions'] = 0
select = select.dissolve(by='predictions')
mask = centroids.within(select.loc[0, 'geometry'])
farmsites=farmsites.drop(['predictions'], axis=1)
d2_clip = centroids.loc[mask]

print(len(d2_clip), 'net pen predictions within farm sites')
print(d2_clip.crs)
#d2_clip.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


227 net pen predictions within farm sites
{'init': 'EPSG:2062'}


In [16]:
#add id and farm_id to predictions
d2_clip_id=sjoin(farmsites, d2_clip, how='right', op='intersects')
d2_clip_id=d2_clip_id.reset_index(drop=True)
d2_clip_id["cage ID"] = d2_clip_id.index + 1 
d2_clip_id['diameter']=d2_clip_id['radius'] * 2

d2_clip_id['radius']=round(d2_clip_id['radius'],2)
d2_clip_id['diameter']=round(d2_clip_id['diameter'],2)

d2_clip=d2_clip_id[['farm ID', 'cage ID', 'radius', 'diameter', 'geometry']]
print(len(d2_clip), 'net pen predictions')
d2_clip.head()

227 net pen predictions


Unnamed: 0,farm ID,cage ID,radius,diameter,geometry
0,19,1,16.23,32.46,POINT (689919.8004147761 234025.2045451488)
1,19,2,15.4,30.8,POINT (689893.930380497 234128.2964443578)
2,19,3,14.44,28.88,POINT (689975.6102594684 234130.8334081308)
3,19,4,13.89,27.78,POINT (689764.0753690819 234177.018769359)
4,30,5,14.68,29.36,POINT (540441.464335561 236010.5871938617)


In [17]:
#generate farmsite extents
envelope = gpd.GeoDataFrame(geometry = farmsites.envelope)
farmsites2=farmsites.copy()
farmsites2['geometry']=envelope['geometry']
farmsites2=farmsites2[['farm ID', 'cage count', 'geometry']]
print(len(farmsites2), 'farm site predictions')
farmsites2.head()

31 farm site predictions


Unnamed: 0,farm ID,cage count,geometry
18,19,4,"POLYGON ((689664.0753690819 233925.2045451488,..."
29,30,7,"POLYGON ((540002.9713616917 235910.5871938617,..."
33,34,5,"POLYGON ((692026.3589401236 235610.9962224594,..."
85,86,4,"POLYGON ((693289.863692842 236355.5148529743, ..."
99,100,5,"POLYGON ((923736.3560987308 453142.8355842861,..."


In [18]:
#exclusion of false positives
path= f + n[0] + '_exclude.kml'
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
exclude = gpd.read_file(path, driver='KML')
exclude = exclude.to_crs({'init': n[1]})

#create exclusion mask
exclude['Dissolve'] = 0
exclude_dis = exclude.dissolve(by='Dissolve')

#eliminate net pen false positives
mask = ~d2_clip.within(exclude_dis.loc[0, 'geometry'])
d2_clip = d2_clip.loc[mask]
print('final number of predicted net pens:', len(d2_clip))

#eliminate farmsite false positives
mask2 = ~farmsites2.within(exclude_dis.loc[0, 'geometry'])
farmsites2 = farmsites2.loc[mask2]
print('final number of predicted farm sites:', len(farmsites2))

final number of predicted net pens: 29
final number of predicted farm sites: 3


In [19]:
#generate farmsite centroids
farmsites3 = farmsites2.copy()
farm_pts = gpd.GeoDataFrame(farmsites3, geometry = farmsites3.centroid)
farm_pts.head()

Unnamed: 0,farm ID,cage count,geometry
99,100,5,POINT (923909.2637902545 453295.3234419652)
118,119,18,POINT (918090.1659160672 448093.5808382252)
512,513,6,POINT (1165123.082400917 872053.4200517861)


In [20]:
d2_clip.head()

Unnamed: 0,farm ID,cage ID,radius,diameter,geometry
20,100,21,9.08,18.16,POINT (923836.3560987308 453242.8355842861)
21,100,22,8.94,17.88,POINT (923867.7209484867 453258.1059747139)
22,100,23,8.39,16.78,POINT (923891.7316953522 453304.0131081137)
23,100,24,8.66,17.32,POINT (923952.8767858517 453333.3249462657)
24,100,25,9.35,18.7,POINT (923982.171481778 453347.8112996442)


In [21]:
# #generate net pen buffers
def buffer(row):
     return row.geometry.buffer(row.radius)       
d2_clip2 = d2_clip.copy()
d2_clip2.crs

{'init': 'EPSG:2062'}

In [22]:
#generate buffer and envelope for bing aquistion
farm_pts_2 = farm_pts.copy()
centroid_buffer = gpd.GeoDataFrame(geometry = farm_pts_2.buffer(500))
envelope = gpd.GeoDataFrame(farmsites3, geometry = centroid_buffer.envelope)
envelope = envelope['geometry'].to_crs(epsg=4326)
#extract lat/long for each square polygon (envelope)
coord_list = []
for i in envelope.index:
    coords = (mapping(envelope.geometry[i])['coordinates'])
    coord_list.append(coords)
    
#generate CSVs of bing targets and index
#combine x/y point groups
coord_all = []
for i in range(len(coord_list)):
    coord_group = coord(coord_list[i]) #function to extract and format x/y points
    coord_all.append(coord_group) 
targets = pd.concat(coord_all)
print('total coordinates (5 per detection):', len(targets))

targets.to_csv(f + n[0] + '.csv', index = None, header=True)

index = pd.DataFrame(envelope.index)
index = index[0] + 1
index.to_csv(f + n[0] + '_index.csv', index = None, header=False)

total coordinates (5 per detection): 15


In [23]:
# #generate net pen buffers
def buffer(row):
     return row.geometry.buffer(row.radius)       
d2_clip2 = d2_clip.copy()
d2_clip2.crs
buff = d2_clip2['geometry'] = d2_clip2.apply(buffer, axis=1)
circles = gpd.GeoDataFrame(d2_clip2, geometry = buff, crs={'init': n[1]})
#d2_clip.head()

In [24]:
print(len(circles), circles.crs)
print(len(d2), d2.crs)
print(len(d2_clip), d2_clip.crs)
print(len(farmsites2), farmsites2.crs)
print(len(farm_pts), farm_pts.crs)

29 {'init': 'EPSG:2062'}
1222 {'init': 'EPSG:2062'}
29 {'init': 'EPSG:2062'}
3 {'init': 'EPSG:2062'}
3 {'init': 'EPSG:2062'}


In [25]:
#export all centroids
d2 = d2.to_crs({'init': n[1]})
d2.to_file(f + n[0] + '_simrdwn.shp')
d2['geometry'] = d2['geometry'].to_crs(epsg=4326)
d2.to_file(f + n[0] + '_simrdwn.geojson', driver='GeoJSON')

#export buffered centroids
circles.to_file(f + n[0] + '_simrdwn_pens.shp')
circles['geometry'] = circles['geometry'].to_crs(epsg=4326)
circles.to_file(f + n[0] + '_simrdwn_pens.geojson', driver='GeoJSON')

#export centroids within farmsites
d2_clip.to_file(f + n[0] + '_simrdwn_pts.shp')
d2_clip['geometry'] = d2_clip['geometry'].to_crs(epsg=4326)
d2_clip.to_file(f + n[0] + '_simrdwn_pts.geojson', driver='GeoJSON')

#export farmsites extents
farmsites2.to_file(f + n[0] + '_simrdwn_farmsites_ext.shp')
farmsites2['geometry'] = farmsites2['geometry'].to_crs(epsg=4326)
farmsites2.to_file(f + n[0] + '_simrdwn_farmsites_ext.geojson', driver='GeoJSON')

#export farmsites points
farm_pts = farm_pts_2
farm_pts.to_file(f + n[0] + '_simrdwn_farm_pts.shp')
farm_pts['geometry'] = farm_pts['geometry'].to_crs(epsg=4326)
farm_pts.to_file(f + n[0] + '_simrdwn_farm_pts.geojson', driver='GeoJSON')