In [24]:
#import os
import math
import fiona 
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
from shapely.geometry import Polygon, Point
from explode import explode

In [25]:
#SET COUNTRY/CRS <-------------------------------------------------------------------------------CHANGE
n = ['TUR', 'EPSG:5637'] #turkey
# n = ['GRC', 'EPSG:2100'] #greece
# n = ['ESP', 'EPSG:2062'] #spain
# n = ['HRV', 'EPSG:3765'] #croatia
# n = ['ITA', 'EPSG:7794'] #italy
# n = ['CYP', 'EPSG:6312'] #cyprus
# n = ['MLT', 'EPSG:3034'] #malta
# n = ['FRA', 'EPSG:2154'] #france
# n = ['ALB', 'EPSG:6962'] #albania
wgs84= {'init' :'EPSG:4326'}

#open file
f = '../targets_2_simrdwn/' + n[0] + '/'
file = f + 'predictions_' + n[0] + '.csv'
detections = pd.read_csv(file, delimiter=',', header=0)
print(len(detections), 'total SIMRDWN predictions')
#detections.head()

13290 total SIMRDWN predictions


In [26]:
#georeference prediction bounding boxes and convert to centroid
selected = detections

lon_list, lat_list, radius_list, diameter_list = [], [], [], []
for i in range(len(selected)):
    img = detections['img_file'].iloc[i]

#     #extract bottom right coordinate from image name
    br_lon = float(img.split('_')[4] + '.' + img.split('_')[5]) #lon = x                                       
    br_lat = float(img.split('_')[2] + '.' + img.split('_')[3]) #lat = y
    
    #extract top left coordinate from image name
    tl_lon = float(img.split('_')[8] + '.' + (img.split('_')[9]).split('.')[0]) #lon = x
    tl_lat = float(img.split('_')[6] + '.' + img.split('_')[7]) #lat = y    
    
#     #FOR GREECE ONLY - extract bottom right coordinate from image name
#     br_lon = float(img.split('_')[8] + '.' + img.split('_')[9]) #lon = x                                       
#     br_lat = float(img.split('_')[6] + '.' + img.split('_')[7]) #lat = y
    
#     #extract top left coordinate from image name
#     tl_lon = float(img.split('_')[12] + '.' + (img.split('_')[13]).split('.')[0]) #lon = x
#     tl_lat = float(img.split('_')[10] + '.' + img.split('_')[11]) #lat = y
    
    #image dimensions
    img_w = detections['img_width'].iloc[i]
    img_h = detections['img_height'].iloc[i]
    width = abs(br_lon - tl_lon) 
    height = abs(tl_lat - br_lat)
    
    #decimal degrees per pixel
    res_x = width / img_w #image width
    res_y = height / img_h #image height
    
    #res
    lat = tl_lat * math.pi / 180
    res = 156543.04 * math.cos(lat) / (2 ** 18)
    
    #convert bounding box to centroid
    xmin = detections['xmin'].iloc[i]
    ymin = detections['ymin'].iloc[i]
    xmax = detections['xmax'].iloc[i]
    ymax = detections['ymax'].iloc[i]
    x = (xmin + xmax) / 2
    y = (ymin + ymax) / 2
    
    #convert centroid point to lat/lon   
    x_center = tl_lon + (x * res_x) 
    y_center = tl_lat - (y * res_y) 
    
    #estimate radius of detection
    w = xmax-xmin 
    h = ymax-ymin 
    radius = (((w+h)/2)/4)*1.1
    diameter = radius * 2
    lon_list.append(x_center)
    lat_list.append(y_center)
    radius_list.append(radius)
    diameter_list.append(diameter)

In [27]:
#add attributes to df
d2 = selected.reset_index(drop=True) #super important
d2 = d2.drop(columns=['label'])
d2['x']=lon_list
d2['y']=lat_list
d2['radius']=np.round(radius_list,2)
d2['diameter']=np.round(diameter_list,2)
print(len(d2), 'total SIMRDWN predictions')

#add geometry information to df 
geometry = [Point(i) for i in zip(lon_list, lat_list)]
d2 = gpd.GeoDataFrame(d2, geometry=geometry, crs=wgs84)
d2 = d2.to_crs({'init': n[1]})
print(d2.crs)
d2.head()

13290 total SIMRDWN predictions
{'init': 'EPSG:5637'}


Unnamed: 0,img_file,img_width,img_height,xmin,ymin,xmax,ymax,x,y,radius,diameter,geometry
0,image_18_38_46098_27_08505_38_47028_27_07399.jpeg,2061,2214,30,613,113,704,27.074374,38.467514,23.92,47.85,POINT (5457336.632317584 1507076.414288142)
1,image_18_36_80047_28_24948_36_80986_28_2388.jpeg,1991,2186,1005,718,1101,891,28.244448,36.806404,36.99,73.98,POINT (5599828.388183808 1353999.777046428)
2,image_18_38_57436_26_35616_38_5836_26_34498.jpeg,2084,2203,1005,1214,1100,1295,26.350626,38.578338,24.2,48.4,POINT (5393933.816647355 1504941.096989041)
3,image_18_38_57436_26_35616_38_5836_26_34498.jpeg,2084,2203,1175,1366,1262,1466,26.351517,38.577661,25.71,51.42,POINT (5394024.773244331 1504885.534582943)
4,image_18_38_57436_26_35616_38_5836_26_34498.jpeg,2084,2203,1181,1208,1261,1301,26.35153,38.578338,23.79,47.58,POINT (5394009.618732309 1504958.149597286)


In [28]:
#select predictions within search area
# search_area = gpd.read_file('../0_search_areas/4_search_area/search_area_100m_' + n + '.shp')
# search_area = search_area.to_crs({'init': n[1]})

# d2 = sjoin(d2, search_area, how='inner', op='within')
# print(len(d2), 'predictions within search area')

#select predictions by diameter
d = 55
d2 = d2[d2['diameter'] <= d]
geometry = [Point(i) for i in zip(d2['x'], d2['y'])]

d2 = d2[['radius', 'geometry']]
print(len(d2), 'total SIMRDWN predictions less than or equal to', d, 'meters')
d2.head()

10589 total SIMRDWN predictions less than or equal to 55 meters


Unnamed: 0,radius,geometry
0,23.92,POINT (5457336.632317584 1507076.414288142)
2,24.2,POINT (5393933.816647355 1504941.096989041)
3,25.71,POINT (5394024.773244331 1504885.534582943)
4,23.79,POINT (5394009.618732309 1504958.149597286)
5,24.34,POINT (5393993.114654925 1505030.460836815)


In [29]:
#aggregate predictions 
buffer = gpd.GeoDataFrame(geometry = d2.buffer(10)) #buffer by 10 meters
buffer['Dissolve'] = 0
buffer_dis = buffer.dissolve(by='Dissolve')
buffer_exploded = explode(buffer_dis)    
print(len(d2), 'predictions aggregated to', len(buffer_exploded), 'predictions')

10589 predictions aggregated to 3316 predictions


In [30]:
#calculate mean radius of aggregated predictions
d2_ag = gpd.sjoin(d2, buffer_exploded, how="inner", op='intersects')
centroids = gpd.GeoDataFrame(geometry = buffer_exploded.centroid, crs=wgs84)
centroids['radius']=d2_ag.groupby('index_right')['radius'].mean()
centroids.crs={'init' : n[1]}
print(len(centroids), 'predictions')
centroids.head()

3316 predictions


Unnamed: 0,geometry,radius
0,POINT (5585256.743817649 1334762.655111267),14.02
1,POINT (5585402.066031021 1334864.4896085),13.2
2,POINT (5585343.659485435 1335108.628923445),10.105
3,POINT (5684442.50065573 1336382.959582468),16.5
4,POINT (5684522.643003204 1336641.468140449),11.186667


In [31]:
#generate farmsite polygons
b = 100
buffer = gpd.GeoDataFrame(geometry = centroids.buffer(b)) #buffer by 100m
buffer['Dissolve'] = 0
buffer_dis = buffer.dissolve(by='Dissolve')
buffer_exploded = explode(buffer_dis)    
buffer_exploded.crs={'init' : n[1]}
buffer_exploded.shape

(684, 1)

In [32]:
#add number of predictions per farmsite to farmsite polygons
count=sjoin(centroids, buffer_exploded, how='inner', op='within')
buffer_exploded['cage count']=count.groupby('index_right')['index_right'].count()
buffer_exploded['cage count']=count.groupby('index_right')['index_right'].count()
buffer_exploded["farm ID"] = buffer_exploded.index + 1 #add farm ID

print(len(buffer_exploded), 'total farmsite predictions based on farmsites')
buffer_exploded.head()

684 total farmsite predictions based on farmsites


Unnamed: 0,geometry,cage count,farm ID
0,"POLYGON ((5585356.743817649 1334762.655111267,...",2,1
1,"POLYGON ((5585443.659485435 1335108.628923445,...",1,2
2,"POLYGON ((5684542.50065573 1336382.959582468, ...",1,3
3,"POLYGON ((5684622.643003204 1336641.468140449,...",1,4
4,"POLYGON ((5561514.39750424 1339062.930456469, ...",1,5


In [33]:
#select farmsites by number of predictions
p = 4 #number of predictions per farmsite
farmsites = buffer_exploded[buffer_exploded['cage count'] >= p]
print(len(farmsites), 'farm site predictions containing', p, 'or more net pen predictions')
farmsites.head()

146 farm site predictions containing 4 or more net pen predictions


Unnamed: 0,geometry,cage count,farm ID
57,"POLYGON ((5633338.431933922 1360451.209251807,...",6,58
72,"POLYGON ((5523619.944363367 1372915.76649205, ...",5,73
73,"POLYGON ((5524092.456876466 1371532.797600841,...",13,74
75,"POLYGON ((5804822.691990278 1365594.318564056,...",9,76
80,"POLYGON ((5632466.068409337 1361403.594569023,...",5,81


In [34]:
#select predictions by farmsites
select = farmsites
select['predictions'] = 0
select = select.dissolve(by='predictions')
mask = centroids.within(select.loc[0, 'geometry'])
farmsites=farmsites.drop(['predictions'], axis=1)
d2_clip = centroids.loc[mask]

print(len(d2_clip), 'net pen predictions within farm sites')
print(d2_clip.crs)
d2_clip.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


2663 net pen predictions within farm sites
{'init': 'EPSG:5637'}


Unnamed: 0,geometry,radius
57,POINT (5633238.431933922 1360451.209251807),8.66
59,POINT (5633411.177551221 1360524.496418205),8.8
60,POINT (5633602.239087233 1360558.792601296),10.86
62,POINT (5633117.398942553 1360647.30999176),7.7
63,POINT (5633735.87109067 1360616.836398021),10.493333


In [35]:
#add id and farm_id to predictions
d2_clip_id=sjoin(farmsites, d2_clip, how='right', op='intersects')
d2_clip_id=d2_clip_id.reset_index(drop=True)
d2_clip_id["cage ID"] = d2_clip_id.index + 1 
d2_clip_id['diameter']=d2_clip_id['radius'] * 2

d2_clip_id['radius']=round(d2_clip_id['radius'],2)
d2_clip_id['diameter']=round(d2_clip_id['radius'],2)

d2_clip=d2_clip_id[['farm ID', 'cage ID', 'radius', 'diameter', 'geometry']]
print(len(d2_clip), 'net pen predictions')
d2_clip.head()

2663 net pen predictions


Unnamed: 0,farm ID,cage ID,radius,diameter,geometry
0,58,1,8.66,8.66,POINT (5633238.431933922 1360451.209251807)
1,58,2,8.8,8.8,POINT (5633411.177551221 1360524.496418205)
2,58,3,10.86,10.86,POINT (5633602.239087233 1360558.792601296)
3,58,4,10.49,10.49,POINT (5633735.87109067 1360616.836398021)
4,58,5,25.99,25.99,POINT (5633410.31053256 1360674.129430174)


In [36]:
#generate farmsite extents
envelope = gpd.GeoDataFrame(geometry = farmsites.envelope)
farmsites2=farmsites.copy()
farmsites2['geometry']=envelope['geometry']
farmsites2=farmsites2[['farm ID', 'cage count', 'geometry']]
print(len(farmsites2), 'farm site predictions')
farmsites2.head()

146 farm site predictions


Unnamed: 0,farm ID,cage count,geometry
57,58,6,"POLYGON ((5633138.431933922 1360351.209251807,..."
72,73,5,"POLYGON ((5523423.882022151 1372843.324593243,..."
73,74,13,"POLYGON ((5523811.518937072 1371432.797600841,..."
75,76,9,"POLYGON ((5804590.57047427 1365513.892462137, ..."
80,81,5,"POLYGON ((5632270.016081243 1361331.192644842,..."


In [37]:
#exclusion of false positives
path= f + n[0] + '_exclude.kml'
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
exclude = gpd.read_file(path, driver='KML')
exclude = exclude.to_crs({'init': n[1]})

#create exclusion mask
exclude['Dissolve'] = 0
exclude_dis = exclude.dissolve(by='Dissolve')

#eliminate net pen false positives
mask = ~d2_clip.within(exclude_dis.loc[0, 'geometry'])
d2_clip = d2_clip.loc[mask]
print('final number of predicted net pens:', len(d2_clip))

#eliminate farmsite false positives
mask2 = ~farmsites2.within(exclude_dis.loc[0, 'geometry'])
farmsites2 = farmsites2.loc[mask2]
print('final number of predicted farm sites:', len(farmsites2))

final number of predicted net pens: 2115
final number of predicted farm sites: 114


In [38]:
# #generate net pen buffers
def buffer(row):
     return row.geometry.buffer(row.radius)   
    
d2_clip2 = d2_clip.copy()
d2_clip2.crs

buff = d2_clip2['geometry'] = d2_clip2.apply(buffer, axis=1)
circles = gpd.GeoDataFrame(d2_clip2, geometry = buff, crs={'init': n[1]})

In [39]:
#generate farmsite centroids
farmsites3 = farmsites2.copy()
farm_pts = gpd.GeoDataFrame(farmsites3, geometry = farmsites3.centroid)

In [40]:
print(len(circles), circles.crs)
print(len(d2), d2.crs)
print(len(d2_clip), d2_clip.crs)
print(len(farmsites2), farmsites2.crs)
print(len(farm_pts), farm_pts.crs)

2115 {'init': 'EPSG:5637'}
10589 {'init': 'EPSG:5637'}
2115 {'init': 'EPSG:5637'}
114 {'init': 'EPSG:5637'}
114 {'init': 'EPSG:5637'}


In [41]:
circles.head()

Unnamed: 0,farm ID,cage ID,radius,diameter,geometry
6,73,7,9.9,9.9,"POLYGON ((5523533.782022151 1372943.324593243,..."
7,73,8,10.04,10.04,"POLYGON ((5523547.95910089 1372972.835951038, ..."
8,73,9,10.04,10.04,"POLYGON ((5523572.54337193 1373038.277504713, ..."
9,73,10,9.49,9.49,"POLYGON ((5523588.658827233 1373073.01907898, ..."
10,73,11,10.59,10.59,"POLYGON ((5523610.739337819 1373133.237878393,..."


In [42]:
d2.head()

Unnamed: 0,radius,geometry
0,23.92,POINT (5457336.632317584 1507076.414288142)
2,24.2,POINT (5393933.816647355 1504941.096989041)
3,25.71,POINT (5394024.773244331 1504885.534582943)
4,23.79,POINT (5394009.618732309 1504958.149597286)
5,24.34,POINT (5393993.114654925 1505030.460836815)


In [43]:
d2_clip.head()

Unnamed: 0,farm ID,cage ID,radius,diameter,geometry
6,73,7,9.9,9.9,POINT (5523523.882022151 1372943.324593243)
7,73,8,10.04,10.04,POINT (5523537.91910089 1372972.835951038)
8,73,9,10.04,10.04,POINT (5523562.50337193 1373038.277504713)
9,73,10,9.49,9.49,POINT (5523579.168827233 1373073.01907898)
10,73,11,10.59,10.59,POINT (5523600.149337819 1373133.237878393)


In [44]:
farmsites2.head()

Unnamed: 0,farm ID,cage count,geometry
72,73,5,"POLYGON ((5523423.882022151 1372843.324593243,..."
73,74,13,"POLYGON ((5523811.518937072 1371432.797600841,..."
75,76,9,"POLYGON ((5804590.57047427 1365513.892462137, ..."
81,82,81,"POLYGON ((5630533.352746231 1361082.721710565,..."
85,86,8,"POLYGON ((5524415.986724827 1374167.773846559,..."


In [45]:
# #export all centroids
d2 = d2.to_crs({'init': n[1]})
d2.to_file(f + n[0] + '_simrdwn.shp')
d2['geometry'] = d2['geometry'].to_crs(epsg=4326)
d2.to_file(f + n[0] + '_simrdwn.geojson', driver='GeoJSON')

# #export buffered centroids
circles.to_file(f + n[0] + '_simrdwn_pens.shp')
circles['geometry'] = circles['geometry'].to_crs(epsg=4326)
circles.to_file(f + n[0] + '_simrdwn_pens.geojson', driver='GeoJSON')

#export centroids within farmsites
d2_clip.to_file(f + n[0] + '_simrdwn_pts.shp')
d2_clip['geometry'] = d2_clip['geometry'].to_crs(epsg=4326)
d2_clip.to_file(f + n[0] + '_simrdwn_pts.geojson', driver='GeoJSON')

# #export farmsites extents
farmsites2.to_file(f + n[0] + '_simrdwn_farmsites_ext.shp')
farmsites2['geometry'] = farmsites2['geometry'].to_crs(epsg=4326)
farmsites2.to_file(f + n[0] + '_simrdwn_farmsites_ext.geojson', driver='GeoJSON')

# #export farmsites points
farm_pts.to_file(f + n[0] + '_simrdwn_farm_pts.shp')
farm_pts['geometry'] = farm_pts['geometry'].to_crs(epsg=4326)
farm_pts.to_file(f + n[0] + '_simrdwn_farm_pts.geojson', driver='GeoJSON')