In [1]:
#import os
import math
import fiona 
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
from shapely.geometry import Polygon, Point, mapping
from explode import explode
from coord import coord

wgs84= {'init' :'EPSG:4326'}

#SET COUNTRY/CRS <-------------------------------------------------------------------------------CHANGE
countries = ['TUR', 'EPSG:5637'],['ESP', 'EPSG:2062'],['HRV', 'EPSG:3765'],['ITA', 'EPSG:7794'],['CYP', 'EPSG:6312'],['MLT', 'EPSG:3034'],['FRA', 'EPSG:2154'],['ALB', 'EPSG:6962'] 

def simrdwn(n):
    #open file
    f = '../targets_2_simrdwn/' + n[0] + '/'
    file = f + 'predictions_' + n[0] + '.csv'
    detections = pd.read_csv(file, delimiter=',', header=0)
    print('country:', n[0])
    print(len(detections), 'total SIMRDWN predictions')

    #georeference prediction bounding boxes and convert to centroid
    selected = detections

    lon_list, lat_list, radius_list, diameter_list = [], [], [], []
    for i in range(len(selected)):
        img = detections['img_file'].iloc[i]

    #extract bottom right coordinate from image name
        br_lon = float(img.split('_')[4] + '.' + img.split('_')[5]) #lon = x                                       
        br_lat = float(img.split('_')[2] + '.' + img.split('_')[3]) #lat = y

        #extract top left coordinate from image name
        tl_lon = float(img.split('_')[8] + '.' + (img.split('_')[9]).split('.')[0]) #lon = x
        tl_lat = float(img.split('_')[6] + '.' + img.split('_')[7]) #lat = y    

    #     #FOR GREECE ONLY - extract bottom right coordinate from image name
    #     br_lon = float(img.split('_')[8] + '.' + img.split('_')[9]) #lon = x                                       
    #     br_lat = float(img.split('_')[6] + '.' + img.split('_')[7]) #lat = y

    #     #extract top left coordinate from image name
    #     tl_lon = float(img.split('_')[12] + '.' + (img.split('_')[13]).split('.')[0]) #lon = x
    #     tl_lat = float(img.split('_')[10] + '.' + img.split('_')[11]) #lat = y

        #image dimensions
        img_w = detections['img_width'].iloc[i]
        img_h = detections['img_height'].iloc[i]
        width = abs(br_lon - tl_lon) 
        height = abs(tl_lat - br_lat)

        #decimal degrees per pixel
        res_x = width / img_w #image width
        res_y = height / img_h #image height

        #res
        lat = tl_lat * math.pi / 180
        res = 156543.04 * math.cos(lat) / (2 ** 18)

        #convert bounding box to centroid
        xmin = detections['xmin'].iloc[i]
        ymin = detections['ymin'].iloc[i]
        xmax = detections['xmax'].iloc[i]
        ymax = detections['ymax'].iloc[i]
        x = (xmin + xmax) / 2
        y = (ymin + ymax) / 2

        #convert centroid point to lat/lon   
        x_center = tl_lon + (x * res_x) 
        y_center = tl_lat - (y * res_y) 

        #estimate radius of detection
        w = xmax-xmin 
        h = ymax-ymin 
        radius = (((w+h)/2)/4)*1.1
        diameter = radius * 2
        lon_list.append(x_center)
        lat_list.append(y_center)
        radius_list.append(radius)
        diameter_list.append(diameter)

    #add attributes to df
    d2 = selected.reset_index(drop=True) #super important
    d2 = d2.drop(columns=['label'])
    d2['x']=lon_list
    d2['y']=lat_list
    d2['radius']=np.round(radius_list,2)
    d2['diameter']=np.round(diameter_list,2)
    print(len(d2), 'total SIMRDWN predictions')

    #add geometry information to df 
    geometry = [Point(i) for i in zip(lon_list, lat_list)]
    d2 = gpd.GeoDataFrame(d2, geometry=geometry, crs=wgs84)
    d2 = d2.to_crs({'init': n[1]})

    #select predictions within search area
    # search_area = gpd.read_file('../0_search_areas/4_search_area/search_area_100m_' + n + '.shp')
    # search_area = search_area.to_crs({'init': n[1]})

    # d2 = sjoin(d2, search_area, how='inner', op='within')
    # print(len(d2), 'predictions within search area')

    #select predictions by diameter
    d = 55
    d2 = d2[d2['diameter'] <= d]
    geometry = [Point(i) for i in zip(d2['x'], d2['y'])]

    d2 = d2[['radius', 'geometry']]
    print(len(d2), 'total SIMRDWN predictions less than or equal to', d, 'meters')

    #aggregate predictions 
    buffer = gpd.GeoDataFrame(geometry = d2.buffer(10)) #buffer by 10 meters
    buffer['Dissolve'] = 0
    buffer_dis = buffer.dissolve(by='Dissolve')
    buffer_exploded = explode(buffer_dis)    
    print(len(d2), 'predictions aggregated to', len(buffer_exploded), 'predictions')

    #calculate mean radius of aggregated predictions
    d2_ag = gpd.sjoin(d2, buffer_exploded, how="inner", op='intersects')
    centroids = gpd.GeoDataFrame(geometry = buffer_exploded.centroid, crs=wgs84)
    centroids['radius']=d2_ag.groupby('index_right')['radius'].mean()
    centroids.crs={'init' : n[1]}
    print(len(centroids), 'predictions')

    #generate farmsite polygons
    b = 100
    buffer = gpd.GeoDataFrame(geometry = centroids.buffer(b)) #buffer by 100m
    buffer['Dissolve'] = 0
    buffer_dis = buffer.dissolve(by='Dissolve')
    buffer_exploded = explode(buffer_dis)    
    buffer_exploded.crs={'init' : n[1]}

    #add number of predictions per farmsite to farmsite polygons
    count=sjoin(centroids, buffer_exploded, how='inner', op='within')
    buffer_exploded['cage count']=count.groupby('index_right')['index_right'].count()
    buffer_exploded['cage count']=count.groupby('index_right')['index_right'].count()
    buffer_exploded["farm ID"] = buffer_exploded.index + 1 #add farm ID

    print(len(buffer_exploded), 'total farmsite predictions based on farmsites')

    #select farmsites by number of predictions
    p = 4 #number of predictions per farmsite
    farmsites = buffer_exploded[buffer_exploded['cage count'] >= p]
    print(len(farmsites), 'farm site predictions containing', p, 'or more net pen predictions')

    #select predictions by farmsites
    select = farmsites
    select['predictions'] = 0
    select = select.dissolve(by='predictions')
    mask = centroids.within(select.loc[0, 'geometry'])
    farmsites=farmsites.drop(['predictions'], axis=1)
    d2_clip = centroids.loc[mask]

    print(len(d2_clip), 'net pen predictions within farm sites')
    print(d2_clip.crs)

    #add id and farm_id to predictions
    d2_clip_id=sjoin(farmsites, d2_clip, how='right', op='intersects')
    d2_clip_id=d2_clip_id.reset_index(drop=True)
    d2_clip_id["cage ID"] = d2_clip_id.index + 1 
    d2_clip_id['diameter']=d2_clip_id['radius'] * 2

    d2_clip_id['radius']=round(d2_clip_id['radius'],2)
    d2_clip_id['diameter']=round(d2_clip_id['diameter'],2)

    d2_clip=d2_clip_id[['farm ID', 'cage ID', 'radius', 'diameter', 'geometry']]
    print(len(d2_clip), 'net pen predictions')

    #generate farmsite extents
    envelope = gpd.GeoDataFrame(geometry = farmsites.envelope)
    farmsites2=farmsites.copy()
    farmsites2['geometry']=envelope['geometry']
    farmsites2=farmsites2[['farm ID', 'cage count', 'geometry']]
    print(len(farmsites2), 'farm site predictions')

    #exclusion of false positives
    path= f + n[0] + '_exclude.kml'
    gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
    exclude = gpd.read_file(path, driver='KML')
    exclude = exclude.to_crs({'init': n[1]})

    #create exclusion mask
    exclude['Dissolve'] = 0
    exclude_dis = exclude.dissolve(by='Dissolve')

    #eliminate net pen false positives
    mask = ~d2_clip.within(exclude_dis.loc[0, 'geometry'])
    d2_clip = d2_clip.loc[mask]
    print('final number of predicted net pens:', len(d2_clip))

    #eliminate farmsite false positives
    mask2 = ~farmsites2.within(exclude_dis.loc[0, 'geometry'])
    farmsites2 = farmsites2.loc[mask2]
    print('final number of predicted farm sites:', len(farmsites2))

    #generate farmsite centroids
    farmsites3 = farmsites2.copy()
    farm_pts = gpd.GeoDataFrame(farmsites3, geometry = farmsites3.centroid)
    
    #generate buffer and envelope for bing aquistion
    farm_pts_2 = farm_pts.copy()
    centroid_buffer = gpd.GeoDataFrame(geometry = farm_pts_2.buffer(500))
    envelope = gpd.GeoDataFrame(farmsites3, geometry = centroid_buffer.envelope)
    envelope = envelope['geometry'].to_crs(epsg=4326)
    #extract lat/long for each square polygon (envelope)
    coord_list = []
    for i in envelope.index:
        coords = (mapping(envelope.geometry[i])['coordinates'])
        coord_list.append(coords)

    #generate CSVs of bing targets and index
    #combine x/y point groups
    coord_all = []
    for i in range(len(coord_list)):
        coord_group = coord(coord_list[i]) #function to extract and format x/y points
        coord_all.append(coord_group) 
    targets = pd.concat(coord_all)
    print('total coordinates (5 per detection):', len(targets))

    targets.to_csv(f + n[0] + '.csv', index = None, header=True)

    index = pd.DataFrame(envelope.index)
    index = index[0] + 1
    index.to_csv(f + n[0] + '_index.csv', index = None, header=False)

    #generate net pen buffers
    def buffer(row):
         return row.geometry.buffer(row.radius)   
    d2_clip2 = d2_clip.copy()
    d2_clip2.crs
    buff = d2_clip2['geometry'] = d2_clip2.apply(buffer, axis=1)
    circles = gpd.GeoDataFrame(d2_clip2, geometry = buff, crs={'init': n[1]})
    
    # #export all centroids
    d2 = d2.to_crs({'init': n[1]})
    d2.to_file(f + n[0] + '_simrdwn.shp')
    d2['geometry'] = d2['geometry'].to_crs(epsg=4326)
    d2.to_file(f + n[0] + '_simrdwn.geojson', driver='GeoJSON')

    # #export buffered centroids
    circles.to_file(f + n[0] + '_simrdwn_pens.shp')
    circles['geometry'] = circles['geometry'].to_crs(epsg=4326)
    circles.to_file(f + n[0] + '_simrdwn_pens.geojson', driver='GeoJSON')

    #export centroids within farmsites
    d2_clip.to_file(f + n[0] + '_simrdwn_pts.shp')
    d2_clip['geometry'] = d2_clip['geometry'].to_crs(epsg=4326)
    d2_clip.to_file(f + n[0] + '_simrdwn_pts.geojson', driver='GeoJSON')

    # #export farmsites extents
    farmsites2.to_file(f + n[0] + '_simrdwn_farmsites_ext.shp')
    farmsites2['geometry'] = farmsites2['geometry'].to_crs(epsg=4326)
    farmsites2.to_file(f + n[0] + '_simrdwn_farmsites_ext.geojson', driver='GeoJSON')

    # #export farmsites points
    farm_pts=farm_pts_2
    farm_pts.to_file(f + n[0] + '_simrdwn_farm_pts.shp')
    farm_pts['geometry'] = farm_pts['geometry'].to_crs(epsg=4326)
    farm_pts.to_file(f + n[0] + '_simrdwn_farm_pts.geojson', driver='GeoJSON')

for i in countries:
    simrdwn(i)

country: TUR
13290 total SIMRDWN predictions
13290 total SIMRDWN predictions
10589 total SIMRDWN predictions less than or equal to 55 meters
10589 predictions aggregated to 3316 predictions


  warn('CRS of frames being joined does not match!')


3316 predictions
684 total farmsite predictions based on farmsites
146 farm site predictions containing 4 or more net pen predictions


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2663 net pen predictions within farm sites
{'init': 'EPSG:5637'}
2663 net pen predictions
146 farm site predictions
final number of predicted net pens: 2067
final number of predicted farm sites: 104
total coordinates (5 per detection): 520
country: ESP
5260 total SIMRDWN predictions
5260 total SIMRDWN predictions
1222 total SIMRDWN predictions less than or equal to 55 meters
1222 predictions aggregated to 850 predictions
850 predictions
527 total farmsite predictions based on farmsites
31 farm site predictions containing 4 or more net pen predictions
227 net pen predictions within farm sites
{'init': 'EPSG:2062'}
227 net pen predictions
31 farm site predictions
final number of predicted net pens: 29
final number of predicted farm sites: 3
total coordinates (5 per detection): 15
country: HRV
7548 total SIMRDWN predictions
7548 total SIMRDWN predictions
4640 total SIMRDWN predictions less than or equal to 55 meters
4640 predictions aggregated to 1250 predictions
1250 predictions
507 tota