# Read the predictions from the annot xml files and convert them into GIS shapefiles

In [None]:
# cell 2.0 - Run this load these functions
def get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,x,y):
    # supposing x and y are your pixel coordinate this 
    # is how to get the coordinate in space.
    posX = px_w * x + rot1 * y + xoffset
    posY = rot2 * x + px_h * y + yoffset

    # shift to the center of the pixel
    posX += px_w / 2.0
    posY += px_h / 2.0
    return posX,posY

def get_poly_from_geotif_with_x_y(geotif_fp,minx,miny,maxx,maxy):
    ds = gdal.Open(geotif_fp)
    # open the dataset and get the geo transform matrix

    xoffset, px_w, rot1, yoffset, rot2,px_h = ds.GetGeoTransform()

    #print("xoffset, px_w, rot1, yoffset, px_h, rot2",xoffset, px_w, rot1, yoffset, px_h, rot2)
    print("minx,miny,maxx,maxy",minx,miny,maxx,maxy)

    pos1x,pos1y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,minx,miny)
    pos2x,pos2y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,minx,maxy)
    pos3x,pos3y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,maxx,maxy)
    pos4x,pos4y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,maxx,miny)
    coords = [(pos1x,pos1y), (pos2x,pos2y), (pos3x,pos3y), (pos4x,pos4y)]

    #print("pos",pos1x,pos1y,pos2x,pos2y,pos3x,pos3y,pos4x,pos4y)
    poly = Polygon(coords)
    
    return poly 

In [None]:
# cell 2.1 - Run this to load a dictionary of files to process
# We have about 50,000 files to process.  Looping through them repeatedly takes a long time.
# Instead, create a dictionary of files indexed by area+"pan" (or pas).  Each entry holds a list of matching files
# This makes it easier to process these files by area.

import csv
from os import listdir
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200720T1614'

#make a dict of all the areas + pan (or pas)
area_crs_dict = {}
with open('/home/student/charcoalhearths/data_sheet.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        #print(row['Area'])
        area=row['Area']
        area="0"+area
        area=area[-3:]
        pan=""
        pas=""
        pan = row['PAN']
        if(pan == "Y"):
            area_crs_dict[area+'pan'] = []
        pas = row['PAS']
        if(pas == "Y"):
            area_crs_dict[area+'pas'] = []

# Now that the dictionary is created, add all of the matching files as a list linked to the entry.
# This dictionary will be used below.
annot_prediction_folder = '/storage/images/'+construction_type+'/predictions/'+cfg_name+"/unknown/"
for annot_filename in listdir(annot_prediction_folder):
    annot_area = annot_filename[:3]
    annot_panorpas = annot_filename[3:6]    
    print(annot_filename, annot_area,annot_panorpas)
    area_node = area_crs_dict[annot_area+annot_panorpas]
    area_node.append(annot_filename)
print(area_crs_dict) 

In [None]:
# cell 2.2 
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200720T1614'
model_epoch='0030'


annot_prediction_folder = '/storage/images/'+construction_type+'/predictions/'+cfg_name+"/unknown/"
split_tifs_folder = '/home/student/charcoal_hearth_hill/images/split_tifs/'
# display image with masks and bounding boxes
from os import listdir


from xml.etree import ElementTree
#from mrcnn.utils import Dataset
#from mrcnn.visualize import display_instances
#from mrcnn.utils import extract_bboxes
#https://gis.stackexchange.com/questions/92207/split-a-large-geotiff-into-smaller-regions-with-python-and-gdal

import numpy
from osgeo import gdal, osr
import math
from itertools import chain
import geopandas as gpd
from shapely.geometry import Point, Polygon
import numpy as np
import gdalnumeric
import os
def put_preds_in_shp(state_area_num,state_area_num_crs, panorpas):

    pred_polys = gpd.GeoDataFrame()
    pred_polys['geometry'] = None
    pred_polys.crs = {'init':'epsg:'+str(state_area_num_crs)}
    #pred_polys.crs = {'init':'epsg:32128'}

    import cv2

    #Store the results in XML    
    class_names = construction_type

    # find all images

    pa = area_crs_dict[str(state_area_num)+panorpas]
    for annot_filename in pa:
    
        print(annot_filename)
        #process only the files for this state land area, since other areas may not match crs
        if annot_filename.startswith(state_area_num):
            tree = ElementTree.parse(annot_prediction_folder+annot_filename)
            print(annot_prediction_folder+annot_filename)
            #print(tree)
            # get the root of the document
            root = tree.getroot()
            # extract each bounding box
    
            fn_image = root.find('./filename').text
            #object_present = root.find('./object_present').text
            fn_base = fn_image[:10]
            print(fn_base)
            box_num=0
            for obj in root.findall('./object'):
                score = obj.find('score').text
    
                box = obj.find('bndbox')
                box_num=obj.find('number').text
                box_num_pad = "00"+str(box_num)
                box_num_pad = box_num_pad[-2:]
                #boxes_correct[str(box_num)] = correct
                xmin = int(box.find('xmin').text)
                ymin = int(box.find('ymin').text)
                xmax = int(box.find('xmax').text)
                ymax = int(box.find('ymax').text)
                if(ymin>ymax):
                    ytemp = ymin
                    ymin = ymax
                    ymax=ytemp
                if(xmin>xmax):
                    xtemp = xmin
                    xmin = xmax
                    xmax=xtemp            
                coors = [xmin, ymin, xmax, ymax]
                print(coors)
        
                pred_poly = get_poly_from_geotif_with_x_y(split_tifs_folder+fn_base+".tif",xmin,ymin,xmax,ymax)
                new_pp_row = {'id':fn_base+box_num_pad, 'geometry':pred_poly}
                pred_polys = pred_polys.append(new_pp_row, ignore_index=True)

#pred_polys = pred_polys.to_crs(epsg=32128)
    outfolder = os.path.join("/home/student/charcoal_hearth_hill/polys/", (cfg_name+"/"))
    if not os.path.exists(outfolder):
        os.makedirs(outfolder)
    outfp = os.path.join(outfolder,(state_area_num + panorpas + "_predictions.shp"))
                         
# Write the data into that Shapefile
    if not pred_polys.empty:
        pred_polys.to_file(outfp)
        #pred_polys.head()
        pred_polys = pred_polys.to_crs({'init':'epsg:4326'})
        #pred_polys.head()
        outfp = os.path.join(outfolder,("4326_" + state_area_num + panorpas + "_predictions.shp"))
        # Write the data into that Shapefile
        pred_polys.to_file(outfp)

 

In [None]:
# cell 2.3 
import csv
with open('/home/student/charcoalhearths/data_sheet.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['Area'])
        area=row['Area']
        area="0"+area
        area=area[-3:]
        pan=""
        pas=""
        pan = row['PAN']
        if(pan == "Y"):
            put_preds_in_shp(str(area),32128,"pan")
        pas = row['PAS']
        if(pas == "Y"):
            put_preds_in_shp(str(area),32129,"pas")


In [None]:
# to be trashed.
pred_polys = pred_polys.to_crs({'init':'epsg:4326'})
outfp = "/storage/images/charcoal_hearth_hill/polys/4326_"+cfg_name+"_predictions.shp"
# Write the data into that Shapefile
pred_polys.to_file(outfp)




In [None]:
# I think this ended up being expensive (dictionary holding dictionaries)
# to be trashed.
import csv
#make a dict of all the areas
areas = {}
with open('/home/student/charcoalhearths/data_sheet.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        #print(row['Area'])
        area=row['Area']
        area="0"+area
        area=area[-3:]
        panorpas = {}
        areas[area] = panorpas
print(areas)
#add the PANs or PASs to the dict
with open('/home/student/charcoalhearths/data_sheet.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['Area'])
        area=row['Area']
        area="0"+area
        area=area[-3:]
        panorpas_dict = areas[area]
        pan=""
        pas=""
        pan = row['PAN']
        if(pan == "Y"):
            panorpas_dict['pan'] = []
        pas = row['PAS']
        if(pas == "Y"):
            panorpas_dict['pas'] = []
print(areas)

In [None]:
# I think this ended up being expensive (dictionary holding dictionaries)
# to be trashed.
from os import listdir
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200720T1614'


annot_prediction_folder = '/storage/images/'+construction_type+'/predictions/'+cfg_name+"/unknown/"
for annot_filename in listdir(annot_prediction_folder):
    annot_area = annot_filename[:3]
    annot_panorpas = annot_filename[3:6]    
    print(annot_filename, annot_area,annot_panorpas)
    area_node = areas[annot_area]
    panorpas_node = area_node[annot_panorpas]
    panorpas_node.append(annot_filename)
print(areas)    
    


# Convert Polygons to Points and remove Duplicates
For each PA State Game Land area, load the polygons.
Check if any existing points from previous areas processed are inside any of the polygons of this area.
If there are "matches" (duplicates), the polygons are removed (and stored in a dataframe of duplicates)
The left over unique polygons are then processed for their centroids.
These points are stored for output and also used to process the polygons for the remaining areas so see if there are duplicates for in the polygons of the remaining areas.

In [4]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
def preds_to_points(area, panorpas, pred_poly_folder, all_pred_points_df, dup_pred_polys_df):
    # print(all_pred_points_df.shape)
    # 4326_095pas_predictions.shp
    print(area, panorpas)
    area_pred_polys_path = os.path.join(pred_poly_folder,("4326_" + area + panorpas +"_predictions.shp"))
    if os.path.exists(area_pred_polys_path):
        area_pred_polys = gpd.read_file(area_pred_polys_path)
        # selection = pred_data[0:]
        # print(list(area_pred_polys))
        # print(area_pred_polys.shape)
        area_crs = 32128
        if(panorpas == "pan"):
            area_crs = 32128
        if(panorpas == "pas"):
            area_crs = 32128

        matched_pred_polys = list()    
        area_pred_polys.to_crs(area_crs)
        # for pred_poly in area_pred_polys:
        for index, row in area_pred_polys.iterrows():
            # print("row",row[0],row[1])
            pred_poly = row[1]
            # any_points = all_pred_points_df.within(pred_poly.loc[0, 'geometry'])
            any_points = all_pred_points_df.within(pred_poly)
            # print(any_points)
            if(any(any_points) == True):
                # print("MATCHES")
                matched_pred_polys.append(str(row[0]))
                
        # print(matched_pred_polys)
        # If there is more than 0 matches, remove them from the dataframe
        if(len(matched_pred_polys) > 0 ):
            print("area_pred_polys len before ",len(area_pred_polys))
            for mpp in matched_pred_polys:
                index_matches = area_pred_polys[area_pred_polys['id'] == mpp].index
                #dup_row = area_pred_polys.loc([area_pred_polys['id'] == mpp]
                # get the duplicate row
                dup_row = area_pred_polys.loc[area_pred_polys['id'] == mpp]
                #print("dup_row....",dup_row,dup_row['id'])
                #print("index_matches",index_matches)
                #print("dup_pred_polys_df len before",len(dup_pred_polys_df))
                # put the duplicate row into a dataframe it can be saved to check it.
                dup_pred_polys_df = dup_pred_polys_df.append(dup_row, ignore_index=True)
                print("dup_pred_polys_df len after",len(dup_pred_polys_df))
                area_pred_polys.drop(index_matches, inplace = True)
            # area_pred_polys.drop(matched_pred_polys)
            print("area_pred_polys len after ",len(area_pred_polys))
            
        # Create an empty geopandas GeoDataFrame
        area_pred_points_df = gpd.GeoDataFrame()
        area_pred_points_df.crs = {'init':'epsg:' + str(area_crs)}
        area_pred_points_df['geometry'] = area_pred_polys.centroid
        # make an id
        id_list = np.arange(1,len(area_pred_polys.centroid)+1)
        # print(id_list)
        id_list = [(area + panorpas + "-" + (("000"+str(i))[-4:])) for i in id_list]
        # print(id_list)
        area_pred_points_df['id'] = id_list
        
        dataframesList = [all_pred_points_df, area_pred_points_df]
        all_pred_points_df = gpd.GeoDataFrame(pd.concat(dataframesList, ignore_index=True), crs=dataframesList[0].crs)
        
        print("Total points area:", len(all_pred_points_df)," Total duplicates:", len(dup_pred_polys_df))
        return all_pred_points_df, dup_pred_polys_df
    else:
        print("Total points area:", len(all_pred_points_df)," Total duplicates:", len(dup_pred_polys_df))
        return all_pred_points_df, dup_pred_polys_df

import csv
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200720T1614'
pred_poly_folder = os.path.join("/home/student/charcoal_hearth_hill/polys/", (cfg_name + "/"))

# Create an empty geopandas GeoDataFrame
all_pred_points_df = gpd.GeoDataFrame()
all_pred_points_df['geometry'] = None
all_pred_points_df['id'] = None
all_pred_points_df.crs = {'init':'epsg:4326'}


# Create an empty geopandas GeoDataFrame for duplicates
dup_pred_polys_df = gpd.GeoDataFrame()
dup_pred_polys_df['geometry'] = None
dup_pred_polys_df['id'] = None
dup_pred_polys_df.crs = {'init':'epsg:4326'}



all_points_outfp = os.path.join(pred_poly_folder, "4326_000_hearth_prediction_points.shp")
dup_polys_outfp = os.path.join(pred_poly_folder, "4326_000_duplicate_hearth_prediction_polys.shp")

with open('/home/student/charcoalhearths/data_sheet.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['Area'])
        area=row['Area']
        area="0"+area
        area=area[-3:]

        pan=""
        pas=""
        pan = row['PAN']
        if(pan == "Y"):
            all_pred_points_df, dup_pred_polys_df = preds_to_points(str(area), "pan", pred_poly_folder, all_pred_points_df, dup_pred_polys_df)
        pas = row['PAS']
        if(pas == "Y"):
            all_pred_points_df, dup_pred_polys_df = preds_to_points(str(area), "pas", pred_poly_folder, all_pred_points_df, dup_pred_polys_df)
# Determine the output path for the Shapefile

        # Write the data into that Shapefile
        all_pred_points_df.to_file(all_points_outfp)
        print("Total points:", len(all_pred_points_df))
        if not dup_pred_polys_df.empty:
            dup_pred_polys_df.to_file(dup_polys_outfp)
            print("Total duplicate polys:", len(dup_pred_polys_df))

12
012 pan


  area_pred_points_df.crs = {'init':'epsg:' + str(area_crs)}

  area_pred_points_df['geometry'] = area_pred_polys.centroid

  id_list = np.arange(1,len(area_pred_polys.centroid)+1)
  all_pred_points_df = gpd.GeoDataFrame(pd.concat(dataframesList, ignore_index=True), crs=dataframesList[0].crs)


Total points area: 108  Total duplicates: 0
Total points: 108
13
013 pan
Total points area: 248  Total duplicates: 0
Total points: 248
14
014 pan
Total points area: 304  Total duplicates: 0
Total points: 304
24
024 pan
Total points area: 415  Total duplicates: 0
Total points: 415
25
025 pan
Total points area: 556  Total duplicates: 0
Total points: 556
26
026 pas
Total points area: 688  Total duplicates: 0
Total points: 688
28
028 pan
Total points area: 800  Total duplicates: 0
Total points: 800
29
029 pan
Total points area: 858  Total duplicates: 0
Total points: 858
30
030 pan
Total points area: 886  Total duplicates: 0
Total points: 886
31
031 pan
Total points area: 919  Total duplicates: 0
Total points: 919
33
033 pan
Total points area: 1080  Total duplicates: 0
033 pas
area_pred_polys len before  41
dup_pred_polys_df len after 1
dup_pred_polys_df len after 2
dup_pred_polys_df len after 3
dup_pred_polys_df len after 4
dup_pred_polys_df len after 5
dup_pred_polys_df len after 6
dup_pr

Total points: 6910
Total duplicate polys: 151
61
061 pan
Total points area: 6978  Total duplicates: 151
Total points: 6978
Total duplicate polys: 151
62
062 pan
Total points area: 7071  Total duplicates: 151
Total points: 7071
Total duplicate polys: 151
63
063 pan
Total points area: 7340  Total duplicates: 151
Total points: 7340
Total duplicate polys: 151
64
064 pan
Total points area: 7415  Total duplicates: 151
Total points: 7415
Total duplicate polys: 151
65
065 pas
area_pred_polys len before  46
dup_pred_polys_df len after 152
dup_pred_polys_df len after 153
dup_pred_polys_df len after 154
dup_pred_polys_df len after 155
area_pred_polys len after  42
Total points area: 7457  Total duplicates: 155
Total points: 7457
Total duplicate polys: 155
66
066 pan
area_pred_polys len before  65
dup_pred_polys_df len after 156
dup_pred_polys_df len after 157
dup_pred_polys_df len after 158
dup_pred_polys_df len after 159
dup_pred_polys_df len after 160
dup_pred_polys_df len after 161
dup_pred_po

Total points: 12993
Total duplicate polys: 245
106
106 pas
Total points area: 14558  Total duplicates: 245
Total points: 14558
Total duplicate polys: 245
107
107 pas
Total points area: 14691  Total duplicates: 245
Total points: 14691
Total duplicate polys: 245
108
108 pan
Total points area: 14697  Total duplicates: 245
108 pas
area_pred_polys len before  207
dup_pred_polys_df len after 246
dup_pred_polys_df len after 247
area_pred_polys len after  205
Total points area: 14902  Total duplicates: 247
Total points: 14902
Total duplicate polys: 247
109
109 pan
Total points area: 14939  Total duplicates: 247
Total points: 14939
Total duplicate polys: 247
110
110 pas
area_pred_polys len before  1059
dup_pred_polys_df len after 248
dup_pred_polys_df len after 249
dup_pred_polys_df len after 250
dup_pred_polys_df len after 251
dup_pred_polys_df len after 252
dup_pred_polys_df len after 253
dup_pred_polys_df len after 254
dup_pred_polys_df len after 255
dup_pred_polys_df len after 256
dup_pred_

dup_pred_polys_df len after 504
dup_pred_polys_df len after 505
dup_pred_polys_df len after 506
dup_pred_polys_df len after 507
dup_pred_polys_df len after 508
dup_pred_polys_df len after 509
dup_pred_polys_df len after 510
dup_pred_polys_df len after 511
dup_pred_polys_df len after 512
dup_pred_polys_df len after 513
dup_pred_polys_df len after 514
dup_pred_polys_df len after 515
dup_pred_polys_df len after 516
dup_pred_polys_df len after 517
dup_pred_polys_df len after 518
dup_pred_polys_df len after 519
dup_pred_polys_df len after 520
dup_pred_polys_df len after 521
dup_pred_polys_df len after 522
dup_pred_polys_df len after 523
dup_pred_polys_df len after 524
dup_pred_polys_df len after 525
dup_pred_polys_df len after 526
dup_pred_polys_df len after 527
dup_pred_polys_df len after 528
dup_pred_polys_df len after 529
dup_pred_polys_df len after 530
dup_pred_polys_df len after 531
dup_pred_polys_df len after 532
dup_pred_polys_df len after 533
dup_pred_polys_df len after 534
dup_pred

Total points area: 16519  Total duplicates: 737
Total points: 16519
Total duplicate polys: 737
117
117 pas
Total points area: 16541  Total duplicates: 737
Total points: 16541
Total duplicate polys: 737
118
118 pas
area_pred_polys len before  383
dup_pred_polys_df len after 738
dup_pred_polys_df len after 739
dup_pred_polys_df len after 740
dup_pred_polys_df len after 741
dup_pred_polys_df len after 742
dup_pred_polys_df len after 743
dup_pred_polys_df len after 744
dup_pred_polys_df len after 745
dup_pred_polys_df len after 746
dup_pred_polys_df len after 747
dup_pred_polys_df len after 748
dup_pred_polys_df len after 749
dup_pred_polys_df len after 750
dup_pred_polys_df len after 751
dup_pred_polys_df len after 752
dup_pred_polys_df len after 753
dup_pred_polys_df len after 754
dup_pred_polys_df len after 755
dup_pred_polys_df len after 756
area_pred_polys len after  364
Total points area: 16905  Total duplicates: 756
Total points: 16905
Total duplicate polys: 756
119
119 pan
area_pre

Total points area: 19906  Total duplicates: 834
Total points: 19906
Total duplicate polys: 834
156
156 pas
area_pred_polys len before  854
dup_pred_polys_df len after 835
dup_pred_polys_df len after 836
dup_pred_polys_df len after 837
dup_pred_polys_df len after 838
dup_pred_polys_df len after 839
dup_pred_polys_df len after 840
dup_pred_polys_df len after 841
dup_pred_polys_df len after 842
dup_pred_polys_df len after 843
dup_pred_polys_df len after 844
dup_pred_polys_df len after 845
dup_pred_polys_df len after 846
dup_pred_polys_df len after 847
dup_pred_polys_df len after 848
dup_pred_polys_df len after 849
dup_pred_polys_df len after 850
dup_pred_polys_df len after 851
dup_pred_polys_df len after 852
dup_pred_polys_df len after 853
dup_pred_polys_df len after 854
dup_pred_polys_df len after 855
dup_pred_polys_df len after 856
dup_pred_polys_df len after 857
dup_pred_polys_df len after 858
dup_pred_polys_df len after 859
dup_pred_polys_df len after 860
dup_pred_polys_df len after 8

dup_pred_polys_df len after 1110
dup_pred_polys_df len after 1111
dup_pred_polys_df len after 1112
dup_pred_polys_df len after 1113
dup_pred_polys_df len after 1114
dup_pred_polys_df len after 1115
dup_pred_polys_df len after 1116
dup_pred_polys_df len after 1117
dup_pred_polys_df len after 1118
dup_pred_polys_df len after 1119
dup_pred_polys_df len after 1120
dup_pred_polys_df len after 1121
dup_pred_polys_df len after 1122
dup_pred_polys_df len after 1123
dup_pred_polys_df len after 1124
dup_pred_polys_df len after 1125
dup_pred_polys_df len after 1126
dup_pred_polys_df len after 1127
dup_pred_polys_df len after 1128
area_pred_polys len after  251
Total points area: 20870  Total duplicates: 1128
Total points: 20870
Total duplicate polys: 1128
159
159 pan
Total points area: 20990  Total duplicates: 1128
Total points: 20990
Total duplicate polys: 1128
160
160 pas
area_pred_polys len before  259
dup_pred_polys_df len after 1129
dup_pred_polys_df len after 1130
dup_pred_polys_df len afte

Total points: 21026
Total duplicate polys: 1351
161
161 pan
area_pred_polys len before  19
dup_pred_polys_df len after 1352
dup_pred_polys_df len after 1353
dup_pred_polys_df len after 1354
dup_pred_polys_df len after 1355
area_pred_polys len after  15
Total points area: 21041  Total duplicates: 1355
Total points: 21041
Total duplicate polys: 1355
162
162 pan
area_pred_polys len before  18
dup_pred_polys_df len after 1356
dup_pred_polys_df len after 1357
dup_pred_polys_df len after 1358
area_pred_polys len after  15
Total points area: 21056  Total duplicates: 1358
Total points: 21056
Total duplicate polys: 1358
163
163 pan
Total points area: 21090  Total duplicates: 1358
Total points: 21090
Total duplicate polys: 1358
164
164 pas
Total points area: 21105  Total duplicates: 1358
Total points: 21105
Total duplicate polys: 1358
165
165 pan
Total points area: 21143  Total duplicates: 1358
Total points: 21143
Total duplicate polys: 1358
166
166 pas
area_pred_polys len before  633
dup_pred_p

Total points area: 21996  Total duplicates: 1571
Total points: 21996
Total duplicate polys: 1571
169
169 pas
Total points area: 22030  Total duplicates: 1571
Total points: 22030
Total duplicate polys: 1571
170
170 pas
Total points area: 22576  Total duplicates: 1571
Total points: 22576
Total duplicate polys: 1571
171
171 pas
Total points area: 22614  Total duplicates: 1571
Total points: 22614
Total duplicate polys: 1571
172
172 pan
Total points area: 22634  Total duplicates: 1571
Total points: 22634
Total duplicate polys: 1571
173
173 pas
Total points area: 22668  Total duplicates: 1571
Total points: 22668
Total duplicate polys: 1571
98
098 pan
Total points area: 22672  Total duplicates: 1571
Total points: 22672
Total duplicate polys: 1571
99
099 pas
area_pred_polys len before  112
dup_pred_polys_df len after 1572
dup_pred_polys_df len after 1573
dup_pred_polys_df len after 1574
dup_pred_polys_df len after 1575
dup_pred_polys_df len after 1576
dup_pred_polys_df len after 1577
area_pred

Total points: 24054
Total duplicate polys: 1686
201
201 pan
Total points area: 24060  Total duplicates: 1686
201 pas
area_pred_polys len before  7
dup_pred_polys_df len after 1687
dup_pred_polys_df len after 1688
area_pred_polys len after  5
Total points area: 24065  Total duplicates: 1688
Total points: 24065
Total duplicate polys: 1688
202
202 pan
Total points area: 24087  Total duplicates: 1688
Total points: 24087
Total duplicate polys: 1688
203
203 pas
Total points area: 24109  Total duplicates: 1688
Total points: 24109
Total duplicate polys: 1688
204
204 pan
Total points area: 24121  Total duplicates: 1688
Total points: 24121
Total duplicate polys: 1688
205
205 pas
Total points area: 24144  Total duplicates: 1688
Total points: 24144
Total duplicate polys: 1688
206
206 pan
area_pred_polys len before  20
dup_pred_polys_df len after 1689
dup_pred_polys_df len after 1690
area_pred_polys len after  18
Total points area: 24162  Total duplicates: 1690
Total points: 24162
Total duplicate p

dup_pred_polys_df len after 1867
dup_pred_polys_df len after 1868
dup_pred_polys_df len after 1869
dup_pred_polys_df len after 1870
dup_pred_polys_df len after 1871
dup_pred_polys_df len after 1872
dup_pred_polys_df len after 1873
dup_pred_polys_df len after 1874
dup_pred_polys_df len after 1875
dup_pred_polys_df len after 1876
dup_pred_polys_df len after 1877
dup_pred_polys_df len after 1878
dup_pred_polys_df len after 1879
dup_pred_polys_df len after 1880
dup_pred_polys_df len after 1881
dup_pred_polys_df len after 1882
dup_pred_polys_df len after 1883
dup_pred_polys_df len after 1884
dup_pred_polys_df len after 1885
dup_pred_polys_df len after 1886
dup_pred_polys_df len after 1887
dup_pred_polys_df len after 1888
dup_pred_polys_df len after 1889
dup_pred_polys_df len after 1890
dup_pred_polys_df len after 1891
dup_pred_polys_df len after 1892
dup_pred_polys_df len after 1893
dup_pred_polys_df len after 1894
dup_pred_polys_df len after 1895
dup_pred_polys_df len after 1896
dup_pred_p

dup_pred_polys_df len after 2152
dup_pred_polys_df len after 2153
dup_pred_polys_df len after 2154
dup_pred_polys_df len after 2155
dup_pred_polys_df len after 2156
dup_pred_polys_df len after 2157
dup_pred_polys_df len after 2158
dup_pred_polys_df len after 2159
dup_pred_polys_df len after 2160
dup_pred_polys_df len after 2161
dup_pred_polys_df len after 2162
dup_pred_polys_df len after 2163
dup_pred_polys_df len after 2164
dup_pred_polys_df len after 2165
dup_pred_polys_df len after 2166
dup_pred_polys_df len after 2167
dup_pred_polys_df len after 2168
dup_pred_polys_df len after 2169
dup_pred_polys_df len after 2170
dup_pred_polys_df len after 2171
dup_pred_polys_df len after 2172
dup_pred_polys_df len after 2173
dup_pred_polys_df len after 2174
dup_pred_polys_df len after 2175
dup_pred_polys_df len after 2176
dup_pred_polys_df len after 2177
dup_pred_polys_df len after 2178
dup_pred_polys_df len after 2179
dup_pred_polys_df len after 2180
dup_pred_polys_df len after 2181
dup_pred_p

Total points: 27313
Total duplicate polys: 2406
218
218 pan
area_pred_polys len before  42
dup_pred_polys_df len after 2407
dup_pred_polys_df len after 2408
dup_pred_polys_df len after 2409
dup_pred_polys_df len after 2410
dup_pred_polys_df len after 2411
dup_pred_polys_df len after 2412
dup_pred_polys_df len after 2413
dup_pred_polys_df len after 2414
area_pred_polys len after  34
Total points area: 27347  Total duplicates: 2414
Total points: 27347
Total duplicate polys: 2414
219
219 pan
Total points area: 27434  Total duplicates: 2414
Total points: 27434
Total duplicate polys: 2414
220
220 pas
Total points area: 27543  Total duplicates: 2414
Total points: 27543
Total duplicate polys: 2414
221
221 pan
area_pred_polys len before  142
dup_pred_polys_df len after 2415
dup_pred_polys_df len after 2416
dup_pred_polys_df len after 2417
dup_pred_polys_df len after 2418
dup_pred_polys_df len after 2419
dup_pred_polys_df len after 2420
dup_pred_polys_df len after 2421
dup_pred_polys_df len aft

Total points: 29904
Total duplicate polys: 2568
242
242 pas
Total points area: 29930  Total duplicates: 2568
Total points: 29930
Total duplicate polys: 2568
243
243 pas
Total points area: 29951  Total duplicates: 2568
Total points: 29951
Total duplicate polys: 2568
244
244 pan
area_pred_polys len before  50
dup_pred_polys_df len after 2569
dup_pred_polys_df len after 2570
dup_pred_polys_df len after 2571
dup_pred_polys_df len after 2572
dup_pred_polys_df len after 2573
area_pred_polys len after  45
Total points area: 29996  Total duplicates: 2573
Total points: 29996
Total duplicate polys: 2573
245
245 pas
Total points area: 30085  Total duplicates: 2573
Total points: 30085
Total duplicate polys: 2573
246
246 pas
Total points area: 30128  Total duplicates: 2573
Total points: 30128
Total duplicate polys: 2573
247
247 pas
Total points area: 30147  Total duplicates: 2573
Total points: 30147
Total duplicate polys: 2573
248
248 pas
Total points area: 30155  Total duplicates: 2573
Total point

Total points: 31231
Total duplicate polys: 2712
267
267 pas
area_pred_polys len before  84
dup_pred_polys_df len after 2713
dup_pred_polys_df len after 2714
dup_pred_polys_df len after 2715
dup_pred_polys_df len after 2716
dup_pred_polys_df len after 2717
dup_pred_polys_df len after 2718
dup_pred_polys_df len after 2719
dup_pred_polys_df len after 2720
dup_pred_polys_df len after 2721
dup_pred_polys_df len after 2722
dup_pred_polys_df len after 2723
dup_pred_polys_df len after 2724
dup_pred_polys_df len after 2725
dup_pred_polys_df len after 2726
dup_pred_polys_df len after 2727
dup_pred_polys_df len after 2728
dup_pred_polys_df len after 2729
dup_pred_polys_df len after 2730
dup_pred_polys_df len after 2731
area_pred_polys len after  65
Total points area: 31296  Total duplicates: 2731
Total points: 31296
Total duplicate polys: 2731
268
268 pan
Total points area: 31345  Total duplicates: 2731
Total points: 31345
Total duplicate polys: 2731
269
269 pan
area_pred_polys len before  25
dup

Total points: 32826
Total duplicate polys: 2875
290
290 pas
Total points area: 32826  Total duplicates: 2875
Total points: 32826
Total duplicate polys: 2875
291
291 pan
Total points area: 32849  Total duplicates: 2875
Total points: 32849
Total duplicate polys: 2875
292
292 pan
area_pred_polys len before  11
dup_pred_polys_df len after 2876
dup_pred_polys_df len after 2877
dup_pred_polys_df len after 2878
area_pred_polys len after  8
Total points area: 32857  Total duplicates: 2878
Total points: 32857
Total duplicate polys: 2878
293
293 pan
area_pred_polys len before  64
dup_pred_polys_df len after 2879
dup_pred_polys_df len after 2880
dup_pred_polys_df len after 2881
dup_pred_polys_df len after 2882
dup_pred_polys_df len after 2883
dup_pred_polys_df len after 2884
dup_pred_polys_df len after 2885
dup_pred_polys_df len after 2886
dup_pred_polys_df len after 2887
area_pred_polys len after  55
Total points area: 32912  Total duplicates: 2887
Total points: 32912
Total duplicate polys: 2887

dup_pred_polys_df len after 3030
dup_pred_polys_df len after 3031
dup_pred_polys_df len after 3032
dup_pred_polys_df len after 3033
dup_pred_polys_df len after 3034
area_pred_polys len after  121
Total points area: 35932  Total duplicates: 3034
Total points: 35932
Total duplicate polys: 3034
324
324 pas
Total points area: 36006  Total duplicates: 3034
Total points: 36006
Total duplicate polys: 3034
325
325 pan
Total points area: 36035  Total duplicates: 3034
Total points: 36035
Total duplicate polys: 3034
326
326 pas
Total points area: 36109  Total duplicates: 3034
Total points: 36109
Total duplicate polys: 3034
327
327 pas
Total points area: 36156  Total duplicates: 3034
Total points: 36156
Total duplicate polys: 3034
328
328 pas
Total points area: 36192  Total duplicates: 3034
Total points: 36192
Total duplicate polys: 3034
329
329 pan
area_pred_polys len before  37
dup_pred_polys_df len after 3035
area_pred_polys len after  36
Total points area: 36228  Total duplicates: 3035
329 pas

# Obsolete # Take the predictions, union the polygons, get centroids, make points

In [None]:
# Obsolete

import geopandas as gpd
import os
def preds_to_points(area, panorpas, pred_poly_folder, all_polys):
    print(all_polys.shape)
    # 4326_095pas_predictions.shp
    area_pred_polys_path = os.path.join(pred_poly_folder,("4326_" + area + panorpas +"_predictions.shp"))
    if os.path.exists(area_pred_polys_path):
        area_pred_polys = gpd.read_file(area_pred_polys_path)
        #selection = pred_data[0:]
        print(list(area_pred_polys))
        print(area_pred_polys.shape)
        # union = all_polys.union(area_pred_polys)
        res_intersection = geopandas.overlay(area_pred_polys, all_polys, how='intersection')

        return res_intersection
    else:
        return all_polys

import csv
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200720T1614'
pred_poly_folder = os.path.join("/home/student/charcoal_hearth_hill/polys/", (cfg_name + "/"))

# Create an empty geopandas GeoDataFrame
all_pred_points_df = gpd.GeoDataFrame()
all_pred_points_df['geometry'] = None
all_pred_points_df.crs = {'init':'epsg:4326'}

all_polys = gpd.read_file(os.path.join(pred_poly_folder,("4326_012pan_predictions.shp")))
all_polys_outfp = os.path.join(pred_poly_folder, "4326_000_hearth_prediction_polys.shp")

with open('/home/student/charcoalhearths/data_sheet.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['Area'])
        area=row['Area']
        area="0"+area
        area=area[-3:]
        pan=""
        pas=""
        pan = row['PAN']
        if(pan == "Y"):
            all_polys = preds_to_points(str(area), "pan", pred_poly_folder, all_polys)
        pas = row['PAS']
        if(pas == "Y"):
            all_polys = preds_to_points(str(area), "pas", pred_poly_folder, all_polys)
# Determine the output path for the Shapefile
outfp = os.path.join(pred_poly_folder, "4326_000_hearth_prediction_polys.shp")
# Write the data into that Shapefile
all_polys.to_file(outfp)