# Read the predictions from the annot xml files and convert them into GIS shapefiles

In [1]:
# cell 1
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# cell 2
!pip install geopandas

Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[?25l[K     |▎                               | 10 kB 13.3 MB/s eta 0:00:01[K     |▋                               | 20 kB 15.0 MB/s eta 0:00:01[K     |█                               | 30 kB 17.5 MB/s eta 0:00:01[K     |█▎                              | 40 kB 12.5 MB/s eta 0:00:01[K     |█▋                              | 51 kB 9.5 MB/s eta 0:00:01[K     |██                              | 61 kB 9.0 MB/s eta 0:00:01[K     |██▎                             | 71 kB 6.1 MB/s eta 0:00:01[K     |██▌                             | 81 kB 6.7 MB/s eta 0:00:01[K     |██▉                             | 92 kB 7.4 MB/s eta 0:00:01[K     |███▏                            | 102 kB 7.7 MB/s eta 0:00:01[K     |███▌                            | 112 kB 7.7 MB/s eta 0:00:01[K     |███▉                            | 122 kB 7.7 MB/s eta 0:00:01[K     |████▏                           | 133 kB 7.7 MB/s eta 0

In [None]:
# cell 3 - Run this load these functions
def get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,x,y):
    # supposing x and y are your pixel coordinate this 
    # is how to get the coordinate in space.
    posX = px_w * x + rot1 * y + xoffset
    posY = rot2 * x + px_h * y + yoffset

    # shift to the center of the pixel
    posX += px_w / 2.0
    posY += px_h / 2.0
    return posX,posY

def get_poly_from_geotif_with_x_y(geotif_fp,minx,miny,maxx,maxy):
    ds = gdal.Open(geotif_fp)
    # open the dataset and get the geo transform matrix

    xoffset, px_w, rot1, yoffset, rot2,px_h = ds.GetGeoTransform()

    #print("xoffset, px_w, rot1, yoffset, px_h, rot2",xoffset, px_w, rot1, yoffset, px_h, rot2)
    #print("minx,miny,maxx,maxy",minx,miny,maxx,maxy)

    pos1x,pos1y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,minx,miny)
    pos2x,pos2y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,minx,maxy)
    pos3x,pos3y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,maxx,maxy)
    pos4x,pos4y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,maxx,miny)
    coords = [(pos1x,pos1y), (pos2x,pos2y), (pos3x,pos3y), (pos4x,pos4y)]

    #print("pos",pos1x,pos1y,pos2x,pos2y,pos3x,pos3y,pos4x,pos4y)
    poly = Polygon(coords)
    
    return poly 

In [None]:
# cell 4 - Run this to load a dictionary of files to process
# Looping through them repeatedly takes a long time.
# Instead, create a dictionary of files indexed by area. Each entry holds a list of matching files
# This makes it easier to process these files by area.
batch_group = '1000-1599'

import csv
import os
from os import listdir
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200826T2315'

#make a dict of all the areas + pan (or pas)
area_crs_dict = {}
area_crs_dict["catoctin_1"] = []

# Now that the dictionary is created, add all of the matching files as a list linked to the entry.
# This dictionary will be used below.

annot_prediction_folder = os.path.join('/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/cfg20200826T2315/unknown/'+batch_group+'/')

for annot_filename in listdir(annot_prediction_folder):
    annot_area = "catoctin_1"
    print(annot_filename, annot_area)
    area_node = area_crs_dict[annot_area]
    area_node.append(annot_filename)
print(area_crs_dict) 

slope_20120310_001948_utm.xml catoctin_1
slope_20120310_001985_utm.xml catoctin_1
slope_20120310_001984_utm.xml catoctin_1
slope_20120310_001949_utm.xml catoctin_1
slope_20120310_001986_utm.xml catoctin_1
slope_20120310_002019_utm.xml catoctin_1
slope_20120310_002016_utm.xml catoctin_1
slope_20120310_002017_utm.xml catoctin_1
slope_20120310_002018_utm.xml catoctin_1
slope_20120310_002050_utm.xml catoctin_1
slope_20120310_002047_utm.xml catoctin_1
slope_20120310_002048_utm.xml catoctin_1
slope_20120310_002049_utm.xml catoctin_1
slope_20120310_002075_utm.xml catoctin_1
slope_20120310_002074_utm.xml catoctin_1
slope_20120310_002072_utm.xml catoctin_1
slope_20120310_002073_utm.xml catoctin_1
slope_20120310_002076_utm.xml catoctin_1
slope_20120310_002077_utm.xml catoctin_1
slope_20120310_002095_utm.xml catoctin_1
slope_20120310_002096_utm.xml catoctin_1
slope_20120310_002097_utm.xml catoctin_1
slope_20120310_002100_utm.xml catoctin_1
slope_20120310_002098_utm.xml catoctin_1
slope_20120310_0

In [None]:
# cell 5
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200826T2315'
model_epoch='0016'

split_tifs_folder = '/content/drive/MyDrive/crane_maryland/slope_images/slope_'+ batch_group +'/'
# display image with masks and bounding boxes

from xml.etree import ElementTree
#https://gis.stackexchange.com/questions/92207/split-a-large-geotiff-into-smaller-regions-with-python-and-gdal
import os
from os import listdir
import numpy
from osgeo import gdal, osr
import math
from itertools import chain
import geopandas as gpd
from shapely.geometry import Point, Polygon
import numpy as np
import gdalnumeric

import cv2
def put_preds_in_shp(state_area,state_area_num_crs):

    pred_polys = gpd.GeoDataFrame()
    pred_polys['geometry'] = None
    
    pred_polys.crs = ("EPSG:" + str(state_area_num_crs))    
    #pred_polys.geometry = pred_polys.geometry.crs(epsg=state_area_num_crs)
    pred_polys.geometry.crs = ("EPSG:" + str(state_area_num_crs))
    print("pred_polys.crs",pred_polys.crs, pred_polys.geometry.crs)    

    #Store the results in XML    
    class_names = construction_type

    # find all images
    pa = area_crs_dict[str(state_area)]
    for annot_filename in pa:
    
        #print(annot_filename)
        #process only the files for this state land area, since other areas may not match crs
        #if annot_filename.startswith(state_area_num):
        tree = ElementTree.parse(annot_prediction_folder+annot_filename)
        print(annot_prediction_folder+annot_filename)
        #print(tree)
        # get the root of the document
        root = tree.getroot()
        # extract each bounding box
    
        fn_image = root.find('./filename').text
        #object_present = root.find('./object_present').text
        fn_base = fn_image[:-4]
        #print(fn_base)
        box_num=0
        for obj in root.findall('./object'):
            score = obj.find('score').text
    
            box = obj.find('bndbox')
            box_num=obj.find('number').text
            box_num_pad = "00"+str(box_num)
            box_num_pad = box_num_pad[-2:]
            xmin = int(box.find('xmin').text)
            ymin = int(box.find('ymin').text)
            xmax = int(box.find('xmax').text)
            ymax = int(box.find('ymax').text)
            if(ymin>ymax):
                ytemp = ymin
                ymin = ymax
                ymax=ytemp
            if(xmin>xmax):
                xtemp = xmin
                xmin = xmax
                xmax=xtemp            
            coors = [xmin, ymin, xmax, ymax]
            #print("score", score, coors)
            print(os.path.join(split_tifs_folder+(fn_base+".tif")))
            try:
                pred_poly = get_poly_from_geotif_with_x_y(os.path.join(split_tifs_folder+(fn_base+".tif")),xmin,ymin,xmax,ymax)
                new_pp_row = {'id':fn_base+box_num_pad, 'geometry':pred_poly, 'score':score}
                pred_polys = pred_polys.append(new_pp_row, ignore_index=True)
            except:
                print("ERROR with file above ^")
            #print("pred_polys.crs",pred_polys.crs, pred_polys.geometry.crs)
            pred_polys.geometry.crs = ("EPSG:" + str(state_area_num_crs))
            #print("pred_polys.crs",pred_polys.crs, pred_polys.geometry.crs)


    outfolder = os.path.join("/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/", (cfg_name+"/"), "polys/", (batch_group+'/'))
    if not os.path.exists(outfolder):
        os.makedirs(outfolder)
    outfp = os.path.join(outfolder,(state_area + "_predictions.shp"))
                         
# Write the data into that Shapefile
    if not pred_polys.empty:
        #pred_polys.head()
        #pred_polys = pred_polys.to_crs({'init':'epsg:4326'})
        pred_polys = pred_polys.to_crs(epsg = 26918)
        print("pred_polys.crs",pred_polys.crs, pred_polys.geometry.crs)
        pred_polys.to_file(outfp)
        print("File written to", outfp)

        """
        crs_4326 = 4326
        pred_polys.geometry = pred_polys.geometry.to_crs(crs=crs_4326)
        pred_polys.to_crs(crs=crs_4326)
        pred_polys = pred_polys.to_crs(epsg=crs_4326)
        
        #pred_polys = pred_polys.set_crs(epsg = 4326)
        #pred_polys.head()
        outfp = os.path.join(outfolder,("4326_" + state_area + "_predictions.shp"))
        # Write the data into that Shapefile
        pred_polys.to_file(outfp)
        """
 

In [None]:
# cell 6
put_preds_in_shp("catoctin_1",26918)

pred_polys.crs EPSG:26918 EPSG:26918
/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/cfg20200826T2315/unknown/1000-1599/slope_20120310_001948_utm.xml
/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/cfg20200826T2315/unknown/1000-1599/slope_20120310_001985_utm.xml
/content/drive/MyDrive/crane_maryland/slope_images/slope_1000-1599/slope_20120310_001985_utm.tif
/content/drive/MyDrive/crane_maryland/slope_images/slope_1000-1599/slope_20120310_001985_utm.tif
/content/drive/MyDrive/crane_maryland/slope_images/slope_1000-1599/slope_20120310_001985_utm.tif
/content/drive/MyDrive/crane_maryland/slope_images/slope_1000-1599/slope_20120310_001985_utm.tif
/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/cfg20200826T2315/unknown/1000-1599/slope_20120310_001984_utm.xml
/content/drive/MyDrive/crane_maryland/slope_images/slope_1000-1599/slope_20120310_001984_utm.tif
/content/drive/MyDrive/crane_maryland/slope_images/slope_1000-1599/slope_

In [None]:
geotif_fp = '/content/drive/MyDrive/crane_maryland/slope_images/slope_0-199/slope_20120130_17SQD1280_utm.tif'
minx = 294
miny = 1478
maxx = 337
maxy = 1521

# pos 197092.296875 4381544.0 197092.296875 4381501.0 197135.296875 4381501.0 197135.296875 4381544.0


ds = gdal.Open(geotif_fp)
print(ds.GetProjection())
# open the dataset and get the geo transform matrix

xoffset, px_w, rot1, yoffset, rot2,px_h = ds.GetGeoTransform()

#print("xoffset, px_w, rot1, yoffset, px_h, rot2",xoffset, px_w, rot1, yoffset, px_h, rot2)
print("minx,miny,maxx,maxy",minx,miny,maxx,maxy)

pos1x,pos1y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,minx,miny)
pos2x,pos2y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,minx,maxy)
pos3x,pos3y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,maxx,maxy)
pos4x,pos4y = get_posx_posy(xoffset, px_w, rot1, yoffset, px_h, rot2,maxx,miny)
coords = [(pos1x,pos1y), (pos2x,pos2y), (pos3x,pos3y), (pos4x,pos4y)]

print("pos",pos1x,pos1y,pos2x,pos2y,pos3x,pos3y,pos4x,pos4y)
poly = Polygon(coords)

PROJCS["NAD83 / UTM zone 18N",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4269"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-75],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","26918"]]
minx,miny,maxx,maxy 294 1478 337 1521
pos 197092.296875 4381544.0 197092.296875 4381501.0 197135.296875 4381501.0 197135.296875 4381544.0


# Convert Polygons to Points and remove Duplicates
For each area, load the polygons.
Check if any existing points from previous areas processed are inside any of the polygons of this area.
If there are "matches" (duplicates), the polygons are removed (and stored in a dataframe of duplicates)
The left over unique polygons are then processed for their centroids.
These points are stored for output and also used to process the polygons for the remaining areas so see if there are duplicates for in the polygons of the remaining areas.

In [8]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import csv
def preds_to_points(area, pred_poly_folder, all_pred_points_df, dup_pred_polys_df, area_crs = 26918):
    # print(all_pred_points_df.shape)
    print(area)
    area_pred_polys_path = os.path.join(pred_poly_folder,(area + "_predictions.shp"))
    print(area_pred_polys_path)
    if os.path.exists(area_pred_polys_path):
        area_pred_polys = gpd.read_file(area_pred_polys_path)
        # selection = pred_data[0:]
        # print(list(area_pred_polys))
        # print(area_pred_polys.shape)

        matched_pred_polys = list()    
        area_pred_polys.to_crs(area_crs)
        # for pred_poly in area_pred_polys:
        for index, row in area_pred_polys.iterrows():
            #print("row",row[0],row[1],row[2])
            pred_poly = row[2]
            # any_points = all_pred_points_df.within(pred_poly.loc[0, 'geometry'])
            any_points = all_pred_points_df.within(pred_poly)
            #print("any_points",any_points)
            if(any(any_points) == True):
                # print("MATCHES")
                matched_pred_polys.append(str(row[0]))
                
        # print(matched_pred_polys)
        # If there is more than 0 matches, remove them from the dataframe
        if(len(matched_pred_polys) > 0 ):
            print("area_pred_polys len before ",len(area_pred_polys))
            for mpp in matched_pred_polys:
                index_matches = area_pred_polys[area_pred_polys['id'] == mpp].index
                #dup_row = area_pred_polys.loc([area_pred_polys['id'] == mpp]
                # get the duplicate row
                dup_row = area_pred_polys.loc[area_pred_polys['id'] == mpp]
                #print("dup_row....",dup_row,dup_row['id'])
                #print("index_matches",index_matches)
                #print("dup_pred_polys_df len before",len(dup_pred_polys_df))
                # put the duplicate row into a dataframe it can be saved to check it.
                dup_pred_polys_df = dup_pred_polys_df.append(dup_row, ignore_index=True)
                #print("dup_pred_polys_df len after",len(dup_pred_polys_df))
                area_pred_polys.drop(index_matches, inplace = True)
            # area_pred_polys.drop(matched_pred_polys)
            print("area_pred_polys len after ",len(area_pred_polys))
            
        # Create an empty geopandas GeoDataFrame
        area_pred_points_df = gpd.GeoDataFrame()
        #area_pred_points_df.crs = {'init':'epsg:' + str(area_crs)}
        area_pred_points_df.crs = ('EPSG:' + str(area_crs))
        
        area_pred_points_df['geometry'] = area_pred_polys.centroid
        # make an id
        id_list = np.arange(1,len(area_pred_polys.centroid)+1)
        # print(id_list)
        id_list = [(area + "-" + (("000"+str(i))[-4:])) for i in id_list]
        # print(id_list)
        area_pred_points_df['id'] = id_list
        area_pred_points_df['score'] = area_pred_polys['score']
        
        dataframesList = [all_pred_points_df, area_pred_points_df]
        all_pred_points_df = gpd.GeoDataFrame(pd.concat(dataframesList, ignore_index=True), crs=dataframesList[0].crs)
        
        print("Total points area:", len(all_pred_points_df)," Total duplicates:", len(dup_pred_polys_df))
        return all_pred_points_df, dup_pred_polys_df
    else:
        print("Total points area:", len(all_pred_points_df)," Total duplicates:", len(dup_pred_polys_df))
        return all_pred_points_df, dup_pred_polys_df


construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200826T2315'
area_crs = 26918
area_name = "catoctin_1"

# Create an empty geopandas GeoDataFrame
all_pred_points_df = gpd.GeoDataFrame()
all_pred_points_df['geometry'] = None
all_pred_points_df['id'] = None
all_pred_points_df.crs = ('EPSG:'+str(area_crs))

# Create an empty geopandas GeoDataFrame for duplicates
dup_pred_polys_df = gpd.GeoDataFrame()
dup_pred_polys_df['geometry'] = None
dup_pred_polys_df['id'] = None
dup_pred_polys_df.crs = ('EPSG:'+str(area_crs))

poly_folder = "/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/cfg20200826T2315/polys/"
all_points_outfp = os.path.join(poly_folder, (area_name+"_hearth_prediction_points_2.shp"))
dup_polys_outfp = os.path.join(poly_folder, (area_name+"_duplicate_hearth_prediction_polys_2.shp"))

#batch_groups = ['0-199','200-399','400-599','600-999','1000-1599']
batch_groups = ['400-599']
for batch_group in batch_groups:
    print(batch_group)  
    pred_poly_folder = os.path.join(poly_folder, (batch_group + "/"))
    all_pred_points_df, dup_pred_polys_df = preds_to_points("catoctin_1", pred_poly_folder, all_pred_points_df, dup_pred_polys_df, area_crs)

# Determine the output path for the Shapefile

# Write the data into that Shapefile
if not all_pred_points_df.empty:    
    all_pred_points_df.to_file(all_points_outfp)
    print("Total points:", len(all_pred_points_df))
if not dup_pred_polys_df.empty:
    dup_pred_polys_df.to_file(dup_polys_outfp)
    print("Total duplicate polys:", len(dup_pred_polys_df))

400-599
catoctin_1
/content/drive/MyDrive/crane_maryland/predictions/project_2021_nov_14/cfg20200826T2315/polys/400-599/catoctin_1_predictions.shp




Total points area: 501  Total duplicates: 0
Total points: 501


In [6]:
batch_groups = ['0-199','200-399','400-599','600-999','1000-1599']
for batch_group in batch_groups:
    print(batch_group)  

0-199
200-399
400-599
600-999
1000-1599


In [None]:
# OLD

import geopandas as gpd
import pandas as pd
import numpy as np
import os

# I think using 4326 is wrong and confusing

def preds_to_points(area, pred_poly_folder, all_pred_points_df, dup_pred_polys_df):
    # print(all_pred_points_df.shape)
    # 4326_catoctin_1_predictions.shp
    print(area)
    area_pred_polys_path = os.path.join(pred_poly_folder,("4326_" + area + "_predictions.shp"))
    print(area_pred_polys_path)
    if os.path.exists(area_pred_polys_path):
        area_pred_polys = gpd.read_file(area_pred_polys_path)
        # selection = pred_data[0:]
        # print(list(area_pred_polys))
        # print(area_pred_polys.shape)
        area_crs = 26918

        matched_pred_polys = list()    
        area_pred_polys.to_crs(area_crs)
        # for pred_poly in area_pred_polys:
        for index, row in area_pred_polys.iterrows():
            #print("row",row[0],row[1],row[2])
            pred_poly = row[2]
            # any_points = all_pred_points_df.within(pred_poly.loc[0, 'geometry'])
            any_points = all_pred_points_df.within(pred_poly)
            #print("any_points",any_points)
            if(any(any_points) == True):
                # print("MATCHES")
                matched_pred_polys.append(str(row[0]))
                
        # print(matched_pred_polys)
        # If there is more than 0 matches, remove them from the dataframe
        if(len(matched_pred_polys) > 0 ):
            print("area_pred_polys len before ",len(area_pred_polys))
            for mpp in matched_pred_polys:
                index_matches = area_pred_polys[area_pred_polys['id'] == mpp].index
                #dup_row = area_pred_polys.loc([area_pred_polys['id'] == mpp]
                # get the duplicate row
                dup_row = area_pred_polys.loc[area_pred_polys['id'] == mpp]
                #print("dup_row....",dup_row,dup_row['id'])
                #print("index_matches",index_matches)
                #print("dup_pred_polys_df len before",len(dup_pred_polys_df))
                # put the duplicate row into a dataframe it can be saved to check it.
                dup_pred_polys_df = dup_pred_polys_df.append(dup_row, ignore_index=True)
                #print("dup_pred_polys_df len after",len(dup_pred_polys_df))
                area_pred_polys.drop(index_matches, inplace = True)
            # area_pred_polys.drop(matched_pred_polys)
            print("area_pred_polys len after ",len(area_pred_polys))
            
        # Create an empty geopandas GeoDataFrame
        area_pred_points_df = gpd.GeoDataFrame()
        #area_pred_points_df.crs = {'init':'epsg:' + str(area_crs)}
        area_pred_points_df.crs = ('EPSG:' + str(area_crs))
        
        area_pred_points_df['geometry'] = area_pred_polys.centroid
        # make an id
        id_list = np.arange(1,len(area_pred_polys.centroid)+1)
        # print(id_list)
        id_list = [(area + "-" + (("000"+str(i))[-4:])) for i in id_list]
        # print(id_list)
        area_pred_points_df['id'] = id_list
        area_pred_points_df['score'] = area_pred_polys['score']
        
        dataframesList = [all_pred_points_df, area_pred_points_df]
        all_pred_points_df = gpd.GeoDataFrame(pd.concat(dataframesList, ignore_index=True), crs=dataframesList[0].crs)
        
        print("Total points area:", len(all_pred_points_df)," Total duplicates:", len(dup_pred_polys_df))
        return all_pred_points_df, dup_pred_polys_df
    else:
        print("Total points area:", len(all_pred_points_df)," Total duplicates:", len(dup_pred_polys_df))
        return all_pred_points_df, dup_pred_polys_df

import csv
construction_type = "charcoal_hearth_hill"
cfg_name = 'cfg20200826T2315'
pred_poly_folder = os.path.join("/content/drive/MyDrive/crane_maryland/polys/", (cfg_name + "/"))

# Create an empty geopandas GeoDataFrame
all_pred_points_df = gpd.GeoDataFrame()
all_pred_points_df['geometry'] = None
all_pred_points_df['id'] = None
#all_pred_points_df.crs = {'init':'epsg:4326'}
all_pred_points_df.crs = ('EPSG:4326')


# Create an empty geopandas GeoDataFrame for duplicates
dup_pred_polys_df = gpd.GeoDataFrame()
dup_pred_polys_df['geometry'] = None
dup_pred_polys_df['id'] = None
#dup_pred_polys_df.crs = {'init':'epsg:4326'}
dup_pred_polys_df.crs = ('EPSG:4326')


all_points_outfp = os.path.join(pred_poly_folder, "4326_000_hearth_prediction_points.shp")
dup_polys_outfp = os.path.join(pred_poly_folder, "4326_000_duplicate_hearth_prediction_polys.shp")


all_pred_points_df, dup_pred_polys_df = preds_to_points("catoctin_1", pred_poly_folder, all_pred_points_df, dup_pred_polys_df)

# Determine the output path for the Shapefile

# Write the data into that Shapefile
if not all_pred_points_df.empty:    
    all_pred_points_df.to_file(all_points_outfp)
    print("Total points:", len(all_pred_points_df))
if not dup_pred_polys_df.empty:
    dup_pred_polys_df.to_file(dup_polys_outfp)
    print("Total duplicate polys:", len(dup_pred_polys_df))

In [None]:
#one time fix up of long file names
batch_group = "0-199"
# Dictionary of slope_jpgs

slope_tifs_fp = '/content/drive/MyDrive/crane_maryland/slope_images/slope_'+batch_group+'/'

# Dictionary of jpgs

for file_name in os.listdir(slope_tifs_fp):
    origin_file_path = os.path.join(slope_tifs_fp,file_name)
    #print(origin_file_path)
    if(os.path.isfile(origin_file_path)):  
        if(origin_file_path[-4:]==".tif"):
            os.rename(origin_file_path,os.path.join(slope_tifs_fp,(file_name[:28]+'.tif')))
            # be careful comparing a partial filename
            print(file_name[:28])


slope_20120129_17SQD0990_utm
slope_20120129_17SQD1287_utm
slope_20120130_17SQD0978_utm
slope_20120130_17SQD1278_utm
slope_20120130_17SQD1280_utm
slope_20120130_17SQD1281_utm
slope_20120130_17SQD1478_utm
slope_20120130_17SQD1480_utm
slope_20120130_17SQD1581_utm
slope_20120130_17SQD1583_utm
slope_20120130_17SQD1778_utm
slope_20120130_17SQD1780_utm
slope_20120130_17SQD1781_utm
slope_20120130_17SQD2080_utm
slope_20120130_17SQD2081_utm
slope_20120130_17SQD2083_utm
slope_20120130_17SQD2181_utm
slope_20120131_17SPD9792_utm
slope_20120131_17SPD9793_utm
slope_20120131_17SPD9989_utm
slope_20120131_17SPD9990_utm
slope_20120131_17SPD9992_utm
slope_20120131_17SPD9993_utm
slope_20120131_17SQD0084_utm
slope_20120131_17SQD0086_utm
slope_20120131_17SQD0087_utm
slope_20120131_17SQD0092_utm
slope_20120131_17SQD0093_utm
slope_20120131_17SQD0284_utm
slope_20120131_17SQD0286_utm
slope_20120131_17SQD0287_utm
slope_20120131_17SQD0289_utm
slope_20120131_17SQD0290_utm
slope_20120131_17SQD0292_utm
slope_20120131