# Pennsylvania Select DEM tiles

The project examines slope rasters derived from LiDAR to find RCHs. Here we use USGS DEMs already created from LiDAR data. This saves conversion work from las files.

Evidence of RCHs is more often found in forested areas where ground has not been plowed or built on and so it may be useful to examine only LiDAR tiles that contain forested areas. 

## Inputs

### LiDAR tiles shapefile
Map search 
https://prd-tnm.s3.amazonaws.com/LidarExplorer/index.html#/

https://www.sciencebase.gov/catalog/items/queryForm?parentId=543e6b86e4b0fd76af69cf4c&offset=0&max=20&


do a search
+ text = PA Sandy 2014 (for example)
+ Max records to return: 200
+ Offset of first record: 0 (! not 20)
+ Output Format: = json
+ save json

text file
https://thor-f5.er.usgs.gov/ngtoc/metadata/waf/elevation/1_meter/geotiff/

In order east to west
1.	#batch_group = "pa_northcentral_2019_b19" - tiled
2.	#batch_group = "pa_allentown_2016"
3.	#batch_group = "pa_3_county_south_central_2018_d18" - tiled 70 9800
4.	#batch_group = "pa_luzernecounty_2018" - tiled 5 700
5.	#batch_group = "pa_sandy_2014"
6.	#batch_group = "pa_south_central_b1_2017" - tiled 94 13160
7.	#batch_group = "pa_south_central_b2_2017" - tiled 44 - 6160
8.	#batch_group = "pa_dauphin_2016" - tiled 17 2380
9.	#batch_group = "pa_westernpa_2019_d20" - tiled
10.   #batch_group = de_delawarevalley_hd_2015 tiled 107 14980
11. #batch_group ="md-pa_sandysupp_2014" tiled 70 9800

https://www.sciencebase.gov/catalog/items?q=PA%20South%20Central&max=200&offset=20&format=json&sort=title&order=asc&parentId=543e6b86e4b0fd76af69cf4c


Woodland

https://www.sciencebase.gov/catalog/item/5318a64ee4b051b1b924ea2c







# Set up directories

In [2]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import os

def check_path(fp):
    #return  
    if not os.path.exists(fp):
        print("missing: ", fp)
        os.makedirs(fp)
        if not os.path.exists(fp):
            print("still missing: ", fp)
        else:
            print("made directory: ", fp)
    else:
        print("exists:", fp)  

#batch_group = "pa_northcentral_2019_b19"
#batch_group = "pa_allentown_2016"
#batch_group = "pa_3_county_south_central_2018_d18"
#batch_group = "pa_luzernecounty_2018"
#batch_group = "pa_sandy_2014"
#batch_group = "pa_south_central_b1_2017"
#batch_group = "pa_south_central_b2_2017"
#batch_group = "pa_dauphin_2016"
#batch_group = "pa_westernpa_2019_d20"
#batch_group = "de_delawarevalley_hd_2015"
#batch_group ="md-pa_sandysupp_2014"
#batch_group ="va_northernshenandoah_2020_d20"
#batch_group ="va_southamptonhenricowmbg_2019_b19"
#batch_group ="va_uppermiddleneck_2018_d18"
#batch_group ="va_fema_r3_northeast_2016"
#batch_group ="va_west_chesapeake_bay_watershed_lidar_2017_b17"
#batch_group = "va_fema_r3_southwest_b_2016"
batch_group = "va_chesapeakebaysouth_2015"
#batch_group ="md_garret_co_2014"
#batch_group ="md_slope_0-199"
#batch_group ="md_slope_200-399"
#batch_group ="md_slope_400-599"
#batch_group ="md_slope_600-999"
#batch_group ="md_slope_1000-1599"
#batch_group ="md_slope_1600-2199"

#batch_group_url_folder_name = "PA_Northcentral_2019_B19"
#batch_group_url_folder_name = "PA_LuzerneCounty_2018"
#batch_group_url_folder_name = "DE_DelawareValley_HD_2015"
#batch_group_url_folder_name = "MD_PA_SandySupp_2014"
#batch_group_url_folder_name = "VA_NorthernShenandoah_2020_D20"
#batch_group_url_folder_name = "VA_SouthamptonHenricoWMBG_2019_B19"
#batch_group_url_folder_name = "MD_Garret_Co_2014"
#batch_group_url_folder_name = "VA_UpperMiddleNeck_2018_D18"
#batch_group_url_folder_name = "VA_FEMA_R3_Northeast_2016"
#batch_group_url_folder_name = "VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17"
#batch_group_url_folder_name = "VA_FEMA_R3_Southwest_B_2016"
batch_group_url_folder_name = "VA_ChesapeakeBaySouth_2015"

dem_tif_fp = os.path.join("/content/drive/MyDrive/crane_pennsylvania/dem/",batch_group)
check_path(dem_tif_fp)
slope_tif_fp = os.path.join("/content/drive/MyDrive/crane_pennsylvania/slope/",batch_group)
check_path(slope_tif_fp)

slope_tif_tiles640_fp = os.path.join(slope_tif_fp,"tiles640/")
check_path(slope_tif_tiles640_fp)
slope_tif_tiles640_jpgs_fp = os.path.join(slope_tif_tiles640_fp,"jpgs/")
check_path(slope_tif_tiles640_jpgs_fp)
slope_tif_tiles640_polys_fp = os.path.join(slope_tif_tiles640_fp,"polys/")
check_path(slope_tif_tiles640_polys_fp)

prediction_fp = '/content/drive/MyDrive/crane_pennsylvania/predictions/project_'+batch_group+'/'
check_path(prediction_fp)
prediction_xmls_fp = '/content/drive/MyDrive/crane_pennsylvania/predictions/project_'+batch_group+'/xmls/'
check_path(prediction_xmls_fp)

#dem_tif_fp, slope_tif_fp, slope_tif_tiles_fp, slope_tif_tiles_jpgs_fp

import os
def check_number_of_files(folder_path, file_extension):
    counter = 0
    try:
        for filename in os.listdir(folder_path):
            ext = filename[-1*(len(file_extension)):]
            if(ext == file_extension):
               counter = counter + 1            
        print("found: ",counter," files in ", folder_path)
    except:
        print("had an exception")         
        check_number_of_files(folder_path, file_extension)

import time
def list_of_files(source_directory, source_extension):
    print(source_directory, source_extension)  
    start_time = time.time()  
    files_list = []  
    counter = 0
    for path, subdirs, files in os.walk(source_directory):
        for name in files:
            # print(name)      
            if(name[-4:] == source_extension):
                counter = counter + 1
                # print(counter, os.path.join(path, name))
                files_list.append(os.path.join(path, name))
    execution_time = (time.time() - start_time)
    print('Execution time in seconds: ' + str(execution_time), counter)                
    return(files_list)

exists: /content/drive/MyDrive/crane_pennsylvania/dem/va_chesapeakebaysouth_2015
exists: /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015
exists: /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/
exists: /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/
exists: /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/polys/
exists: /content/drive/MyDrive/crane_pennsylvania/predictions/project_va_chesapeakebaysouth_2015/
exists: /content/drive/MyDrive/crane_pennsylvania/predictions/project_va_chesapeakebaysouth_2015/xmls/


In [None]:
# run only once
import os

def create_dir(path):
    if not os.path.exists(path):
        print(path, " does not exist. Making.")
        # Create a new directory because it does not exist 
        os.makedirs(path)
    else:
        print(path, " exists.")

create_dir(dem_tif_fp)
create_dir(slope_tif_fp)
create_dir(slope_tif_tiles_fp)
create_dir(slope_tif_tiles_jpgs_fp)
create_dir(prediction_xmls_fp)

/content/drive/MyDrive/crane_pennsylvania/dem/pa_south_central_b2_2017  does not exist. Making.
/content/drive/MyDrive/crane_pennsylvania/slope/pa_south_central_b2_2017  does not exist. Making.
/content/drive/MyDrive/crane_pennsylvania/slope/pa_south_central_b2_2017/tiles/  does not exist. Making.
/content/drive/MyDrive/crane_pennsylvania/slope/pa_south_central_b2_2017/tiles/jpgs/  does not exist. Making.
/content/drive/MyDrive/crane_pennsylvania/predictions/project_pa_south_central_b2_2017/xmls/  does not exist. Making.


# Wget the DEM files

+ Open the shape file containing the polygons of forested lidar tiles in Maryland.

+ For each row, wget the file at the url.

In [3]:
!pip install wget

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wget
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9675 sha256=68b71e1392d8b914a19e3957ce224b52022f0951ff3b74fb72438400e187df85
  Stored in directory: /root/.cache/pip/wheels/a1/b6/7c/0e63e34eb06634181c63adacca38b79ff8f35c37e3c13e3c02
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [4]:
!wget --version

GNU Wget 1.19.4 built on linux-gnu.

-cares +digest -gpgme +https +ipv6 +iri +large-file -metalink +nls 
+ntlm +opie +psl +ssl/openssl 

Wgetrc: 
    /etc/wgetrc (system)
Locale: 
    /usr/share/locale 
Compile: 
    gcc -DHAVE_CONFIG_H -DSYSTEM_WGETRC="/etc/wgetrc" 
    -DLOCALEDIR="/usr/share/locale" -I. -I../../src -I../lib 
    -I../../lib -Wdate-time -D_FORTIFY_SOURCE=2 -DHAVE_LIBSSL -DNDEBUG 
    -g -O2 -fdebug-prefix-map=/build/wget-Xb5Z7Y/wget-1.19.4=. 
    -fstack-protector-strong -Wformat -Werror=format-security 
    -DNO_SSLv2 -D_FILE_OFFSET_BITS=64 -g -Wall 
Link: 
    gcc -DHAVE_LIBSSL -DNDEBUG -g -O2 
    -fdebug-prefix-map=/build/wget-Xb5Z7Y/wget-1.19.4=. 
    -fstack-protector-strong -Wformat -Werror=format-security 
    -DNO_SSLv2 -D_FILE_OFFSET_BITS=64 -g -Wall -Wl,-Bsymbolic-functions 
    -Wl,-z,relro -Wl,-z,now -lpcre -luuid -lidn2 -lssl -lcrypto -lpsl 
    ftp-opie.o openssl.o http-ntlm.o ../lib/libgnu.a 

Copyright (C) 2015 Free Software Foundation, Inc.
License 

# JSON download

In [5]:
# Check JSON for duplicates
import json
with open("/content/drive/MyDrive/crane_pennsylvania/pa_michaux_forest_area_rch/" + batch_group + ".json", 'r') as dem_data_file:
    dem_data = json.load(dem_data_file)
    titles = []
    #print(json.dumps(dem_data, indent=4, sort_keys=True))
    print("There are ",len(dem_data['items'])," files.")
    file_counter = 0
    for item in dem_data['items']:
        title = item['title'] 
        print(title)
        titles.append(title)
from collections import Counter

counts = dict(Counter(titles))
duplicates = {key:value for key, value in counts.items() if value > 1}
print("duplicates:", duplicates)

There are  131  files.
USGS one meter x22y415 VA ChesapeakeBaySouth 2015
USGS one meter x22y416 VA ChesapeakeBaySouth 2015
USGS one meter x22y417 VA ChesapeakeBaySouth 2015
USGS one meter x22y418 VA ChesapeakeBaySouth 2015
USGS one meter x22y419 VA ChesapeakeBaySouth 2015
USGS one meter x22y420 VA ChesapeakeBaySouth 2015
USGS one meter x22y421 VA ChesapeakeBaySouth 2015
USGS one meter x23y416 VA ChesapeakeBaySouth 2015
USGS one meter x23y417 VA ChesapeakeBaySouth 2015
USGS one meter x23y418 VA ChesapeakeBaySouth 2015
USGS one meter x23y419 VA ChesapeakeBaySouth 2015
USGS one meter x23y420 VA ChesapeakeBaySouth 2015
USGS one meter x24y415 VA ChesapeakeBaySouth 2015
USGS one meter x24y416 VA ChesapeakeBaySouth 2015
USGS one meter x24y417 VA ChesapeakeBaySouth 2015
USGS one meter x24y418 VA ChesapeakeBaySouth 2015
USGS one meter x24y419 VA ChesapeakeBaySouth 2015
USGS one meter x25y415 VA ChesapeakeBaySouth 2015
USGS one meter x25y416 VA ChesapeakeBaySouth 2015
USGS one meter x25y417 VA C

In [6]:
import os
import time
import json

url_of_dem_folder = "https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/" + batch_group_url_folder_name + "/TIFF/"

# import the wget module
from wget import download
#
# create a downloader class.
class downloader:
        
    # Create a downloadfile method
    # Accepting the url and the file storage location
    # Set the location to an empty string by default.

    def downloadFile(self, url, location=""):
         # Download file and with a custom progress bar
        download(url, out = location)


downloadObj = downloader()
with open("/content/drive/MyDrive/crane_pennsylvania/pa_michaux_forest_area_rch/" + batch_group + ".json", 'r') as dem_data_file:
    dem_data = json.load(dem_data_file)
    
    #print(json.dumps(dem_data, indent=4, sort_keys=True))
    print("There are ",len(dem_data['items'])," files.")
    file_counter = 0
    for item in dem_data['items']:
        title = item['title'] 
        print(title)

        dem_file_name = (title.strip()).replace(' ', "_").replace('-', "_")+ ".tif"
        dem_file_path = os.path.join(dem_tif_fp,dem_file_name)
        url_of_dem = url_of_dem_folder + dem_file_name 
        file_counter = file_counter + 1
        print(file_counter, url_of_dem)
        
        try:
            if not os.path.exists(dem_file_path):
                downloadObj.downloadFile(url_of_dem,dem_tif_fp)
                time.sleep(10)
            else:
                print(dem_file_path, " exists.")
        except:
            print("exception:",url_of_dem,dem_tif_fp)

There are  131  files.
USGS one meter x22y415 VA ChesapeakeBaySouth 2015
1 https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/VA_ChesapeakeBaySouth_2015/TIFF/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015.tif
USGS one meter x22y416 VA ChesapeakeBaySouth 2015
2 https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/VA_ChesapeakeBaySouth_2015/TIFF/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015.tif
USGS one meter x22y417 VA ChesapeakeBaySouth 2015
3 https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/VA_ChesapeakeBaySouth_2015/TIFF/USGS_one_meter_x22y417_VA_ChesapeakeBaySouth_2015.tif
USGS one meter x22y418 VA ChesapeakeBaySouth 2015
4 https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/VA_ChesapeakeBaySouth_2015/TIFF/USGS_one_meter_x22y418_VA_ChesapeakeBaySouth_2015.tif
USGS one meter x22y419 VA ChesapeakeBaySouth 2015
5 https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/VA_ChesapeakeBaySouth_2

# Txt file download

In [7]:
#adjust the split of the file name
url_of_dem_folder = "https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/" + batch_group_url_folder_name + "/TIFF/"   
dem_data_file = open("/content/drive/MyDrive/crane_pennsylvania/pa_michaux_forest_area_rch/" + batch_group + ".txt", 'r')
lines = dem_data_file.readlines()

import os
import time
import json

# import the wget module
from wget import download
#
# create a downloader class.
class downloader:
        
    # Create a downloadfile method
    # Accepting the url and the file storage location
    # Set the location to an empty string by default.

    def downloadFile(self, url, location=""):
         # Download file and with a custom progress bar
        download(url, out = location)
downloadObj = downloader()
file_counter = 0
# Strips the newline character
for line in lines:
    title = line.strip()[78:144]
    print(line.strip())
    print(title)  
    dem_file_name = (title.strip()).replace(' ', "_")+ ".tif"
    dem_file_path = os.path.join(dem_tif_fp,dem_file_name)
    url_of_dem = url_of_dem_folder + dem_file_name 
    file_counter = file_counter + 1
    print(file_counter, url_of_dem)
    #downloadObj.downloadFile(url_of_dem,dem_tif_fp)
    #break
    try:
        if not os.path.exists(dem_file_path):
            downloadObj.downloadFile(url_of_dem,dem_tif_fp)
            time.sleep(10)
        else:
            print(dem_file_path, " exists.")
    except:
        print("exception:",url_of_dem,dem_tif_fp)
    #break # Use break for the first time download of a batch to make sure it works and that the extract of the file name is correct. (title = line.strip()[78:124])

<tr><td valign="top"><img src="/icons/text.gif" alt="[TXT]"></td><td><a href="USGS_1M_17_x55y416_VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17.xml">USGS_1M_17_x55y416_V..&gt;</a></td><td align="right">2021-04-02 18:14  </td><td align="right"> 13K</td><td>&nbsp;</td></tr>
USGS_1M_17_x55y416_VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17
1 https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17/TIFF/USGS_1M_17_x55y416_VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17.tif
/content/drive/MyDrive/crane_pennsylvania/dem/va_west_chesapeake_bay_watershed_lidar_2017_b17/USGS_1M_17_x55y416_VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17.tif  exists.
<tr><td valign="top"><img src="/icons/text.gif" alt="[TXT]"></td><td><a href="USGS_1M_17_x56y415_VA_West_Chesapeake_Bay_Watershed_Lidar_2017_B17.xml">USGS_1M_17_x56y415_V..&gt;</a></td><td align="right">2021-04-02 18:14  </td><td align="right"> 13K</td><td>&nbsp;</td></tr>
USGS_1M_17_x56y

# Optional: Remove duplicate tiles. 
Some tiles overlap.  See: dem_0_1_3_pennsylvania_dem_tiles_remove_overlap.ipynb to remove them before making slopes and the rest to save storage, processing time and duplicate results.

In [None]:
check_number_of_files(dem_tif_fp, ".tif")

found:  94  files in  /content/drive/MyDrive/crane_pennsylvania/dem/pa_south_central_b1_2017


# Create a Slope tiff from DEM

In [7]:
import subprocess

# run cell above to set directories
#batch_group = "pa_sandy_2014/"
input_dem_tif_fp = dem_tif_fp
output_slope_tif_fp = slope_tif_fp

# find all images
counter = 0
for filename in os.listdir(input_dem_tif_fp):
    # extract image id
    image_id = filename[:-4]
    ext = filename[-4:]

    if(ext ==".tif"):
        print(counter, filename)
        input_fp = os.path.join(input_dem_tif_fp,filename)
        output_fp = os.path.join(output_slope_tif_fp,filename)
        command = "gdaldem slope " + input_fp + " " + output_fp + "  -of GTiff -b 1 -s 1.0"
        print("!"+command)
        subprocess.call(command, shell=True)
        #!gdaldem slope /content/drive/MyDrive/crane_pennsylvania/dem/pa_sandy_2014/USGS_one_meter_x34y446_PA_Sandy_2014.tif /content/drive/MyDrive/crane_pennsylvania/slope/pa_sandy_2014/USGS_one_meter_x34y446_PA_Sandy_2014_slope.tif -of GTiff -b 1 -s 1.0
        counter = counter + 1



0 USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015.tif
!gdaldem slope /content/drive/MyDrive/crane_pennsylvania/dem/va_chesapeakebaysouth_2015/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015.tif /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015.tif  -of GTiff -b 1 -s 1.0
1 USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015.tif
!gdaldem slope /content/drive/MyDrive/crane_pennsylvania/dem/va_chesapeakebaysouth_2015/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015.tif /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015.tif  -of GTiff -b 1 -s 1.0
2 USGS_one_meter_x22y417_VA_ChesapeakeBaySouth_2015.tif
!gdaldem slope /content/drive/MyDrive/crane_pennsylvania/dem/va_chesapeakebaysouth_2015/USGS_one_meter_x22y417_VA_ChesapeakeBaySouth_2015.tif /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/USGS_one_meter_x22y417_VA_C

In [10]:
check_number_of_files(slope_tif_fp, ".tif")

found:  168  files in  /content/drive/MyDrive/crane_pennsylvania/slope/va_uppermiddleneck_2018_d18


# Tile each 10,000 X 10,000 slope tiff into 640 X 640

640 x 640 was used since the YOLO example resizes images to 640 X 640. It seems reasonable to reduce the amount of distoration and so obtain better results. 
The overlap of tiles is 15 which equals 15m, this largest diameter of RCH.
With an overlap of 15, each tile is 625 pixels high and wide. 625 pixels fits into 10,000 pixels 16 times. Some slope tiles are wider or higher than 10,000 pixels (for example: 10012, 10012). The extra pixels are added to the last tile. The last tiles in a row or column are slightly larger and will be slightly distorted when resized to 640 X 640.



In [4]:
!pip install rasterio
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
# TODO file and test line 148 poly_fp = os.path.join(self.tt_tile_polys_path,(self.tiff_tile_name_prefix + output_file_name_base+'_tile_poly.shp')) to remove r15c15 from the file name

import rasterio
import rasterio.plot
import gdal
import osr
import geopandas as gpd
from shapely.geometry import Point, Polygon
import numpy as np
import gdalnumeric
import cv2
import os

class Tile_tiff:

    # class attribute
    tiff_to_tile_folder = "" # the source tiff to be split into tiles
    tiff_to_tile_filename = ""    
    tiff_tile_name_prefix = ""
    tt_tile_path = "/content/" # the path to where the tiles are saved
    tt_tile_polys_path  = "/content/" # the path to where the tiles' polygon shapefiles are saved

    # varables for gdal    
    tt_gdal_dataset = ""
    tt_srs = ""
    tt_cols = 0
    tt_rows = 0

    # varables for rasterio
    tt_raterio_dataset = 0
    tt_crs = 0
    tt_crs_int = 0
    tt_left_min_x = 0
    tt_bottom_min_y = 0
    tt_right_max_x = 0
    tt_top_max_y = 0
    tt_coords = 0
    tt_pixel_size_x = 0
    tt_pixel_size_y = 0

    # set this for tile size
    tt_tile_pixel_width = 640
    tt_tile_pixel_height = 640

    # set this for overlap
    tt_tile_pixel_width_overlap = 0 # 200
    tt_tile_pixel_height_overlap = 0 # 100

    tt_tile_matrix = []
    # gdal
    tt_gdal_driver = ""
    tt_gdal_dataset = ""
    tt_gdal_dataset_band = ""
    tt_gdal_transform = ""
    tt_gdal_data = ""

    def create_tile_matrix(self):
        self.tt_tile_matrix = []
        # remove + 1
        number_tiles_wide = int(self.tt_cols/(self.tt_tile_pixel_width - self.tt_tile_pixel_width_overlap))
        number_tiles_high = int(self.tt_rows/(self.tt_tile_pixel_height - self.tt_tile_pixel_height_overlap))
        print("create_tile_matrix", number_tiles_wide,number_tiles_high)    
        # rows
        for tif_rows in range(0, number_tiles_high):
            # columns
            for tif_cols in range(0, number_tiles_wide):
                
                lrx = 0 + ((self.tt_tile_pixel_width - self.tt_tile_pixel_width_overlap) * tif_cols)  # self.tt_left_min_x
                lry = 0 + ((self.tt_tile_pixel_height - self.tt_tile_pixel_height_overlap) * tif_rows)  # self.tt_bottom_min_y

                if tif_cols == number_tiles_wide:
                    ulx = self.tt_cols   
                else:           
                    ulx = lrx + self.tt_tile_pixel_width

                if tif_rows == number_tiles_high:
                    uly = self.tt_rows
                else:                        
                    uly = lry + self.tt_tile_pixel_height

                if(lrx>self.tt_cols):
                    lrx=self.tt_cols
                
                if(lry>self.tt_rows):
                    lry=self.tt_rows
                
                if(ulx>self.tt_cols):
                    ulx=self.tt_cols
                
                if(uly>self.tt_rows):
                    uly=self.tt_rows
                
                self.tt_tile_matrix.append([[lrx, lry], [ulx, uly], [tif_cols,tif_rows]])

        return(self.tt_tile_matrix)
    
    def create_tile_files(self):
        self.tt_boundary_polys = gpd.GeoDataFrame()
        self.tt_boundary_polys['geometry'] = None
        self.tt_boundary_polys.crs = ("EPSG:" + str(self.tt_crs_int))
        self.tt_boundary_polys.geometry = self.tt_boundary_polys.geometry.to_crs(crs=self.tt_crs_int)
        self.tt_boundary_polys.to_crs(crs=self.tt_crs_int)
        self.tt_boundary_polys = self.tt_boundary_polys.to_crs(epsg=self.tt_crs_int)
        for tile in self.tt_tile_matrix:
            minx = tile[0][0]
            maxx = tile[1][0]
            miny = tile[0][1]
            maxy = tile[1][1]

            tilex = "00"+str(tile[2][0])
            tilex = tilex[-2:]
            tiley = "00"+str(tile[2][1])
            tiley = tiley[-2:]

            self.tt_gdal_data = self.tt_gdal_dataset_band.ReadAsArray(minx, miny, maxx-minx, maxy-miny)

            output_file_name_base = "r" + tiley + "c" + tilex 
            output_file_name_tiff = self.tiff_tile_name_prefix + output_file_name_base + ".tif"
            output_file_path = os.path.join(self.tt_tile_path,output_file_name_tiff)
            # print(output_file_path)
            #print(self.tt_gdal_dataset)

            self.tile_dst_ds = gdal.Translate(output_file_path, self.tt_gdal_dataset, srcWin = [minx, miny, maxx-minx, maxy-miny])
            #print(self.tile_dst_ds)
            this_tile_x_min = self.tt_left_min_x + (minx*self.tt_pixel_size_x)
            this_tile_y_min = self.tt_top_max_y - (miny*self.tt_pixel_size_y)
            this_tile_x_max = self.tt_left_min_x + (maxx*self.tt_pixel_size_x)
            this_tile_y_max = self.tt_top_max_y - (maxy*self.tt_pixel_size_y)
            #print("this_tile_transform",this_tile_x_min, self.tt_gdal_transform[1], self.tt_gdal_transform[2], this_tile_y_min, self.tt_gdal_transform[4], self.tt_gdal_transform[5])
            this_tile_transform = (this_tile_x_min, self.tt_gdal_transform[1], self.tt_gdal_transform[2], this_tile_y_min, self.tt_gdal_transform[4], self.tt_gdal_transform[5])
            #print("this_tile_transform2",this_tile_transform)
            ## COLOR
            self.tile_dst_ds.GetRasterBand(1).SetRasterColorTable(self.tt_gdal_dataset_band.GetRasterColorTable())
            self.tile_dst_ds.GetRasterBand(1).SetRasterColorInterpretation(self.tt_gdal_dataset_band.GetRasterColorInterpretation())

            # Write metadata
            self.tile_dst_ds.SetGeoTransform(this_tile_transform)
            self.tile_dst_ds.SetProjection(self.tt_gdal_dataset.GetProjection())
            
            self.tile_dst_ds.GetRasterBand(1).WriteArray(self.tt_gdal_data)
            self.tile_dst_ds = None

            coords = [(this_tile_x_min, this_tile_y_min), (this_tile_x_max, this_tile_y_min), (this_tile_x_max, this_tile_y_max), (this_tile_x_min, this_tile_y_max)]
            poly = Polygon(coords)
            new_tp_row = {'id':output_file_name_base, 'geometry':poly}
            self.tt_boundary_polys = self.tt_boundary_polys.append(new_tp_row, ignore_index=True)

        poly_fp = os.path.join(self.tt_tile_polys_path,(self.tiff_tile_name_prefix + output_file_name_base+'_tile_poly.shp'))
        print(poly_fp)
        self.tt_boundary_polys.to_file(poly_fp)

    def __init__(self, tiff_to_tile_folder, tiff_to_tile_filename, tt_tile_path, tt_tile_polys_path):
        self.tiff_to_tile_folder = tiff_to_tile_folder
        self.tiff_to_tile_filename = tiff_to_tile_filename
        self.tiff_to_tile_path = os.path.join(self.tiff_to_tile_folder, self.tiff_to_tile_filename)
        self.tt_tile_path = tt_tile_path
        self.tt_tile_polys_path = tt_tile_polys_path

        self.tt_raterio_dataset = rasterio.open(self.tiff_to_tile_path)
        self.tiff_tile_name_prefix = self.tiff_to_tile_filename[:-4]
        self.tt_rows,self.tt_cols = self.tt_raterio_dataset.shape
        self.tt_crs =  self.tt_raterio_dataset.crs
        if(self.tt_crs.is_valid):            
            self.tt_crs_int = int(str(self.tt_crs)[5:])
        else:
            self.tt_crs_int = 3857
        self.tt_srs = osr.SpatialReference()

        self.tt_srs.ImportFromEPSG(int(str(self.tt_crs_int)))

        self.tt_left_min_x = self.tt_raterio_dataset.bounds[0]
        self.tt_bottom_min_y = self.tt_raterio_dataset.bounds[1]
        self.tt_right_max_x = self.tt_raterio_dataset.bounds[2]
        self.tt_top_max_y = self.tt_raterio_dataset.bounds[3]
        self.tt_coords = [(self.tt_left_min_x, self.tt_bottom_min_y), (self.tt_right_max_x, self.tt_bottom_min_y), (self.tt_right_max_x, self.tt_top_max_y), (self.tt_left_min_x, self.tt_top_max_y)]
    
        self.tt_pixel_size_x, self.tt_pixel_size_y = self.tt_raterio_dataset.res

        self.tt_gdal_driver = gdal.GetDriverByName('GTiff')
        self.tt_gdal_dataset = gdal.Open(self.tiff_to_tile_path)
        self.tt_gdal_dataset_band = self.tt_gdal_dataset.GetRasterBand(1)
        self.tt_gdal_transform = self.tt_gdal_dataset.GetGeoTransform()


    def get_attributes(self):
        return {
            "cols": str(self.tt_cols), 
            "rows": str(self.tt_rows),
            "crs": str(self.tt_crs),
            "left_min_x": str(self.tt_left_min_x),
            "bottom_min_y": str(self.tt_bottom_min_y),
            "right_max_x": str(self.tt_right_max_x),
            "top_max_y": str(self.tt_top_max_y),
            "coords": str(self.tt_coords),
            "pixel_size_x": str(self.tt_pixel_size_x),
            "pixel_size_y": str(self.tt_pixel_size_y)
            }

In [6]:
tifs_list = list_of_files(slope_tif_fp, ".tif")  
print(len(tifs_list), "in", slope_tif_fp)

/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015 .tif
Execution time in seconds: 0.5619513988494873 9431
9431 in /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015


In [7]:
# modified for smaller 1600 X 1600 pixel tiles for Maryland downloaded from NOAA
# overlap 160

input_slope_tif_fp = slope_tif_fp
output_slope_tif_tiles_fp = slope_tif_tiles640_fp
output_slope_tif_tiles_polys_fp = slope_tif_tiles640_polys_fp

# find all images
counter = 0
for filename in os.listdir(input_slope_tif_fp):
    # extract image id
    image_id = filename[:-4]
    ext = filename[-4:]

    if(ext ==".tif"):
        print(counter, filename)
        #if(os.path.exists(os.path.join(output_slope_tif_tiles_polys_fp,(filename[:-4]+'r15c15_tile_poly.shp-------------------')))):
        #    print('exists', filename)
        #else:
        output_slope_tif_subtiles_fp = os.path.join(output_slope_tif_tiles_fp, (filename[:-4] + "/"))
        if not os.path.exists(output_slope_tif_subtiles_fp):
            os.mkdir(output_slope_tif_subtiles_fp)          
            print("creating:", output_slope_tif_subtiles_fp)
        else:
            print(output_slope_tif_subtiles_fp, "exists.")
        tiles_list = list_of_files(output_slope_tif_subtiles_fp, ".tif")
        if len(tiles_list) == 256:
            print("All tiles there.")
        else:
            print("found only",len(tiles_list),"tiles.")
            tt = Tile_tiff(input_slope_tif_fp, filename, output_slope_tif_subtiles_fp, output_slope_tif_tiles_polys_fp)
            tt.tt_tile_pixel_width_overlap = 15  # 160  # For Maryland NOAA tiles
            tt.tt_tile_pixel_height_overlap = 15  # 160
            print(tt.get_attributes())
            print(tt.create_tile_matrix())
            tt.create_tile_files()
        counter = counter + 1

0 USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015.tif
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015/ exists.
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015/ .tif
Execution time in seconds: 0.005611896514892578 256
All tiles there.
1 USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015.tif
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015/ exists.
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015/ .tif
Execution time in seconds: 0.005881547927856445 256
All tiles there.
2 USGS_one_meter_x22y417_VA_ChesapeakeBaySouth_2015.tif
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x22y417_VA_ChesapeakeBaySouth

In [None]:
#one time

input_slope_tif_fp = slope_tif_fp
output_slope_tif_tiles_fp = slope_tif_tiles640_fp
output_slope_tif_tiles_polys_fp = slope_tif_tiles640_polys_fp

# tile just one slope
#input_slope_tif_fp = "/content/drive/MyDrive/crane_pennsylvania/slope/pa_westernpa_2019_d20/USGS_1M_17_x60y465_PA_WesternPA_2019_D20.tif"
filename = "slope_20120129_17SQD0990_utm.tif"
output_slope_tif_subtiles_fp = os.path.join(output_slope_tif_tiles_fp, (filename[:-4] + "/"))
if not os.path.exists(output_slope_tif_subtiles_fp):
    os.mkdir(output_slope_tif_subtiles_fp)          
    print("creating:", output_slope_tif_subtiles_fp)
else:
    print(output_slope_tif_subtiles_fp, "exists.")
tt = Tile_tiff(input_slope_tif_fp, filename, output_slope_tif_subtiles_fp, output_slope_tif_tiles_polys_fp)
tt.tt_tile_pixel_width_overlap = 160
tt.tt_tile_pixel_height_overlap = 160
print(tt.get_attributes())
print(tt.create_tile_matrix())
tt.create_tile_files()

/content/drive/MyDrive/crane_pennsylvania/slope/md_slope_0-199/tiles640/slope_20120129_17SQD0990_utm/ exists.
{'cols': '1598', 'rows': '1598', 'crs': 'EPSG:26918', 'left_min_x': '194503.453125', 'bottom_min_y': '4392108.0', 'right_max_x': '196101.453125', 'top_max_y': '4393706.0', 'coords': '[(194503.453125, 4392108.0), (196101.453125, 4392108.0), (196101.453125, 4393706.0), (194503.453125, 4393706.0)]', 'pixel_size_x': '1.0', 'pixel_size_y': '1.0'}
create_tile_matrix 3 3
[[[0, 0], [640, 640], [0, 0]], [[480, 0], [1120, 640], [1, 0]], [[960, 0], [1598, 640], [2, 0]], [[0, 480], [640, 1120], [0, 1]], [[480, 480], [1120, 1120], [1, 1]], [[960, 480], [1598, 1120], [2, 1]], [[0, 960], [640, 1598], [0, 2]], [[480, 960], [1120, 1598], [1, 2]], [[960, 960], [1598, 1598], [2, 2]]]
/content/drive/MyDrive/crane_pennsylvania/slope/md_slope_0-199/tiles640/polys/slope_20120129_17SQD0990_utmr02c02_tile_poly.shp


# Save a jpg copy of each tif tile

In [8]:
list_of_shps = list_of_files(slope_tif_tiles640_polys_fp, ".shp")
len_list_of_shps = len(list_of_shps)
print(".shp", len_list_of_shps)
list_of_slope_tifs = list_of_files(slope_tif_fp, ".tif")
len_list_of_slope_tifs = len(list_of_slope_tifs)
print(".tif", len_list_of_slope_tifs)
list_of_tile_tifs = list_of_files(slope_tif_tiles640_fp, ".tif")
len_list_of_tile_tifs = len(list_of_tile_tifs)
print(".tif", len_list_of_tile_tifs)
tiles_per_dem = len_list_of_tile_tifs/len_list_of_shps
if tiles_per_dem == 256:
    print("Looks good, 256 tiles per DEM:", tiles_per_dem)
else:
    print("Likely a problem. Not 256 tiles per DEM:", tiles_per_dem)  

/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/polys/ .shp
Execution time in seconds: 0.1780238151550293 131
.shp 131
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015 .tif
Execution time in seconds: 2.037362813949585 33667
.tif 33667
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/ .tif
Execution time in seconds: 0.8277630805969238 33536
.tif 33536
Looks good, 256 tiles per DEM: 256.0


In [9]:
print(slope_tif_tiles640_fp, slope_tif_tiles640_jpgs_fp)


/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/ /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/


In [10]:
#create directories for each dem to hold jpgs tiles
print(list_of_tile_tifs[0])
dem_names = {}
for tile_fp in list_of_tile_tifs:
    dem_names[tile_fp.split(sep="/")[-2]] = "-"

for dem_name in dem_names:
   print(dem_name)
   jpg_dir = os.path.join(slope_tif_tiles640_fp,"jpgs/")
   jpg_dir = os.path.join(jpg_dir,(dem_name+"/"))
   print(jpg_dir)
   if not os.path.exists(jpg_dir):
       print("making", jpg_dir)
       os.mkdir(jpg_dir)
   else:
       print("exists", jpg_dir) 

/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015r00c00.tif
USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015/
making /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x22y415_VA_ChesapeakeBaySouth_2015/
USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015/
making /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x22y416_VA_ChesapeakeBaySouth_2015/
USGS_one_meter_x22y417_VA_ChesapeakeBaySouth_2015
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles64

In [11]:
#Convert tifs to jpgs 

output_slope_tif_tiles_jpgs_fp = slope_tif_tiles640_jpgs_fp

import gdal
import numpy as np
import gdalnumeric
import os

def tif_to_jpg(tif_file, output_file_name, output_folder):
    #print(tif_file,output_file_name, output_folder)
    ds = gdal.Open(tif_file)
    #geoTrans = srcImage.GetGeoTransform()
    band = ds.GetRasterBand(1)
    width = ds.RasterXSize
    height = ds.RasterYSize

    data = band.ReadAsArray(0, 0, width, height)
    #convert all the bad data
    data[data==-9999.0] = 0
    max_value = np.max(data)
    color_multiplier = 255/(max_value-1)
    #print(color_multiplier)
    data = data*color_multiplier
    #print(data)
    data_int = np.array(data, dtype='int')
    #print(data_int)
    #clip = ds.readasarray(ds)
    data_int = data_int.astype(gdalnumeric.numpy.uint8)
    
    print(os.path.join(output_folder,(output_file_name)))
    gdalnumeric.SaveArray(data_int, os.path.join(output_folder,(output_file_name)), format="JPEG")


# find all images
counter = 0
for tif_filepath in list_of_tile_tifs:
    tif_filename = tif_filepath.split(sep="/")[-1]
    # extract image id
    image_id = tif_filename[:-4]
    ext = tif_filename[-4:]
    dem_name = tif_filepath.split(sep="/")[-2]

    if(ext ==".tif"):
        counter = counter + 1      
        if counter > -1:
            jpg_dir_fp = os.path.join(output_slope_tif_tiles_jpgs_fp,(dem_name + "/"))          
            jpg_image_fp = os.path.join(jpg_dir_fp,(image_id+".jpg"))
            if not os.path.exists(jpg_image_fp):
                tif_to_jpg(tif_filepath, (image_id+".jpg"), jpg_dir_fp)
                print(counter, tif_filepath, (image_id+".jpg"), jpg_dir_fp)
            else:
                print(counter, "exists", jpg_image_fp)
        

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015r03c12.jpg
31037 /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015r03c12.tif USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015r03c12.jpg /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015/
/content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/jpgs/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_2015r03c13.jpg
31038 /content/drive/MyDrive/crane_pennsylvania/slope/va_chesapeakebaysouth_2015/tiles640/USGS_one_meter_x76y416_VA_ChesapeakeBaySouth_20

In [None]:
!pip install rasterio

In [None]:
df = df.rename(columns={0:'id', 1:'filename',2:'geometry'}).set_geometry('geometry') 
df = df.set_crs(epsg=26918)
df_fp = os.path.join(prediction_fp,(batch_group+"_dem_boundary.shp"))
df.to_file(df_fp)

print("the file is in ",df_fp)

the file is in  /content/drive/MyDrive/crane_pennsylvania/predictions/project_pa_south_central_b2_2017/pa_south_central_b2_2017_dem_boundary.shp


# Move files to sub directories

Having 100,000 files in a directory led to performance problems.

In [None]:
import os
import time

def check_number_of_files(folder_path, file_extension):
    start_time = time.time()
    counter = 0
    try:
        for filename in os.listdir(folder_path):
            ext = filename[-1*(len(file_extension)):]
            if(ext == file_extension):
               counter = counter + 1            
        print("found: ",counter," files in ", folder_path)
    except:
        print("had an exception")         
        check_number_of_files(folder_path, file_extension)
    execution_time = (time.time() - start_time)
    print('Execution time in seconds: ' + str(execution_time))


In [None]:
import shutil
import glob
print(slope_tif_tiles640_fp)
print(slope_tif_tiles640_jpgs_fp)

# force a list of files to cache them to access Google drive
#check_number_of_files(slope_tif_tiles640_fp, ".tif")
check_number_of_files(slope_tif_tiles640_jpgs_fp, ".jpg")
#check_number_of_files(prediction_xmls_fp, ".xml")

def move_files_to_sub_directories(source_directory, source_extension):
    counter = 0
    sub_directories = []     
    for filename in os.listdir(source_directory):
        ext = filename[-4:]
        if(ext == source_extension):
            counter = counter + 1
            image_name = filename[:-4]
            dem_name = image_name[:-6] 
            print(counter, ext, dem_name, image_name)
            # have a list of sub directories.  Create the directory if it's not in the list yet and does not exist
            sub_directory_fp = os.path.join(source_directory, (dem_name+"/"))
            if not dem_name in sub_directories:
                sub_directories.append(dem_name)                
                if not os.path.exists(sub_directory_fp):
                    os.makedirs(sub_directory_fp)
                    print("made:", sub_directory_fp)
            source_file_fp =  os.path.join(source_directory, filename)
            destination_file_fp =  os.path.join(sub_directory_fp, filename)
            shutil.move(source_file_fp, destination_file_fp)


#move_files_to_sub_directories(slope_tif_tiles640_fp, ".tif")
move_files_to_sub_directories(slope_tif_tiles640_jpgs_fp, ".jpg")
#print(prediction_xmls_fp)
#move_files_to_sub_directories(prediction_xmls_fp, ".xml")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
112779 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c02
112780 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c03
112781 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c04
112782 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c05
112783 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c06
112784 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c07
112785 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c08
112786 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c09
112787 .jpg USGS_1M_17_x53y461_PA_WesternPA_2019_D20 USGS_1M_17_x53y461_PA_WesternPA_2019_D20r10c10
112788 .jpg USGS_1M_17_x53y461_PA_W

In [None]:
print(prediction_xmls_fp)

/content/drive/MyDrive/crane_pennsylvania/predictions/project_pa_northcentral_2019_b19/xmls/


In [None]:
def list_of_files(source_directory, source_extension):
    start_time = time.time()  
    files_list = []  
    counter = 0
    for path, subdirs, files in os.walk(source_directory):
        for name in files:
            #print(name)      
            if(name[-4:] == source_extension):
                counter = counter + 1
                #print(counter, os.path.join(path, name))
                files_list.append(os.path.join(path, name))
    execution_time = (time.time() - start_time)
    print('Execution time in seconds: ' + str(execution_time))                
    return(files_list)
l = list_of_files(slope_tif_tiles640_fp, ".tif")    
print(1, len(l))
l = list_of_files(slope_tif_tiles640_jpgs_fp, ".jpg")    
print(1, len(l))
#l = list_of_files(prediction_xmls_fp, ".xml")    
#print(1, len(l))

Execution time in seconds: 7.523052930831909
1 117760
Execution time in seconds: 4.093571424484253
1 117760


# Scratch pad

In [None]:
os.system('cmd /c "!gdaldem slope /content/drive/MyDrive/crane_pennsylvania/dem/pa_sandy_2014/USGS_one_meter_x34y446_PA_Sandy_2014.tif /content/drive/MyDrive/crane_pennsylvania/slope/pa_sandy_2014/USGS_one_meter_x34y446_PA_Sandy_2014_slope.tif -of GTiff -b 1 -s 1.0"')

32512

In [None]:
print(time.localtime())
print(time.asctime())

time.struct_time(tm_year=2022, tm_mon=9, tm_mday=2, tm_hour=11, tm_min=53, tm_sec=12, tm_wday=4, tm_yday=245, tm_isdst=0)
Fri Sep  2 11:53:12 2022


In [None]:
tt = Tile_tiff('/content/drive/MyDrive/crane_pennsylvania/slope/pa_sandy_2014/','USGS_one_meter_x34y446_PA_Sandy_2014.tif',
               '/content/drive/MyDrive/crane_pennsylvania/slope/pa_sandy_2014/tiles/')
                

tt.tt_tile_pixel_width_overlap = 30
tt.tt_tile_pixel_height_overlap = 30
print(tt.get_attributes())
print(tt.create_tile_matrix())
tt.create_tile_files()

In [None]:
import os
import time
def check_number_of_files(folder_path, file_extension):
    counter = 0
    try:
        for filename in os.listdir(folder_path):
            ext = filename[-1*(len(file_extension)):]
            if(ext == file_extension):
               counter = counter + 1            
        print("found: ",counter," files in ", folder_path)
    except:
        print("had an exception")         
        check_number_of_files(folder_path, file_extension)          
    print("found: ",counter," files in ", folder_path)
    return counter

def list_of_files(source_directory, source_extension):
    print(source_directory, source_extension)  
    start_time = time.time()  
    files_list = []  
    counter = 0
    for path, subdirs, files in os.walk(source_directory):
        for name in files:
            # print(name)      
            if(name[-4:] == source_extension):
                counter = counter + 1
                # print(counter, os.path.join(path, name))
                files_list.append(os.path.join(path, name))
    execution_time = (time.time() - start_time)
    print('Execution time in seconds: ' + str(execution_time), counter)                
    return(files_list)    