# Generating Quadkeys for each state

In [4]:
import os
import mercantile

#import gdal
import numpy as np
import mercantile,fiona
import rasterio as rio 
from rasterio import mask as msk
import random
import geopy.distance
#import os, osr
#import geopandas as gpd
import shutil
import pickle

ModuleNotFoundError: No module named 'mercantile'

In [2]:
def get_quad_tile(lat, lon, precision):
    ret = mercantile.tile(lon,lat,precision)
    return ret

def get_quad_key_from_tile(x, y, zoom):
    return mercantile.quadkey(x, y, zoom)

def get_quad_key_from_tile(tile):
    return mercantile.quadkey(tile.x, tile.y, tile.z)

def get_tile_from_key(key):
    return mercantile.quadkey_to_tile(key)

def get_quad_key(lat, lon, zoom):
    tile = get_quad_tile(lat, lon, precision=zoom)
    # print(tile)
    return get_quad_key_from_tile(tile.x, tile.y, tile.z)

def get_max_possible_xy(zoom):
    if zoom == 0:
        return 0
    return 2**zoom-1

def validate_tile(tile):
    max_xy = get_max_possible_xy(tile.z)
    if tile.x > max_xy or tile.x < 0 or tile.y > max_xy or tile.y < 0:
        return False
    return True

#GIVEN A QUAD_TILE, GET ITS LAT-LNG BOUNDS
def get_bounding_lng_lat(tile_key):
    tile = get_tile_from_key(tile_key)
    bounds = mercantile.bounds(tile)
    # print("ul: " , ul)
    return [bounds.west, bounds.east, bounds.north, bounds.south]

# Get all the tiles from bounds and zoom level 
# bounds: (l = left/west, b = bottom/south, r = right/east, t = top/north)
def get_all_tiles_for_a_bounds(bounds,zoom_level):
    all_tiles = mercantile.tiles(*bounds, zooms=[zoom_level])
    return all_tiles

# provide a root directory with folders named by quadkeys and return a list of quadkeys
def read_all_folders_as_quadkey(path):
    
    sub_dirs = [name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]
    #print(sub_folders)
    #print(f"total folders/quadkeys: {len(sub_dirs)}")

    #sub dirs/folders named by quadkeys
    allQuadKeys = sub_dirs
    return allQuadKeys

#provide a list of quadkeys and state quadkeys dictionary and return the statewise splitting dictionary for the provided list
def split_quadkeys_to_each_state(allQuadKeys,state_all_quadkeys):
    #Counting SMAP downloaded kyes for each state
    smap_statewise_quadkeys = {}

    for qk in allQuadKeys:
        #print(type(qk))
        for state in state_all_quadkeys.keys():
            if qk in state_all_quadkeys[state]:
                #print("yes")
                if state not in smap_statewise_quadkeys.keys():
                    smap_statewise_quadkeys[state] = [qk]
                else:
                    smap_statewise_quadkeys[state].append(qk)
    return smap_statewise_quadkeys
            

## Statewise distributing the quadkeys/lat lon bounds

## State Lat Lon Bounds

In [9]:
#State lat lon bounds: (l = left/west, b = bottom/south, r = right/east, t = top/north)
California_bounds = (-124.41060660766607,32.5342307609976,-114.13445790587905,42.00965914828148) #California lat lon bounds
Colorado_bounds = (-109.05919619986199,36.99275055519555,-102.04212644366443,41.00198213121131) #Colorado
Arkansas_bounds = (-94.61946646626465,33.00413641175411,-89.65547287402873,36.49965029279292)
Texas_bounds = (-106.64719063660635,25.840437651866516,-93.5175532104321,36.50050935248352)
Wyoming_bounds = (-111.05843295392954,40.995109653686534,-104.05213107971079,45.006059349083486)
Oklahoma_bounds = (-103.00405723377233,33.61664597114971,-94.43282317863178,37.002200211792115)
NewMaxico_bounds = (-109.04842831788318,31.332406253852533,-103.0004679397794,37.00048209241092)

state_all_quadkeys = {}

In [16]:
bounds = NewMaxico_bounds
all_tiles = mercantile.tiles(*bounds, zooms=[13])
quadkeys = []
for t in all_tiles:
    #print(get_quad_key_from_tile(t))
    quadkeys.append(get_quad_key_from_tile(t))
state_all_quadkeys['NewMaxico_bounds'] = quadkeys
state_all_quadkeys
#with open('state_quadKey_list.pkl', 'wb') as fp:
#    pickle.dump(state_all_quadkeys, fp)

{'California_bounds': ['0212233310000',
  '0212233310002',
  '0212233310020',
  '0212233310022',
  '0212233310200',
  '0212233310202',
  '0212233310220',
  '0212233310222',
  '0212233312000',
  '0212233312002',
  '0212233312020',
  '0212233312022',
  '0212233312200',
  '0212233312202',
  '0212233312220',
  '0212233312222',
  '0212233330000',
  '0212233330002',
  '0212233330020',
  '0212233330022',
  '0212233330200',
  '0212233330202',
  '0212233330220',
  '0212233330222',
  '0212233332000',
  '0212233332002',
  '0212233332020',
  '0212233332022',
  '0212233332200',
  '0212233332202',
  '0212233332220',
  '0212233332222',
  '0230011110000',
  '0230011110002',
  '0230011110020',
  '0230011110022',
  '0230011110200',
  '0230011110202',
  '0230011110220',
  '0230011110222',
  '0230011112000',
  '0230011112002',
  '0230011112020',
  '0230011112022',
  '0230011112200',
  '0230011112202',
  '0230011112220',
  '0230011112222',
  '0230011130000',
  '0230011130002',
  '0230011130020',
  '0230011

In [17]:
state_all_quadkeys.keys()

dict_keys(['California_bounds', 'Colorado_bounds', 'Texas_bounds', 'Arkansas_bounds', 'Wyoming_bounds', 'Oklahoma_bounds', 'NewMaxico_bounds'])

In [20]:
# Saving the quadkeys for each state (SMAP data)
save = True
if save: 
    with open('state_quadKey_z13_list.pkl', 'wb') as fp:
        pickle.dump(state_all_quadkeys, fp)

In [26]:
# Reading all statewise quadkeys
file = open('state_quadKey_z13_list.pkl','rb')
state_all_quadkeys = pickle.load(file)
state_all_quadkeys.keys()

dict_keys(['California', 'Colorado', 'Arkansas', 'Texas', 'Wyoming', 'Oklahoma', 'NewMaxico'])

In [28]:
state_all_quadkeys['Colorado_bounds']

['0213223223332',
 '0231001001110',
 '0231001001112',
 '0231001001130',
 '0231001001132',
 '0231001001310',
 '0231001001312',
 '0231001001330',
 '0231001001332',
 '0231001003110',
 '0231001003112',
 '0231001003130',
 '0231001003132',
 '0231001003310',
 '0231001003312',
 '0231001003330',
 '0231001003332',
 '0231001021110',
 '0231001021112',
 '0231001021130',
 '0231001021132',
 '0231001021310',
 '0231001021312',
 '0231001021330',
 '0231001021332',
 '0231001023110',
 '0231001023112',
 '0231001023130',
 '0231001023132',
 '0231001023310',
 '0231001023312',
 '0231001023330',
 '0231001023332',
 '0231001201110',
 '0231001201112',
 '0231001201130',
 '0231001201132',
 '0231001201310',
 '0231001201312',
 '0231001201330',
 '0231001201332',
 '0231001203110',
 '0231001203112',
 '0231001203130',
 '0231001203132',
 '0231001203310',
 '0231001203312',
 '0231001203330',
 '0231001203332',
 '0231001221110',
 '0231001221112',
 '0231001221130',
 '0231001221132',
 '0231001221310',
 '0231001221312',
 '02310012

## All Quadkeys (length 13) for States: CA, CO, AK, TX, OK, NM

In [18]:
# total number of quadkeys for each state
for k in state_all_quadkeys.keys():
    print(f" {k} = {len(state_all_quadkeys[k])}")

 California_bounds = 64155
 Colorado_bounds = 18880
 Texas_bounds = 85215
 Arkansas_bounds = 11172
 Wyoming_bounds = 20286
 Oklahoma_bounds = 18620
 NewMaxico_bounds = 21823


## All Quadkeys (length 11) for States: CA, CO, AK, TX, OK, NM

In [27]:
# total number of quadkeys for each state
for k in state_all_quadkeys.keys():
    print(f" {k} = {len(state_all_quadkeys[k])}")

 California = 4071
 Colorado = 1271
 Arkansas = 725
 Texas = 5400
 Wyoming = 1312
 Oklahoma = 1176
 NewMaxico = 1440


## SMAP

### Reading all the SMAP data folders as quadkeys

In [21]:
smap_path = '/s/chopin/f/proj/fineET/data_for_soil_moisture_work/new_quadhash_all_datasets/smap_3km_preprocessed_202021/split'

folder = smap_path

sub_folders = [name for name in os.listdir(folder) if os.path.isdir(os.path.join(folder, name))]

#print(sub_folders)
print(f"total folders {len(sub_folders)}")

#sub folder named after each quadkey
allQuadKeys = sub_folders

total folders 19200


In [27]:
if '0231010111130' in allQuadKeys:
    print ("yes")

yes


In [29]:
if '0231001001112' in allQuadKeys:
    print ("yes")

### converting each quadkey to Lat Lon bounds

In [20]:
allbbox = {}
zoomlevel = 11
for qk in allQuadKeys:
    #print(qk)
    tile = mercantile.quadkey_to_tile(qk)
    bbox = mercantile.bounds(tile) #l,b,r,t = mercantile.bounds(385, 820, 11) # l = left/west, b = bottom/south, r = right/east, t = top/north
    allbbox[qk] = bbox
print(f"total bbox:  {len(allbbox)}")

total bbox:  14740


In [30]:
#Counting SMAP downloaded kyes for each state
smap_statewise_quadkeys = {}

for qk in allQuadKeys:
    #print(type(qk))
    for state in state_all_quadkeys.keys():
        if qk in state_all_quadkeys[state]:
            #print("yes")
            if state not in smap_statewise_quadkeys.keys():
                smap_statewise_quadkeys[state] = [qk]
            else:
                smap_statewise_quadkeys[state].append(qk)
            
#state_all_quadkeys


In [31]:
smap_statewise_quadkeys.keys()

dict_keys(['Colorado_bounds', 'NewMaxico_bounds', 'Wyoming_bounds', 'Oklahoma_bounds'])

## SMAP Downloaded Quadkeys for each state

In [32]:
# SMAP Data: total number of quadkeys for each state
for k in smap_statewise_quadkeys.keys():
    print(f" {k} = {len(smap_statewise_quadkeys[k])}")

 Colorado_bounds = 18762
 NewMaxico_bounds = 414
 Wyoming_bounds = 114
 Oklahoma_bounds = 69


In [34]:
smap_statewise_quadkeys_CO = smap_statewise_quadkeys['Colorado_bounds']
smap_statewise_quadkeys_CO

['0231013013320',
 '0231013021132',
 '0231010101023',
 '0231012313231',
 '0231012201321',
 '0231010330312',
 '0231011103000',
 '0231012010112',
 '0231012001122',
 '0231011103300',
 '0231010230230',
 '0231011013001',
 '0231003133331',
 '0231003213012',
 '0231011001303',
 '0231011222003',
 '0231010130300',
 '0231003300332',
 '0231012103310',
 '0231013200013',
 '0231012221003',
 '0231011211210',
 '0231001230220',
 '0231011301031',
 '0231001030121',
 '0231003012002',
 '0231012130131',
 '0231010211313',
 '0231001303233',
 '0231010101012',
 '0231011223220',
 '0231010202033',
 '0231001121203',
 '0231003312120',
 '0231013032210',
 '0231010202333',
 '0231013210100',
 '0231010201122',
 '0231001113000',
 '0231012331001',
 '0231013301123',
 '0231003102213',
 '0231010210012',
 '0231010232323',
 '0231010002203',
 '0231012330311',
 '0231001032311',
 '0231012300023',
 '0231010201200',
 '0231011221301',
 '0231011210220',
 '0231013000220',
 '0231010111000',
 '0231012122302',
 '0231011002032',
 '02310111

In [33]:
# Saving the quadkeys for each state (SMAP data)

save = False
if save: 
    with open('SMAP_statewise_Colorado_quadKey_z13_list.pkl', 'wb') as fp:
        pickle.dump(smap_statewise_quadkeys, fp)

## GRIDMET

In [35]:
# Getting all the folders as quadkey list
path = '/s/chopin/f/proj/fineET/data_for_soil_moisture_work/new_quadhash_all_datasets/gridmet/split'
all_GridMET_quadKeys = read_all_folders_as_quadkey(path)

In [36]:
print(f" total quadkeys: {len(all_GridMET_quadKeys)}")

 total quadkeys: 19200


In [37]:
# Splitting quadkeys to each state
GridMET_statewise_quadkeys = split_quadkeys_to_each_state(all_GridMET_quadKeys,state_all_quadkeys)

In [39]:
# Saving the quadkeys for each state (SMAP data)
save = False
if save: 
    with open('GridMET_statewise_quadKey_z13_list.pkl', 'wb') as fp:
        pickle.dump(GridMET_statewise_quadkeys, fp)

## GRIDMET Downloaded Quadkeys for each state

In [38]:
# GridMET Data: total number of quadkeys for each state
for k in GridMET_statewise_quadkeys.keys():
    print(f" {k} = {len(GridMET_statewise_quadkeys[k])}")

 Colorado_bounds = 18762
 NewMaxico_bounds = 414
 Wyoming_bounds = 114
 Oklahoma_bounds = 69


## gNatsGo

In [40]:
path = '/s/chopin/f/proj/fineET/data_for_soil_moisture_work/new_quadhash_all_datasets/gNATSGO_preprocessed_202021/split'
# Getting all the folders as quadkey list
all_gNatsGo_quadKeys = read_all_folders_as_quadkey(path)

In [45]:
print(f" total quadkeys: {len(all_gNatsGo_quadKeys)}")

 total quadkeys: 12226


In [30]:
#all_gNatsGo_quadKeys

In [46]:
# Splitting quadkeys to each state
gNatsGo_statewise_quadkeys = split_quadkeys_to_each_state(all_gNatsGo_quadKeys,state_all_quadkeys)

In [32]:
#state_all_quadkeys

In [48]:
# Saving the quadkeys for each state (gNatsGo data)
save = False
if save: 
    with open('gNatsGo_statewise_quadKey_z13_list.pkl', 'wb') as fp:
        pickle.dump(gNatsGo_statewise_quadkeys, fp)

## gNatsGo Downloaded Quadkeys for each state

In [47]:
# gNatsGo Data: total number of quadkeys for each state
for k in gNatsGo_statewise_quadkeys.keys():
    print(f" {k} = {len(gNatsGo_statewise_quadkeys[k])}")

 Colorado_bounds = 12024
 NewMaxico_bounds = 303
 Wyoming_bounds = 102


In [11]:
gNatsGo_statewise_quadkeys

{'Colorado': ['02310121200',
  '02310122032',
  '02310011022',
  '02310102312',
  '02310103322',
  '02310102131',
  '02310101122',
  '02310103003',
  '02310121230',
  '02310102222',
  '02310122213',
  '02310031110',
  '02132322233',
  '02310011111',
  '02310101320',
  '02310031300',
  '02310010300',
  '02310102000',
  '02310100211',
  '02310031103',
  '02310033301',
  '02310123203',
  '02310100133',
  '02310122012',
  '02310010332',
  '02310032302',
  '02310100013',
  '02310122001',
  '02310011033',
  '02310031101',
  '02310122211',
  '02310030312',
  '02310031213',
  '02310012100',
  '02310100231',
  '02310102212',
  '02310100003',
  '02310013013',
  '02310032300',
  '02310100010',
  '02310123011',
  '02310101030',
  '02310122303',
  '02310013012',
  '02310121031',
  '02310100200',
  '02310100310',
  '02310121122',
  '02132323232',
  '02310032130',
  '02310011001',
  '02310103212',
  '02310100323',
  '02310033130',
  '02310102121',
  '02310123000',
  '02310121102',
  '02310031201',
  

In [70]:
print(f" total quadkeys: {len(all_gNatsGo_quadKeys)}")

 total quadkeys: 712


In [66]:
all_gNatsGo_quadKeys[500]

'02310200021'

In [71]:
#checking key lat long 
qk = all_gNatsGo_quadKeys[0]
get_bounding_lng_lat(qk)

[-111.97265625, -111.796875, 28.149503211544573, 27.99440141104615]

In [72]:
#checking key lat long 
qk = all_gNatsGo_quadKeys[300]
get_bounding_lng_lat(qk)

[-112.1484375, -111.97265625, 31.952162238024968, 31.80289258670676]

In [73]:
#checking key lat long 
qk = all_gNatsGo_quadKeys[700]
get_bounding_lng_lat(qk)

[-111.4453125, -111.26953125, 40.044437584608566, 39.90973623453718]

In [None]:
31.80289258670676,-112.1484375