TODO list:
- Scale up the exports so that the same analysis can be run everywhere
- Examine where the most changes have happened, and visualize this on a map
- Expand the documentation in this .ipynb: describe each of the cells, the processing steps, etc.

Importing useful packages

In [1]:
import ee
import geemap
import pandas as pd
import geopandas as gpd
import ipyleaflet
from tqdm import tqdm
import ipywidgets as widgets
from IPython.display import display

# Start the Earth Engine client session before any other ee.* call is made.
# NOTE(review): presumably credentials are already cached (e.g. from an
# earlier ee.Authenticate()) — confirm before running on a fresh machine.
ee.Initialize()

Functions for handling geometry and data

In [2]:
def pd_shp_to_ee_poly(shp):
    """Converts a Polygon from GeoPandas to an ee.Geometry.Polygon
    object suitable for use within Google Earth Engine.

    Parameters
    ----------
    shp : polygon-like
        Object exposing an ``exterior`` ring and (optionally) ``interiors``
        rings, each with ``coords.xy`` (e.g. a shapely Polygon taken from
        a GeoDataFrame).

    Returns
    -------
    ee.Geometry.Polygon
        Polygon built with ``proj=None`` and ``geodesic=False``. Interior
        rings (holes) of ``shp`` are forwarded so the enclosed areas stay
        excluded from the region.
    """
    def _ring_to_list(ring):
        # Earth Engine takes plain [x, y] pairs rather than coordinate arrays.
        xs, ys = ring.coords.xy
        return [[x, y] for x, y in zip(xs, ys)]

    outer = _ring_to_list(shp.exterior)
    holes = [_ring_to_list(ring) for ring in getattr(shp, "interiors", ())]

    # A shape without holes is still passed as one flat ring (the call is
    # then identical to the previous behaviour); with holes the rings are
    # passed as [outer, hole_1, ...].
    coords = [outer, *holes] if holes else outer

    return ee.Geometry.Polygon(coords, None, False)


def mask_classes(image, vals_to_keep):
    """Keeps only the pixels of image whose value appears in
    vals_to_keep; the image's mask is updated with the result."""
    # Begin with an all-zero mask and OR in one equality test per class value.
    keep_mask = ee.Image(0)
    for class_val in vals_to_keep:
        keep_mask = keep_mask.Or(image.eq(class_val))

    return image.updateMask(keep_mask)


def get_county_roi(county_name, shp_path = "../ga-counties/Counties_Georgia.shp"):
    """Returns a ee.Geometry.Polygon object representing
    a particular county within Georgia, along with the
    centroid of that object.

    Parameters
    ----------
    county_name : str
        Value to look up in the "NAME10" column of the county shape file.
    shp_path : str, optional
        Location of the county shape file. The default is the same relative
        path the notebook uses when it loads ``ga_counties``; previously this
        function read "ga-counties/..." (without the leading "../").

    Returns
    -------
    tuple
        ``(county_roi, (x, y))`` where ``county_roi`` is the county polygon
        simplified with ``maxError = 1`` and ``(x, y)`` is the centroid of
        the original (unsimplified) county shape.

    Raises
    ------
    IndexError
        If no row of the shape file has ``NAME10 == county_name``.
    """
    ga_counties = gpd.read_file(shp_path)
    matches = ga_counties[ga_counties["NAME10"] == county_name].geometry

    # An empty selection used to surface as an unexplained IndexError from
    # ``.values[0]``; keep the exception type but say what went wrong.
    if len(matches) == 0:
        raise IndexError(
            f"No county named {county_name!r} found in {shp_path}"
        )

    county_shp = matches.values[0]
    xs, ys = county_shp.centroid.coords.xy
    county_roi = pd_shp_to_ee_poly(county_shp).simplify(maxError = 1)

    return county_roi, (xs[0], ys[0])


def get_labels(collection, class_name):
    """Returns a data frame containing the band values/class/corresponding
    palette color.

    Parameters
    ----------
    collection : ee.ImageCollection
        Collection whose first image carries the properties
        ``{class_name}_class_values``, ``{class_name}_class_names`` and
        ``{class_name}_class_palette``.
    class_name : str
        Prefix of those property names (e.g. 'landcover').

    Returns
    -------
    pandas.DataFrame
        Columns ``layer_vals``, ``labels``, ``class_description`` and
        ``palette``, one row per class.
    """
    # Only the first image's metadata is read, so resolve it once.
    first_image = collection.first()
    class_vals = first_image.get(f"{class_name}_class_values").getInfo()
    class_names = first_image.get(f"{class_name}_class_names").getInfo()
    class_palette = first_image.get(f"{class_name}_class_palette").getInfo()

    class_labels = []
    class_desc = []
    for name in class_names:
        # Split on the FIRST colon only: a description that itself contains
        # a colon is kept whole, and a name without any colon yields an
        # empty description instead of raising IndexError.
        label, _, description = name.partition(':')
        class_labels.append(label)
        class_desc.append(description)

    class_df = pd.DataFrame({
        'layer_vals': class_vals,
        'labels': class_labels,
        'class_description': class_desc,
        'palette': class_palette
    })

    return class_df


def get_legend_keys_values(class_df, vals_to_keep):
    """Builds the legend entries (label text and palette color) for
    just the rows of class_df whose layer_vals is in vals_to_keep."""
    kept_rows = class_df[class_df["layer_vals"].isin(vals_to_keep)]

    # Anything after a ':' in a label is dropped from the legend text.
    legend_keys = [label.split(':')[0] for label in kept_rows["labels"]]
    legend_colors = list(kept_rows["palette"])

    return legend_keys, legend_colors


def union_polygons(poly_ls):
    """Folds the polygons in poly_ls into one geometry by unioning
    them one after another, starting from the first element."""
    merged = poly_ls[0]
    for idx in range(1, len(poly_ls)):
        merged = merged.union(poly_ls[idx], maxError = 1)

    return merged


def rect_from_corners(tl, br):
    """Builds a four-cornered ee.Geometry.Polygon from the top left (tl)
    and bottom right (br) corners, each given as an indexable pair."""
    left, top = tl[0], tl[1]
    right, bottom = br[0], br[1]

    # Corners are listed top-left, top-right, bottom-right, bottom-left.
    corners = [
        [left, top],
        [right, top],
        [right, bottom],
        [left, bottom],
    ]

    return ee.Geometry.Polygon(corners, None, False)

In [3]:
# The "main" Atlanta counties used for the analysis in this notebook
county_list = [
    "Cherokee", "Clayton", "Cobb", "DeKalb",
    "Douglas", "Fayette", "Forsyth", "Fulton",
    "Gwinnett", "Henry", "Rockdale",
]

# Counties in the census specified statistical area for Atlanta
larger_county_list = [
    "Fulton", "Gwinnett", "Cobb", "DeKalb",
    "Clayton", "Cherokee", "Forsyth", "Henry",
    "Paulding", "Coweta", "Douglas", "Fayette",
    "Carroll", "Newton", "Bartow", "Walton",
    "Rockdale", "Barrow", "Spalding", "Pickens",
    "Haralson", "Dawson", "Butts", "Meriwether",
    "Morgan", "Pike", "Lamar", "Jasper",
    "Heard",
]

# Shape files for the counties and the census tracts within Georgia
ga_counties = gpd.read_file("../ga-counties/Counties_Georgia.shp")
ga_tracts = gpd.read_file("../ga-tracts-2019/tl_2019_13_tract.shp")

In [4]:
# Rows of the county table whose NAME10 is one of the counties of interest
at_counties = ga_counties.loc[ga_counties["NAME10"].isin(county_list)]
at_counties

Unnamed: 0,OBJECTID,STATEFP10,COUNTYFP10,GEOID10,NAME10,NAMELSAD10,totpop10,WFD,RDC_AAA,MNGWPD,...,MSA,F1HR_NA,F8HR_NA,Reg_Comm,Acres,Sq_Miles,Label,GlobalID,last_edite,geometry
5,6,13,113,13113,Fayette,Fayette County,106567,Y,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,127543.0,199.285995,FAYETTE,{0089049C-AF9E-48C9-83D8-75FE86DFE045},,"POLYGON ((-84.55686 33.52841, -84.55136 33.529..."
20,21,13,247,13247,Rockdale,Rockdale County,85215,Y,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,84525.703125,132.070999,ROCKDALE,{1146BC41-D714-4074-BFCC-B1EA52C17B96},,"POLYGON ((-83.93160 33.65087, -83.93163 33.650..."
23,24,13,67,13067,Cobb,Cobb County,688078,N,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,220455.0,344.459991,COBB,{670FE76B-423B-4211-BFB0-8A9145225470},,"POLYGON ((-84.72423 33.90360, -84.72423 33.903..."
25,26,13,117,13117,Forsyth,Forsyth County,175511,N,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,158154.0,247.115997,FORSYTH,{4C27E78B-AD23-45A7-853A-CF98F6F55554},2021-07-01,"POLYGON ((-84.25878 34.15485, -84.25877 34.158..."
32,33,13,63,13063,Clayton,Clayton County,259424,Y,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,92354.101562,144.302994,CLAYTON,{B1CDC3D4-04E4-4417-BF51-E5916A6F458E},,"POLYGON ((-84.45856 33.59444, -84.45861 33.595..."
41,42,13,151,13151,Henry,Henry County,203922,Y,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,208948.0,326.480988,HENRY,{35264E7E-E781-4C21-B4CC-C812B3B58FDE},,"POLYGON ((-84.29468 33.43572, -84.29469 33.435..."
71,72,13,135,13135,Gwinnett,Gwinnett County,805321,Y,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,279481.0,436.688995,GWINNETT,{3BBB29C1-CFA9-4906-B2DA-6D86E4FB8DAD},,"POLYGON ((-84.11333 34.07829, -84.11336 34.078..."
89,90,13,121,13121,Fulton,Fulton County,920581,N,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,341906.0,534.229004,FULTON,{44A3942A-9608-4E68-A927-A38721FF266D},,"POLYGON ((-84.44833 33.64688, -84.44833 33.646..."
119,120,13,97,13097,Douglas,Douglas County,132403,Y,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,128641.0,201.001007,DOUGLAS,{3429ADB5-8FED-490F-AFFD-F56DE8AF28B8},,"POLYGON ((-84.76750 33.78690, -84.76380 33.787..."
137,138,13,89,13089,DeKalb,DeKalb County,691893,N,Y,Y,...,Y,Y,Y,Atlanta Regional Commission,173496.0,271.087006,DEKALB,{3912F679-CB5F-4618-B448-53B943B436DF},,"POLYGON ((-84.18745 33.64618, -84.18771 33.646..."


In [5]:
# Attach the county name to every tract (joining COUNTYFP to COUNTYFP10),
# then keep only the tracts that lie in the counties of interest
at_tracts = (
    ga_tracts
    .merge(
        ga_counties[['COUNTYFP10', 'NAME10']],
        left_on = "COUNTYFP",
        right_on = "COUNTYFP10",
    )
    .loc[lambda tracts: tracts["NAME10"].isin(county_list)]
    .reset_index()
)

at_tracts

Unnamed: 0,index,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,COUNTYFP10,NAME10
0,16,13,135,050311,13135050311,503.11,Census Tract 503.11,G5020,S,4923609,6728,+33.9485370,-084.2126207,"POLYGON ((-84.23193 33.94910, -84.23162 33.949...",135,Gwinnett
1,17,13,135,050527,13135050527,505.27,Census Tract 505.27,G5020,S,5954824,51004,+34.0086535,-083.9997169,"POLYGON ((-84.01345 33.99944, -84.01341 33.999...",135,Gwinnett
2,18,13,135,050107,13135050107,501.07,Census Tract 501.07,G5020,S,15525732,2993934,+34.1395971,-084.0655801,"POLYGON ((-84.09480 34.13222, -84.09474 34.132...",135,Gwinnett
3,19,13,135,050209,13135050209,502.09,Census Tract 502.09,G5020,S,6942688,255603,+33.9743018,-084.1453842,"POLYGON ((-84.16734 33.97362, -84.16652 33.974...",135,Gwinnett
4,20,13,135,050436,13135050436,504.36,Census Tract 504.36,G5020,S,5571346,22428,+33.9067283,-084.1087112,"POLYGON ((-84.13142 33.91235, -84.13065 33.912...",135,Gwinnett
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,1839,13,247,060304,13247060304,603.04,Census Tract 603.04,G5020,S,20678211,231927,+33.6579651,-084.0325251,"POLYGON ((-84.05760 33.67761, -84.05576 33.677...",247,Rockdale
779,1840,13,247,060201,13247060201,602.01,Census Tract 602.01,G5020,S,12641916,647420,+33.7032625,-084.0517635,"POLYGON ((-84.07701 33.68761, -84.07631 33.689...",247,Rockdale
780,1841,13,247,060407,13247060407,604.07,Census Tract 604.07,G5020,S,5915690,118567,+33.5656211,-084.0202011,"POLYGON ((-84.03745 33.55636, -84.03646 33.557...",247,Rockdale
781,1842,13,247,060309,13247060309,603.09,Census Tract 603.09,G5020,S,8338814,71359,+33.6876272,-084.0194306,"POLYGON ((-84.04143 33.69551, -84.04135 33.695...",247,Rockdale


Load geometries and class_df

In [6]:
# Build a single region of interest for Atlanta: convert each county in
# county_list to a simplified Earth Engine polygon, then union them
poly_list = [
    pd_shp_to_ee_poly(
        ga_counties[ga_counties["NAME10"] == county].geometry.values[0]
    ).simplify(maxError = 1)
    for county in county_list
]

atlanta_roi = union_polygons(poly_list).simplify(maxError = 1)

In [7]:
# Clip every image of the NLCD collection to the Atlanta region, then
# extract the land cover class labels and save them for future reference
def _clip_to_atlanta(image):
    """Clips a single image to atlanta_roi."""
    return image.clip(atlanta_roi)

collection = (
    ee.ImageCollection('USGS/NLCD_RELEASES/2019_REL/NLCD')
    .map(_clip_to_atlanta)
)

class_df = get_labels(collection, 'landcover')
class_df.to_csv("data-files/ncld_class_labels.csv")
class_df

Unnamed: 0,layer_vals,labels,class_description,palette
0,11,Open water,"areas of open water, generally with less than...",466b9f
1,12,Perennial ice/snow,areas characterized by a perennial cover of i...,d1def8
2,21,"Developed, open space",areas with a mixture of some constructed mate...,dec5c5
3,22,"Developed, low intensity",areas with a mixture of constructed materials...,d99282
4,23,"Developed, medium intensity",areas with a mixture of constructed materials...,eb0000
5,24,Developed high intensity,highly developed areas where people reside or...,ab0000
6,31,Barren land (rock/sand/clay),"areas of bedrock, desert pavement, scarps, ta...",b3ac9f
7,41,Deciduous forest,areas dominated by trees generally greater th...,68ab5f
8,42,Evergreen forest,areas dominated by trees generally greater th...,1c5f2c
9,43,Mixed forest,areas dominated by trees generally greater th...,b5c58f


Write functions that work on different geometry areas and images, and record
roads separately rather than simply as an impervious surface.

In [8]:
def remove_roads(image, road_classes = (20, 21, 22)):
    """Removes impervious surfaces corresponding
    to roads from the NLCD collection image.

    Parameters
    ----------
    image : ee.Image
        Image with 'impervious_descriptor' and 'landcover' bands.
    road_classes : sequence of int, optional
        'impervious_descriptor' values to treat as road.
        NOTE(review): 20/21/22 are presumably the primary/secondary/tertiary
        road codes of the NLCD 2019 release — confirm against the dataset
        catalog.

    Returns
    -------
    ee.Image
        The 'landcover' band with every pixel flagged as road masked out.
    """
    image_imp = image.select('impervious_descriptor')
    image_land = image.select('landcover')

    # mask_classes keeps only the road pixels, so the mask of its result is
    # 1 on roads and 0 elsewhere; inverting it gives "everything but roads".
    road_pixels = mask_classes(image_imp, road_classes).mask()
    land_mask = road_pixels.Not()

    # updateMask (rather than the deprecated mask *setter*) only ever hides
    # more pixels: anything already masked on the land cover band — e.g. by
    # an earlier clip — stays masked instead of being re-exposed.
    return image_land.updateMask(land_mask)

def calculate_histogram_over_region(geometry, image):
    """Given an image, assumed to be from the NLCD collection, and a
    geometry region, return the histogram of pixel values for each class.

    Parameters
    ----------
    geometry : ee.Geometry
        Region the 'landcover' band is clipped to before counting.
    image : ee.Image
        Image with a 'landcover' band.

    Returns
    -------
    pandas.DataFrame
        Columns ``values`` (the class value) and ``landcover`` (its pixel
        count), both float64. The frame is empty when the reduction yields
        no histogram for the region.
    """
    vals = (
        image.select('landcover').clip(geometry)
        .reduceRegion(
            ee.Reducer.frequencyHistogram()
        )
        .getInfo()
    )

    column_types = {'values': 'float64', 'landcover': 'float64'}

    # The reduction can come back without a histogram (missing key, None or
    # an empty dict); DataFrame.from_dict raises on a None value, so return
    # an empty frame with the usual columns instead.
    histogram = vals.get('landcover') if vals else None
    if not histogram:
        return pd.DataFrame({
            column: pd.Series(dtype = dtype)
            for column, dtype in column_types.items()
        })

    vals_df = (
        pd.DataFrame.from_dict({'landcover': histogram})
        .rename_axis(index = 'values')
        .reset_index()
    ).astype(column_types)

    return vals_df

In [9]:
# Years used to label the exports; the i-th year is paired with the i-th
# image of `collection` further down.
years = [2001, 2004, 2006, 2008, 2011, 2013, 2016, 2019]

# The tract table is exported in consecutive chunks of rows. The chunk
# boundaries are derived from the table itself so that no tract is dropped
# (or an empty chunk exported) when the number of tracts changes.
TRACTS_PER_EXPORT = 100
num_rows = at_tracts.shape[0]
parts = [
    [start, min(start + TRACTS_PER_EXPORT, num_rows)]
    for start in range(0, num_rows, TRACTS_PER_EXPORT)
]

# The image list does not depend on the chunk, so build it only once.
collection_list = collection.toList(collection.size())

for part_idx, part in enumerate(parts):
    at_tracts_fc = geemap.geopandas_to_ee(at_tracts.iloc[part[0]:part[1]])

    for year_idx, year in enumerate(years):
        # NOTE(review): images are matched to years purely by position in
        # the collection — confirm the collection holds exactly these years
        # in this order.
        image = ee.Image(collection_list.get(year_idx))
        image = remove_roads(image)

        # One frequency histogram of the remaining land cover pixels per tract
        new_fc = image.reduceRegions(
            collection = at_tracts_fc,
            reducer = ee.Reducer.frequencyHistogram()
        )

        geemap.ee_export_vector_to_drive(
            new_fc,
            description = f'ncld_histogram_{year}_{part_idx}',
            folder = 'data_satellite',
            file_format = 'csv',
            selectors = ['GEOID', 'histogram']
        )


  pd.Int64Index,


Exporting ncld_histogram_2001_0...
Exporting ncld_histogram_2004_0...
Exporting ncld_histogram_2006_0...
Exporting ncld_histogram_2008_0...
Exporting ncld_histogram_2011_0...
Exporting ncld_histogram_2013_0...
Exporting ncld_histogram_2016_0...
Exporting ncld_histogram_2019_0...


  pd.Int64Index,


Exporting ncld_histogram_2001_1...
Exporting ncld_histogram_2004_1...
Exporting ncld_histogram_2006_1...
Exporting ncld_histogram_2008_1...
Exporting ncld_histogram_2011_1...
Exporting ncld_histogram_2013_1...
Exporting ncld_histogram_2016_1...
Exporting ncld_histogram_2019_1...


  pd.Int64Index,


Exporting ncld_histogram_2001_2...
Exporting ncld_histogram_2004_2...
Exporting ncld_histogram_2006_2...
Exporting ncld_histogram_2008_2...
Exporting ncld_histogram_2011_2...
Exporting ncld_histogram_2013_2...
Exporting ncld_histogram_2016_2...
Exporting ncld_histogram_2019_2...


  pd.Int64Index,


Exporting ncld_histogram_2001_3...
Exporting ncld_histogram_2004_3...
Exporting ncld_histogram_2006_3...
Exporting ncld_histogram_2008_3...
Exporting ncld_histogram_2011_3...
Exporting ncld_histogram_2013_3...
Exporting ncld_histogram_2016_3...
Exporting ncld_histogram_2019_3...


  pd.Int64Index,


Exporting ncld_histogram_2001_4...
Exporting ncld_histogram_2004_4...
Exporting ncld_histogram_2006_4...
Exporting ncld_histogram_2008_4...
Exporting ncld_histogram_2011_4...
Exporting ncld_histogram_2013_4...
Exporting ncld_histogram_2016_4...
Exporting ncld_histogram_2019_4...


  pd.Int64Index,


Exporting ncld_histogram_2001_5...
Exporting ncld_histogram_2004_5...
Exporting ncld_histogram_2006_5...
Exporting ncld_histogram_2008_5...
Exporting ncld_histogram_2011_5...
Exporting ncld_histogram_2013_5...
Exporting ncld_histogram_2016_5...
Exporting ncld_histogram_2019_5...


  pd.Int64Index,


Exporting ncld_histogram_2001_6...
Exporting ncld_histogram_2004_6...
Exporting ncld_histogram_2006_6...
Exporting ncld_histogram_2008_6...
Exporting ncld_histogram_2011_6...
Exporting ncld_histogram_2013_6...
Exporting ncld_histogram_2016_6...
Exporting ncld_histogram_2019_6...


  pd.Int64Index,


Exporting ncld_histogram_2001_7...
Exporting ncld_histogram_2004_7...
Exporting ncld_histogram_2006_7...
Exporting ncld_histogram_2008_7...
Exporting ncld_histogram_2011_7...
Exporting ncld_histogram_2013_7...
Exporting ncld_histogram_2016_7...
Exporting ncld_histogram_2019_7...
