In [1]:
import geemap
import pandas as pd
import numpy as np
import pdb
from IPython.display import display
import ee
import os

In [2]:
# Initialize the Earth Engine client library before any `ee` objects are used.
# NOTE(review): presumably relies on credentials already stored on this machine
# (e.g. from a prior ee.Authenticate()) — confirm.
ee.Initialize()

In [3]:
# Load the Iowa boundary shapefile as the region of interest
# (only Iowa is loaded here; it is used below to spatially filter the NAIP collection).
iowa = geemap.shp_to_ee('F:/US states/iowa.shp')

In [5]:
#Add date to image
def addDate(image):
    """Append an integer ``imagedate`` band carrying the image's date.

    The image date is formatted as ``YYYYMMdd``, parsed into a number,
    wrapped in an image, renamed to ``imagedate`` and cast to int before
    being added to the input image.

    Args:
        image: object exposing ``date()`` and ``addBands()``; an ``ee.Image``
            when this function is mapped over an image collection.

    Returns:
        The result of ``image.addBands(...)`` with the ``imagedate`` band.
    """
    stamp_text = ee.Date(image.date()).format('YYYYMMdd')
    date_band = ee.Image(ee.Number.parse(stamp_text)).rename('imagedate').toInt()
    return image.addBands(date_band)

# Build the 2010 NAIP mosaic over Iowa, tagging every image with an
# `imagedate` band (via addDate) before mosaicking.
# filterDate treats its end date as exclusive, so the range ends at
# 2011-01-01 to keep imagery acquired on 2010-12-31 in the selection.
iowa_mosaic_2010 = (
    ee.ImageCollection('USDA/NAIP/DOQQ')
    .filterBounds(iowa)
    .filterDate('2010-01-01', '2011-01-01')
    .map(addDate)
    .mosaic()
)

In [18]:
#Extract values from features
#Note: return feature.limit(5000) added to get the function to work for this large dataset

# Function to extract values from features within buffer polygons
def _sample_within_buffer(image, features, buffer_polygon, scale=1, limit=5000):
    """Sample ``image`` at the ``features`` that fall within one buffer polygon.

    Shared implementation behind the six per-practice extraction functions
    below, which differ only in the feature collection they pass in.

    Args:
        image: image to sample; must provide ``sampleRegions``.
        features: feature collection for one practice type; must provide
            ``filterBounds``.
        buffer_polygon: feature whose ``geometry()`` selects which of
            ``features`` are sampled.
        scale: value forwarded to ``sampleRegions(scale=...)``. The default
            of 1 assumes NAIP imagery resolution.
        limit: maximum number of sampled features returned. The default of
            5000 was added to get the extraction to work on this large dataset.

    Returns:
        The sampled collection (geometries retained), truncated to ``limit``.
    """
    features_in_buffer = features.filterBounds(buffer_polygon.geometry())
    sampled = image.sampleRegions(
        collection=features_in_buffer,
        scale=scale,
        geometries=True,
    )
    return sampled.limit(limit)


def dam_rasterExtraction_within_buffer(image, buffer_polygon):
    """Sample ``image`` at the ``dam_clip`` features inside ``buffer_polygon``."""
    return _sample_within_buffer(image, dam_clip, buffer_polygon)


def terrace_rasterExtraction_within_buffer(image, buffer_polygon):
    """Sample ``image`` at the ``terrace_clip`` features inside ``buffer_polygon``."""
    return _sample_within_buffer(image, terrace_clip, buffer_polygon)


def basin_rasterExtraction_within_buffer(image, buffer_polygon):
    """Sample ``image`` at the ``basin_clip`` features inside ``buffer_polygon``."""
    return _sample_within_buffer(image, basin_clip, buffer_polygon)


def grassed_rasterExtraction_within_buffer(image, buffer_polygon):
    """Sample ``image`` at the ``grassed_clip`` features inside ``buffer_polygon``."""
    return _sample_within_buffer(image, grassed_clip, buffer_polygon)


def contour_rasterExtraction_within_buffer(image, buffer_polygon):
    """Sample ``image`` at the ``contour_clip`` features inside ``buffer_polygon``."""
    return _sample_within_buffer(image, contour_clip, buffer_polygon)


def strip_rasterExtraction_within_buffer(image, buffer_polygon):
    """Sample ``image`` at the ``strip_clip`` features inside ``buffer_polygon``."""
    return _sample_within_buffer(image, strip_clip, buffer_polygon)

In [9]:
# Random buffer zones - 100 of them - loaded from the buffer_areas shapefile.
# NOTE: an earlier version used the suffix _buffer instead of _clip for the variables below.

buffer_points = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_areas/buffer.shp')

# One feature collection per practice type, read from the buffer_clips folder.
# NOTE(review): presumably each shapefile holds that practice's features already
# clipped to the buffer zones — confirm against how the shapefiles were produced.
contour_clip = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_clips/contour_buffer.shp')
grassed_clip = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_clips/grassed_buffer.shp')
dam_clip = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_clips/dam_buffer.shp')
strip_clip = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_clips/strip_buffer.shp')
terrace_clip = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_clips/terrace_buffer.shp')
basin_clip = geemap.shp_to_ee('F:/Iowa BMP/Iowa clipped shapefiles/buffer_clips/basin_buffer.shp')

'''
Currently at least two of the buffer points overlap, many are in cities, and many are devoid of certain BMPs.
'''

# Interactive map for a visual check of the buffer zones and the basin layer
Map = geemap.Map(center=[40.6, -94], zoom=12)

# (object, layer name) pairs, added in this order with empty visualization params
for layer, layer_name in (
    (buffer_points, 'buffer points'),
    (basin_clip, 'basin_buffer'),
):
    Map.addLayer(layer, {}, layer_name)

# Last expression of the cell, so the notebook renders the map widget
Map

Map(center=[40.6, -94], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(…

In [10]:
# dam_mosaic = geemap.ee_to_pandas(dam_rasterExtraction(iowa_mosaic_2010))
# dam_mosaic #currently cuts off after 5000 elements for whole mosaic - i need that FOR EACH BUFFER ZONE (100 of them)

Unnamed: 0,HUC_12,NRCS_CODE,PRACTICE,Present2_1,Present201,CREATOR_NA,SHAPE_Leng,Present80s,DATE_CREAT,R,B,imagedate,G,N
0,071000090701,378,Pond Dam,,1,AH,103.069722,,2017-09-25,109,126,20100907,137,169
1,071000090701,378,Pond Dam,,1,AH,103.069722,,2017-09-25,109,124,20100907,132,163
2,071000090701,378,Pond Dam,,1,AH,103.069722,,2017-09-25,82,122,20100907,126,165
3,071000090701,378,Pond Dam,,1,AH,103.069722,,2017-09-25,118,131,20100907,140,177
4,071000090701,378,Pond Dam,,1,AH,103.069722,,2017-09-25,116,130,20100907,137,172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,102400090202,378,Pond Dam,,1,AB,60.168289,,2017-01-13,76,100,20100812,85,80
4996,102400090202,378,Pond Dam,,1,AB,60.168289,,2017-01-13,91,104,20100812,104,82
4997,102400090202,378,Pond Dam,,1,AB,60.168289,,2017-01-13,52,101,20100812,65,40
4998,102400090202,378,Pond Dam,,1,AB,60.168289,,2017-01-13,61,102,20100812,81,56


In [11]:
# Convert the buffer polygon collection into a list holding every feature
# (toList is given the collection's full size), so that individual polygons
# can be fetched by position with buffer_list.get(i) in the loops below.
buffer_list = buffer_points.toList(buffer_points.size())

In [15]:
#Dam
# Sample the 2010 mosaic at the dam features inside each buffer polygon,
# collecting one DataFrame per buffer.
result_list = []

# Iterate over each buffer polygon and extract raster values within each buffer
for i in range(buffer_list.size().getInfo()):
    try:
        buffer_polygon = ee.Feature(buffer_list.get(i))
        result = geemap.ee_to_pandas(dam_rasterExtraction_within_buffer(iowa_mosaic_2010, buffer_polygon))
    except Exception as err:
        # Best-effort: a failing buffer (presumably one with no dam features)
        # is skipped rather than aborting the run, but it is reported so the
        # dropped data is visible instead of silently missing.
        print(f'Dam: skipped buffer {i}: {err!r}')
        continue
    result_list.append(result)

# Merge the results into a single dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame when every buffer was skipped.
dam_mosaic = pd.concat(result_list) if result_list else pd.DataFrame()


In [21]:
# dam_mosaic

In [19]:
#Terrace
# Sample the 2010 mosaic at the terrace features inside each buffer polygon,
# collecting one DataFrame per buffer.
result_list = []

# Iterate over each buffer polygon and extract raster values within each buffer
for i in range(buffer_list.size().getInfo()):
    try:
        buffer_polygon = ee.Feature(buffer_list.get(i))
        result = geemap.ee_to_pandas(terrace_rasterExtraction_within_buffer(iowa_mosaic_2010, buffer_polygon))
    except Exception as err:
        # Best-effort: a failing buffer (presumably one with no terrace
        # features) is skipped rather than aborting the run, but it is
        # reported so the dropped data is visible instead of silently missing.
        print(f'Terrace: skipped buffer {i}: {err!r}')
        continue
    result_list.append(result)

# Merge the results into a single dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame when every buffer was skipped.
terrace_mosaic = pd.concat(result_list) if result_list else pd.DataFrame()


In [22]:
#Basin
# Sample the 2010 mosaic at the basin features inside each buffer polygon,
# collecting one DataFrame per buffer.
result_list = []

# Iterate over each buffer polygon and extract raster values within each buffer
for i in range(buffer_list.size().getInfo()):
    try:
        buffer_polygon = ee.Feature(buffer_list.get(i))
        result = geemap.ee_to_pandas(basin_rasterExtraction_within_buffer(iowa_mosaic_2010, buffer_polygon))
    except Exception as err:
        # Best-effort: a failing buffer (presumably one with no basin
        # features) is skipped rather than aborting the run, but it is
        # reported so the dropped data is visible instead of silently missing.
        print(f'Basin: skipped buffer {i}: {err!r}')
        continue
    result_list.append(result)

# Merge the results into a single dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame when every buffer was skipped.
basin_mosaic = pd.concat(result_list) if result_list else pd.DataFrame()


In [23]:
#Grassed
# Sample the 2010 mosaic at the grassed features inside each buffer polygon,
# collecting one DataFrame per buffer.
result_list = []

# Iterate over each buffer polygon and extract raster values within each buffer
for i in range(buffer_list.size().getInfo()):
    try:
        buffer_polygon = ee.Feature(buffer_list.get(i))
        result = geemap.ee_to_pandas(grassed_rasterExtraction_within_buffer(iowa_mosaic_2010, buffer_polygon))
    except Exception as err:
        # Best-effort: a failing buffer (presumably one with no grassed
        # features) is skipped rather than aborting the run, but it is
        # reported so the dropped data is visible instead of silently missing.
        print(f'Grassed: skipped buffer {i}: {err!r}')
        continue
    result_list.append(result)

# Merge the results into a single dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame when every buffer was skipped.
grassed_mosaic = pd.concat(result_list) if result_list else pd.DataFrame()

In [24]:
#Contour
# Sample the 2010 mosaic at the contour features inside each buffer polygon,
# collecting one DataFrame per buffer.
result_list = []

# Iterate over each buffer polygon and extract raster values within each buffer
for i in range(buffer_list.size().getInfo()):
    try:
        buffer_polygon = ee.Feature(buffer_list.get(i))
        result = geemap.ee_to_pandas(contour_rasterExtraction_within_buffer(iowa_mosaic_2010, buffer_polygon))
    except Exception as err:
        # Best-effort: a failing buffer (presumably one with no contour
        # features) is skipped rather than aborting the run, but it is
        # reported so the dropped data is visible instead of silently missing.
        print(f'Contour: skipped buffer {i}: {err!r}')
        continue
    result_list.append(result)

# Merge the results into a single dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame when every buffer was skipped.
contour_mosaic = pd.concat(result_list) if result_list else pd.DataFrame()

In [25]:
#Strip
# Sample the 2010 mosaic at the strip features inside each buffer polygon,
# collecting one DataFrame per buffer.
result_list = []

# Iterate over each buffer polygon and extract raster values within each buffer
for i in range(buffer_list.size().getInfo()):
    try:
        buffer_polygon = ee.Feature(buffer_list.get(i))
        result = geemap.ee_to_pandas(strip_rasterExtraction_within_buffer(iowa_mosaic_2010, buffer_polygon))
    except Exception as err:
        # Best-effort: a failing buffer (presumably one with no strip
        # features) is skipped rather than aborting the run, but it is
        # reported so the dropped data is visible instead of silently missing.
        print(f'Strip: skipped buffer {i}: {err!r}')
        continue
    result_list.append(result)

# Merge the results into a single dataframe. pd.concat raises on an empty
# list, so fall back to an empty frame when every buffer was skipped.
strip_mosaic = pd.concat(result_list) if result_list else pd.DataFrame()

In [34]:
#Preliminary model training framework
from sklearn.model_selection import train_test_split

# Stack the six per-practice tables into one frame. reset_index() renumbers
# the rows and keeps the previous row labels as an 'index' column.
ulti_log = pd.concat(
    [
        contour_mosaic,
        dam_mosaic,
        grassed_mosaic,
        terrace_mosaic,
        basin_mosaic,
        strip_mosaic,
    ]
).reset_index()

# One Series per band column of the combined table
# TODO: open question from the author - why does selecting 'B' work for the
# mosaic but not for the non-mosaic data?
red_train, blue_train, green_train, nir_train = (
    ulti_log[band] for band in ('R', 'B', 'G', 'N')
)

# Feature matrix: four columns, ordered R, N, G, B
xargs = np.column_stack([red_train, nir_train, green_train, blue_train])

# Labels: the practice name of every sampled row
yargs = ulti_log['PRACTICE']

seed = 3

# Hold out 20% of the rows for testing; the split is seeded so it is repeatable
xargs_train, xargs_test, yargs_train, yargs_test = train_test_split(
    xargs,
    yargs,
    test_size=0.2,
    random_state=seed,
)

In [35]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

#Random forest classification
# Single-step pipeline: a seeded 100-tree random forest requiring at least
# 10 samples per leaf. fit() returns the fitted pipeline itself, so `pipe`
# is the trained model.
pipe = Pipeline(
    steps=[
        ('forest', RandomForestClassifier(n_estimators=100, min_samples_leaf=10, random_state=seed)),
    ]
).fit(xargs_train, yargs_train)

# Predict the practice label of every held-out test row
y_pred = pipe.predict(xargs_test)

In [36]:
# ulti_log

# Print overall accuracy first, then the confusion matrix
# (rows: true labels, columns: predicted labels).
for metric in (accuracy_score, confusion_matrix):
    print(metric(yargs_test, y_pred))

0.6623502998772507
[[ 8817  7602     3   361  1025   139]
 [ 1890 78606    12   483  4611   334]
 [   75  2100    81    25   517    13]
 [  350  2507     1  3342   289     5]
 [ 1227 22958    20   234 11923   249]
 [  271  4785     5    83   915  1373]]


In [32]:
# ulti_log

Unnamed: 0,index,HUC_12,NRCS_CODE,PRACTICE,Present2_1,CREATOR_NA,Present80s,SHAPE_Area,LAST_EDIT_,Merge,Present201,SHAPE_Leng,DATE_CREAT,LAST_EDITO,R,B,imagedate,G,N,Examined
0,0,102400010104,332,Contour Buffer Strips,,II,,84567.254411,,,1,1940.670593,2018-02-13,,105,125,20100903,146,187,
1,1,102400010104,332,Contour Buffer Strips,,II,,84567.254411,,,1,1940.670593,2018-02-13,,114,123,20100903,141,175,
2,2,102400010104,332,Contour Buffer Strips,,II,,84567.254411,,,1,1940.670593,2018-02-13,,89,118,20100903,137,192,
3,3,102400010104,332,Contour Buffer Strips,,II,,84567.254411,,,1,1940.670593,2018-02-13,,103,121,20100903,146,191,
4,4,102400010104,332,Contour Buffer Strips,,II,,84567.254411,,,1,1940.670593,2018-02-13,,103,121,20100903,146,191,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
786147,4995,070802050808,585,Stripcropping,,JS,,553641.972262,,,1,3287.384789,2017-05-24,,131,116,20100908,123,168,
786148,4996,070802050808,585,Stripcropping,,JS,,553641.972262,,,1,3287.384789,2017-05-24,,116,114,20100908,112,165,
786149,4997,070802050808,585,Stripcropping,,JS,,553641.972262,,,1,3287.384789,2017-05-24,,116,112,20100908,110,154,
786150,4998,070802050808,585,Stripcropping,,JS,,553641.972262,,,1,3287.384789,2017-05-24,,116,112,20100908,110,154,


In [33]:
#Export df to csv, to save time

import os

# Absolute export folder on the F: drive, used as-is. Do not prefix it with
# '~' or pass it through os.path.expanduser: '~F:' is read as the home
# directory of a user named 'F:', which never resolves to this folder.
out_dir = 'F:/Iowa BMP/'
out_csv = os.path.join(out_dir, 'mosaic_bands.csv')
# ulti_log.to_csv(out_csv, index = False)