In [None]:
import geopandas as gpd
import pandas as pd
import glob
import datetime
from pprint import pprint

In [None]:

# Today's date as a zero-padded MMDDYY string.
date = f"{datetime.date.today():%m%d%y}"

In [None]:
'''
The purpose of this Python script is to clean spatial data files that will be
used in future analysis. Cleaning the data includes removing fields that are not
needed, removing invalid geometries from the geodataframes, adding geometry where
there is none, and clipping all the data to the 7-county metro area of interest.
'''

# Load every zipped shapefile as a geopandas dataframe.
# All paths are relative to the current working directory.
#   hydrography - DNR hydrography layer (filtered to lakes and ponds further down)
#   water20XX   - impaired lakes layers for 2014, 2016 and 2018
#   metro       - metro counties/CTUs layer, used as the clip boundary
hydrography = gpd.read_file("zip://shp_water_dnr_hydrography.zip")
water2018 = gpd.read_file("zip://impaired_2018_lakes.zip")
water2016 = gpd.read_file("zip://impaired_2016_lakes.zip")
water2014 = gpd.read_file("zip://impaired_2014_lakes.zip")
metro = gpd.read_file("zip://shp_bdry_metro_counties_and_ctus.zip")

###
### CLEANING AND CLIPPING IMPAIRED WATER 2014, 2016, AND 2018
###

# Dissolve the metro dataset on county name; the result is the clip mask
# for every layer below.
metro_dissolve = metro.dissolve(by = "CO_NAME")

# Drop the columns that are not needed. Each year ships with a different
# schema, so each year has its own list.
water2018 = water2018.drop(["CAT", "CAT_DESC", "REACH_DESC", "USE_CLASS", "AFFECTED_U", "LIKE_MEET", 
                            "NON_POLL", "NAT_BACK", "ADD_MON", "APPROVED", "NEEDS_PLN", "IMP_PARAM", "NEW_IMPAIR", 
                            "HUC_8", "HUC_8_NAME", "HUC_4", "BASIN", "TRIBAL_INT", "INDIAN_RES", "AMMONIA", "CHLORIDE", 
                            "FISHESBIO", "HG_F", "HG_W", "NUTRIENTS", "PCB_F", "PFOS_F", "Shape_Leng", "Shape_Area"], axis = 1)

water2016 = water2016.drop(["CAT", "DATASET_NA", "REACH_DESC", "USE_CLASS", "AFFECTED_U", "TMDL_NOT_R", 
                            "TMDL_NOT_1", "IMPAIR_PAR", "IMPAIR_P_1", "NEW_IMPAIR", "NEW_IMPA_1", "TMDL_APPRO", "TMDL_APP_1", 
                            "TMDL_NEEDE", "TMDL_NEE_1", "HUC_8", "HUC_8_NAME", "HUC_4", "BASIN", "TRIBAL_INT", "INDIAN_RES", 
                            "CHLORIDE", "FISHESBIO", "HG_F", "HG_W", "NUTRIENTS", "PCB_F", "PFOS_F", "SHAPE_Leng", "SHAPE_Area"], axis = 1)

water2014 = water2014.drop(["LOCATION", "CAT", "AFFECTED_U", "NOPLN", "APPROVED", "NEEDSPLN", "IMPAIR_PAR", 
                            "NEW_2014", "HUC8", "HUC8_NAME", "HUC4", "BASIN", "WDWMO_NAME", "WDWMO_TYPE", "Chloride", 
                            "HgF", "HgW", "Nutrients", "PCBF", "PFOS_W", "SHAPE_Leng", "Shape_Le_1", "Shape_Area"], axis = 1)

# Rename the 2014 columns to match the 2016 and 2018 datasets
water2014 = water2014.rename(columns = {"WATER_NAME" : "NAME", "ALL_COUNTI" : "COUNTY", "ACRES" : "AREA_ACRES"})


# Keep only the rows whose geometry is valid
water2018_drop_invalid = water2018.loc[water2018['geometry'].is_valid, :]

water2016_drop_invalid = water2016.loc[water2016['geometry'].is_valid, :]

water2014_drop_invalid = water2014.loc[water2014['geometry'].is_valid, :]


# Clip the impaired water layers to the 7 county metro
water2018_clip = gpd.clip(water2018_drop_invalid, metro_dissolve)
water2016_clip = gpd.clip(water2016_drop_invalid, metro_dissolve)

# 2014 is reprojected to EPSG:26915 before clipping.
# NOTE(review): presumably the metro layer is already in EPSG:26915 - confirm.
water2014_proj = water2014_drop_invalid.to_crs('EPSG:26915')
water2014_clip = gpd.clip(water2014_proj, metro_dissolve)

###
### CLEANING THE 2020 IMPAIRED WATER DATA SET
###

# Load the 2020 water data csv, keep only the columns we want (including the
# geometry column), and pull out only the lake features.
# NOTE(review): the geometry read from the csv is presumably empty; it is
# attached later by joining to the 2018 layer on AUID - confirm.
water2020 = gpd.read_file("wq-iw1-65.csv")
water2020 = water2020[["Water body name", "AUID", "County", "Water body type", "geometry"]]
water2020_lake = water2020.loc[(water2020["Water body type"] == "Lake")]

# Drop the "Water body type" field since it is no longer needed
water2020_lake = water2020_lake[["AUID", "Water body name", "County", "geometry"]]

# Rename the columns to match the other datasets
water2020_lake = water2020_lake.rename(columns = {"Water body name" : "NAME", "County" : "COUNTY"})

# Select the 7 county metro by county name (exact, case-sensitive match)
counties = ["Anoka", "Hennepin", "Ramsey", "Washington", "Carver", "Scott", "Dakota"]
water2020_metro = water2020_lake.loc[(water2020_lake["COUNTY"].isin(counties))]

# Verifying that all the correct counties are there.
# The result is not stored or printed, so this line has no effect when the
# cell runs as a script.
water2020_metro["COUNTY"].unique()

# Drop duplicate AUIDs (the first occurrence of each AUID is kept)
water2020_clean = water2020_metro.drop_duplicates(subset = ["AUID"])


# List of the clipped geodataframes, used to find the smallest lake size and
# to write one output file per year. dfs_names holds the output file stem
# for the dataframe at the same position in dfs.
dfs = [water2014_clip, water2016_clip, water2018_clip]
dfs_names = ["water2014_clip", "water2016_clip", "water2018_clip"]

# New field for impairment status in all data sets; every lake in these
# three layers is labelled "Impaired".
for df in dfs:
    df["status"] = "Impaired"

def find_min(dfs):
    '''
    Find the smallest lake area across the impaired datasets.

    Parameter
    ---------
    dfs: list of dataframes
        Each dataframe must have an "AREA_ACRES" column.

    Return
    ------
    The smallest "AREA_ACRES" value found in any of the dataframes. The same
    value is also stored in the module-level name ``minimum`` so that code
    reading that global keeps working.

    Raises
    ------
    ValueError
        If ``dfs`` is empty. ``minimum`` is left untouched in that case.
    '''
    global minimum

    if not dfs:
        raise ValueError("find_min() needs at least one dataframe")

    # Per-dataframe minimum first, then the minimum of those.
    minimum = min(df["AREA_ACRES"].min() for df in dfs)
    return minimum

# Find the smallest lake size among all impaired lakes.
# find_min stores its result in the module-level name `minimum`.
find_min(dfs)


###
### CLEANING THE HYDROGRAPHY DATA SET
###

# Keep only the rows whose geometry is valid
hydro_drop_invalid = hydrography.loc[hydrography['geometry'].is_valid, :]

# Clip hydro to the 7 county metro
hydro_clip = gpd.clip(hydro_drop_invalid, metro_dissolve)

# Narrow the hydro layer down to lakes and ponds only
hydro_lake = hydro_clip.loc[hydro_clip["wb_class"] == "Lake or Pond"]

# Keep only the lakes that are at least as large as the smallest impaired lake
hydro_lake = hydro_lake.loc[(hydro_lake["acres"] >= minimum)]

# Drop all excess fields from the dataframe
hydro_clean = hydro_lake.drop(["fw_id", "dowlknum", "sub_flag", "wb_class", "lake_class", "shore_mi", "center_utm", "center_u_1",
                               "dnr_region", "fsh_office", "outside_mn", "delineated", "delineatio", "delineat_1", "delineat_2", 
                               "approved_b", "approval_d", "approval_n", "has_flag", "flag_type", "publish_da", "lksdb_basi", "has_wld_fl",
                               "wld_flag_t", "created_us", "created_da", "last_edite", "last_edi_1", "ow_use", "pwi_class", "map_displa", 
                               "shape_Leng", "shape_Area", "INSIDE_X", "INSIDE_Y", "in_lakefin"], axis = 1)

# Empty impairment status field; it is filled in when the data is joined
# with the impaired data sets
hydro_clean["status"] = ""

# Rename the lake-name column to NAME, then dissolve the hydrography
# geometry by lake name so each name becomes a single feature
hydro_clean = hydro_clean.rename(columns={'pw_basin_n': 'NAME'})
hydro_dis = (hydro_clean.dissolve(by='NAME')).reset_index()


def complete_hydro(waterdata, waterdata_name):
    '''
    Join an impaired-water table onto the dissolved hydrography lakes and
    write the combined impaired/nonimpaired layer to a shapefile.

    Every lake in the module-level ``hydro_dis`` layer is kept. Lakes whose
    NAME also appears in ``waterdata`` take that table's status; all other
    lakes are labelled "nonimpaired". The geometry always comes from
    ``hydro_dis``.

    Parameters
    ----------
    waterdata: dataframe
        Impaired-water records. Must have "NAME", "geometry" and "status"
        columns: the join is on "NAME", and the code reads the suffixed
        "geometry_x" / "status_y" columns that the merge creates when both
        sides carry "geometry" and "status".
    waterdata_name: str
        Output file stem; the result is written to "<waterdata_name>.shp".

    Return
    ------
    None. The result is written to disk.
    '''

    # Left join: keep every hydrography lake, matched or not.
    join_hydro = hydro_dis.merge(waterdata, on='NAME', how='left')

    # Use the hydro_dis geometry (the "_x" side of the merge) for all features
    projected = join_hydro.set_geometry(join_hydro['geometry_x'],
                                        crs='EPSG:26915')

    # Combine duplicate features that share a lake name
    projected_dis = (projected.dissolve(by='NAME')).reset_index()

    # Remove unnecessary fields
    projected_dis = projected_dis[['NAME',
                                   'geometry',
                                   'acres',
                                   'cty_name',
                                   'unique_id',
                                   'status_y']]

    # Fill the status of nonimpaired lakes. Only the status column is filled,
    # so a missing acreage, county or id is not overwritten with the label.
    projected_dis = projected_dis.fillna({'status_y': "nonimpaired"})
    projected_df = projected_dis.rename(columns = {'status_y': 'status'})

    projected_df.to_file(f'{waterdata_name}.shp')

# Nonimpaired and impaired completed dataset for each year.
# dfs and dfs_names are parallel lists, so pair them up one-to-one. Nesting
# one loop inside the other would write every dataframe under every name and
# leave all three shapefiles holding the last dataframe.
for df, name in zip(dfs, dfs_names):
    complete_hydro(df, name)


###
### JOINING GEOMETRY TO WATER2020_CLEAN
###

# The 2020 records have no usable geometry of their own, so attach the 2018
# attributes and geometry by AUID.
# NOTE(review): after this left join, "status" is only set for lakes that
# were also in the 2018 layer; a lake first listed in 2020 presumably ends up
# labelled "nonimpaired" by complete_hydro - confirm whether that is intended.
water2020_join_auid = water2020_clean.merge(water2018_clip, how = "left", on = "AUID")

water2020_join_auid = water2020_join_auid[["AUID", "NAME_x", "COUNTY_x", "AREA_ACRES", "geometry_y", "status"]]

water2020_join_auid = water2020_join_auid.rename(columns = {"NAME_x" : "NAME", "COUNTY_x" : "COUNTY", "geometry_y" : "geometry"})

complete_hydro(water2020_join_auid, "water2020_clip")


In [None]:
# Buffer the 2014 lakes by a fixed distance of 500 (in the units of the
# layer's CRS) and keep just the lake name and the buffered geometry.
water2014 = gpd.read_file("water2014_clip.shp")

# The buffered GeoSeries is unnamed; the code below reads it back as column 0.
water2014_buffer = gpd.GeoDataFrame(water2014.buffer(500))
water2014_buffer["NAME"] = water2014["NAME"]
water2014_buffer = water2014_buffer.set_geometry(water2014_buffer[0])
water2014_buffer = water2014_buffer[["NAME", "geometry"]]

In [None]:
# Ask the user for the buffer distance. int() raises ValueError if the reply
# is not a whole number.
buffer_size = int(input("Provide a distance for the size of the buffer in meters: "))

In [None]:
# Reload the per-year lake layers written out by complete_hydro()
# (impaired and nonimpaired lakes, each with a "status" field).
water2014 = gpd.read_file("water2014_clip.shp")
water2016 = gpd.read_file("water2016_clip.shp")
water2018 = gpd.read_file("water2018_clip.shp")
water2020 = gpd.read_file("water2020_clip.shp")

In [None]:
def buffer_lakes(buffer, water_feat):
    ''' Buffer every lake feature and keep its name.

    Parameters
    ----------
    buffer: int
        Buffer distance, in the units of ``water_feat``'s coordinate system.
    water_feat: geodataframe
        Lake features; must have a "NAME" column.

    Return
    ------
    lake_buffer: geodataframe
        One row per input feature, on the same index, with the columns
        "NAME" and "geometry" (the buffered shape).
    '''
    # Pass the buffered shapes as the geometry directly. Wrapping the unnamed
    # GeoSeries in a GeoDataFrame first would depend on the automatically
    # generated column label 0.
    lake_buffer = gpd.GeoDataFrame({"NAME": water_feat["NAME"]},
                                   geometry=water_feat.buffer(buffer))
    return lake_buffer

In [None]:
# Buffer each year's lake layer by the user-supplied distance, in year order.
buffer2014, buffer2016, buffer2018, buffer2020 = (
    buffer_lakes(buffer_size, lakes)
    for lakes in (water2014, water2016, water2018, water2020)
)

In [None]:
# Find the monthly metro data: every file in `directory` whose name ends in
# "19_metro.zip".
directory = r'/home/leex6165/gisproj/'
path = f'{directory}*19_metro.zip'

# Start from one row per 2018 lake buffer with its impairment status; a count
# column is merged on for every file found.
buffer = buffer2018
data_2018 = pd.DataFrame({'NAME': buffer['NAME'], 'STATUS': water2018['status']})

# Get counts in each lake buffer per month
for file in glob.glob(path):
    sg_data = f'zip://{file}'
    patterns = (gpd.read_file(sg_data)).to_crs('EPSG:26915')
    data_join = gpd.sjoin(buffer, patterns, op='intersects')

    # Get counts of points in each lake buffer.
    data_grp = data_join.groupby('NAME', as_index=False)['index_right'].count()
    # file[-15:-10] is the five characters just before the "_metro.zip"
    # suffix; they are used to label the count column.
    data_grp = data_grp.rename(columns = {'index_right': f'{file[-15:-10]}_counts'})
    
    # No `on=` is given, so the merge joins on the columns the two frames
    # share. The outer join keeps lakes with no matches in this file; their
    # count is NaN.
    data_2018 = data_2018.merge(data_grp, how='outer')


In [None]:
# Same procedure for the 2020 lakes, using the files whose names end in
# "20_metro.zip".
path = f'{directory}*20_metro.zip'

buffer = buffer2020
data_2020 = pd.DataFrame({'NAME': buffer['NAME'], 'STATUS': water2020['status']} )

# Get counts in each lake buffer per month
for file in glob.glob(path):
    sg_data = f'zip://{file}'
    patterns = (gpd.read_file(sg_data)).to_crs('EPSG:26915')
    data_join = gpd.sjoin(buffer, patterns, op='intersects')

    # Get counts of points in each lake buffer.
    data_grp = data_join.groupby('NAME', as_index=False)['index_right'].count()
    # file[-15:-10] is the five characters just before the "_metro.zip"
    # suffix; they are used to label the count column.
    data_grp = data_grp.rename(columns = {'index_right': f'{file[-15:-10]}_counts'})
    
    # The outer join keeps lakes with no matches in this file; their count
    # is NaN.
    data_2020 = data_2020.merge(data_grp, how='outer')

#### MAKE 2 CELLS BELOW INTO FUNCTION 

In [None]:
# Counts of visitation for nonimpaired and impaired lakes 2020
def vis_stats(counts_df):
    ''' Summarise visit counts by impairment status.

    Parameter
    ---------
    counts_df: DataFrame
        One row per lake with a "NAME" column, a "STATUS" column and one
        numeric visit-count column per month.

    Return
    ------
    vis: DataFrame
        Indexed by status. Holds the summed monthly count columns followed by
        "avg monthly vis", "Lake Counts", "Total visits",
        "Avg monthly vis per lake" and "Avg yearly vis per lake", plus a final
        "Total vis per month" row containing the column sums.
    '''
    by_status = counts_df.groupby(['STATUS'])

    # Sum only the numeric count columns; summing the text NAME column would
    # concatenate lake names and break the row-wise statistics below.
    monthly = by_status.sum(numeric_only=True)
    # Ignore a leftover per-lake total column if the caller's frame has one.
    monthly = monthly.drop(columns=['Total visits'], errors='ignore')

    vis = monthly.copy()
    # Row-wise statistics come from the monthly columns alone, so nothing has
    # to be subtracted back out afterwards.
    vis['avg monthly vis'] = monthly.mean(axis=1)
    vis['Lake Counts'] = by_status['NAME'].count()
    vis['Total visits'] = monthly.sum(axis=1)
    vis['Avg monthly vis per lake'] = vis['avg monthly vis'] / vis['Lake Counts']
    vis['Avg yearly vis per lake'] = vis['Total visits'] / vis['Lake Counts']
    vis.loc['Total vis per month'] = vis.sum(axis=0)

    return vis

In [None]:
# Visit statistics for the 2020 lakes, written to a csv named after the
# buffer size. `directory` already ends in "/", so the path contains "//".
vis_2020 = vis_stats(data_2020)
vis_2020.to_csv(f'{directory}/vis_stats2020_{buffer_size}m.csv', 
                index=True)

In [None]:
# Visit statistics for the 2018 lakes, written to a csv named after the
# buffer size. `directory` already ends in "/", so the path contains "//".
vis_2018 = vis_stats(data_2018)
vis_2018.to_csv(f'{directory}/vis_stats2018_{buffer_size}m.csv', 
                index=True)

In [None]:
def min_max(counts_df, year):
    ''' Print the five most and five least visited lakes.

    Parameters
    ----------
    counts_df: DataFrame
        One row per lake with "NAME", "STATUS" and numeric visit-count
        columns. The dataframe is not modified.
    year: str
        Year label used in the printed headings.

    Return
    ------
    None. Lake name, total visits and status are printed for each lake.
    '''
    # Work on a copy: adding the total to the caller's frame would make a
    # second call (or a later vis_stats call) count the total as a month.
    total = counts_df.copy()
    count_cols = (total.select_dtypes(include='number')
                       .columns.drop('Total visits', errors='ignore'))
    total['Total visits'] = total[count_cols].sum(axis=1)

    rankings = (
        (f'The top five most visted lakes for {year} are: ', False),
        (f'The top five least visted lakes for {year} are: ', True),
    )
    for heading, ascending in rankings:
        ranked = total.sort_values(by=['Total visits'],
                                   ascending=ascending,
                                   ignore_index=True)
        print(heading)
        top = ranked.head(5)
        for name, visits, status in zip(top['NAME'],
                                        top['Total visits'],
                                        top['STATUS']):
            print(f"Lake Name: {name}"
                  f"\nTotal visits: {visits}"
                  f"\nStatus: {status}\n")

In [None]:
# Print the five most and five least visited lakes for the 2020 dataset
min_max(data_2020, "2020")

In [None]:
# Print the five most and five least visited lakes for the 2018 dataset
min_max(data_2018, "2018")

### FINDING ADDED AND REMOVED LAKES BETWEEN EACH YEAR

In [None]:
import geopandas as gpd
import pandas as pd

In [None]:
# Impaired-only lake tables for each list year. 2014-2018 are shapefiles;
# 2020 is a plain csv read with pandas.
# NOTE(review): none of the code shown here writes these files - presumably
# they are exported elsewhere; confirm the source.
impaired2014 = gpd.read_file("water2014_impaired.shp")
impaired2016 = gpd.read_file("water2016_impaired.shp")
impaired2018 = gpd.read_file("water2018_impaired.shp")

impaired2020 = pd.read_csv("water2020_impaired.csv")

In [None]:
# A lake was "added" when its AUID is on the later list but not the earlier
# one, and "removed" when it is on the earlier list but not the later one.

# 2014 -> 2016
added_16 = impaired2016[~impaired2016["AUID"].isin(impaired2014["AUID"])]
removed_14 = impaired2014[~impaired2014["AUID"].isin(impaired2016["AUID"])]

# 2016 -> 2018
added_18 = impaired2018[~impaired2018["AUID"].isin(impaired2016["AUID"])]
removed_16 = impaired2016[~impaired2016["AUID"].isin(impaired2018["AUID"])]

# 2018 -> 2020
added_20 = impaired2020[~impaired2020["AUID"].isin(impaired2018["AUID"])]
removed_18 = impaired2018[~impaired2018["AUID"].isin(impaired2020["AUID"])]

In [None]:
def impaired_change(y1_df, y1, y2_df, y2):
    ''' Print the lakes removed from and added to the impaired waters list
    between two list years.

    Parameters
    ----------
    y1_df: DataFrame
        Earlier list; needs "AUID" and "NAME" columns.
    y1: str
        Year label of the earlier list.
    y2_df: DataFrame
        Later list; needs "AUID" and "NAME" columns.
    y2: str
        Year label of the later list.

    Return
    ------
    None. The counts and lake names are printed.
    '''
    # Lists are compared by AUID.
    still_listed = y1_df['AUID'].isin(y2_df['AUID'])
    listed_before = y2_df['AUID'].isin(y1_df['AUID'])
    removed = y1_df[~still_listed]
    added = y2_df[~listed_before]

    print(f"There were {len(removed['NAME'])} lakes removed from impaired waters list {y1}-{y2}:")
    for lake_name in removed['NAME']:
        print(lake_name)

    print(f"\nThere were {len(added['NAME'])} lakes added to impaired waters list {y1}-{y2}:")
    for lake_name in added['NAME']:
        print(lake_name)

In [None]:
# Report the lakes removed and added between the 2014 and 2016 lists
impaired_change(impaired2014, '2014', impaired2016, '2016')

In [None]:
# Each entry pairs a heading with the lakes to list beneath it, in print order.
change_reports = [
    ("From the years 2014-2016 the following lakes were removed from the impaired waters list:",
     removed_14),
    ("\nFrom the years 2014-2016 the following lakes were added to the impaired waters list:",
     added_16),
    ("\nFrom the years 2016-2018 the following lakes were removed from the impaired waters list:",
     removed_16),
    ("\nFrom the years 2016-2018 the following lakes were added to the impaired waters list:",
     added_18),
    ("\nFrom the years 2018-2020 the following lakes were removed from the impaired waters list:",
     removed_18),
    ("\nFrom the years 2018-2020 the following lakes were added to the impaired waters list:",
     added_20),
]

for heading, lakes in change_reports:
    print(heading)
    for lake_name in lakes["NAME"]:
        print(lake_name)

In [None]:
# Non-null value count for each column of the lakes added in 2020
add20 = added_20.count()

In [None]:
# Display the counts (notebook cell output; has no effect in a script)
add20

In [None]:
# Non-null value count for each column of the lakes removed after 2018
# (notebook cell output; the result is not stored)
removed_18.count()

In [None]:
'''
Nicole Dunn and Maisong Francis

This script is designed to be used after WaterData.py and SafeGraph.py.
This script buffers lake features based on user input and gets visitation counts
within each lake buffer. The results are statistical outputs of visitation 
counts per month and year for each category of impairment status: impaired and 
nonimpaired, and returns top five most and least visited lakes for each year. 
'''
import pandas as pd 
import geopandas as gpd
import glob

# Load in all the clipped shapefiles (read relative to the working directory).
# The "status" column of the 2018 and 2020 layers is used further down.
water2014 = gpd.read_file("water2014_clip.shp")
water2016 = gpd.read_file("water2016_clip.shp")
water2018 = gpd.read_file("water2018_clip.shp")
water2020 = gpd.read_file("water2020_clip.shp")

# Load in impaired only clipped files. 2020 is a plain csv read with pandas;
# the other years are shapefiles.
impaired2014 = gpd.read_file("water2014_impaired.shp")
impaired2016 = gpd.read_file("water2016_impaired.shp")
impaired2018 = gpd.read_file("water2018_impaired.shp")
impaired2020 = pd.read_csv("water2020_impaired.csv")

def buffer_lakes(buffer, water_feat):
    ''' Buffer every lake feature and keep its name.

    Parameters:
    -----------
    buffer: int
        The user-supplied buffer distance, in the units of ``water_feat``'s
        coordinate system.
    water_feat: geodataframe
        The geodataframe of lake features; must have a "NAME" column.

    Return:
    -------
    lake_buffer: geodataframe
        One row per input feature, on the same index, with the columns
        "NAME" and "geometry" (the buffer around that lake).
    '''
    # Pass the buffered shapes as the geometry directly. Wrapping the unnamed
    # GeoSeries in a GeoDataFrame first would depend on the automatically
    # generated column label 0.
    lake_buffer = gpd.GeoDataFrame({"NAME": water_feat["NAME"]},
                                   geometry=water_feat.buffer(buffer))
    return lake_buffer


def vis_stats(counts_df):
    ''' Compute statistical metrics for visitations to impaired and nonimpaired
    lakes. Metrics include average monthly visits, lake counts, total visits,
    average monthly visits per lake and average yearly visits per lake, plus
    a final row holding the column totals.

    Parameter
    ----------
    counts_df: DataFrame
        The dataframe produced from spatial joining visitation counts to lake
        buffers: one row per lake with "NAME", "STATUS" and one numeric
        count column per month.

    Return
    ------
    vis: DataFrame
        Indexed by status. Holds the summed monthly count columns followed by
        "avg monthly vis", "Lake Counts", "Total visits",
        "Avg monthly vis per lake" and "Avg yearly vis per lake", plus a
        "Total vis per year" row containing the column sums.
    '''

    by_status = counts_df.groupby(['STATUS'])

    # Sum only the numeric count columns; summing the text NAME column would
    # concatenate lake names and break the row-wise statistics below.
    monthly = by_status.sum(numeric_only=True)
    # Ignore a leftover per-lake total column if the caller's frame has one.
    monthly = monthly.drop(columns=['Total visits'], errors='ignore')

    vis = monthly.copy()
    # Row-wise statistics come from the monthly columns alone, so nothing has
    # to be subtracted back out afterwards.
    vis['avg monthly vis'] = monthly.mean(axis=1)
    vis['Lake Counts'] = by_status['NAME'].count()
    vis['Total visits'] = monthly.sum(axis=1)
    vis['Avg monthly vis per lake'] = vis['avg monthly vis'] / vis['Lake Counts']
    vis['Avg yearly vis per lake'] = vis['Total visits'] / vis['Lake Counts']
    vis.loc['Total vis per year'] = vis.sum(axis=0)
    return vis


def min_max(counts_df, year):
    '''
    Prints the most visited and least visited lakes with impairment status
    and visitation counts.

    Parameters
    ----------
    counts_df: DataFrame
       The dataframe produced from spatial joining visitation counts to lake
       buffers: one row per lake with "NAME", "STATUS" and numeric count
       columns. The dataframe is not modified.
    year: str
       The year of the impaired waters dataset

    Returns
    -------
    None
       Names, impairment status, and visitation counts of the top five most
       and least visited lakes are printed.
    '''
    # Work on a copy: adding the total to the caller's frame would make a
    # second call (or a later vis_stats call) count the total as a month.
    total = counts_df.copy()
    count_cols = (total.select_dtypes(include='number')
                       .columns.drop('Total visits', errors='ignore'))
    total['Total visits'] = total[count_cols].sum(axis=1)

    rankings = (
        (f'The top five most visted lakes for {year} are: ', False),
        (f'The top five least visted lakes for {year} are: ', True),
    )
    for heading, ascending in rankings:
        ranked = total.sort_values(by=['Total visits'],
                                   ascending=ascending,
                                   ignore_index=True)
        print(heading)
        top = ranked.head(5)
        for name, visits, status in zip(top['NAME'],
                                        top['Total visits'],
                                        top['STATUS']):
            print(f"Lake Name: {name}"
                  f"\nTotal visits: {visits}"
                  f"\nStatus: {status}\n")

    
def impaired_change(y1_df, y1, y2_df, y2):
    ''' Print the impaired lakes removed and added between two biennial
    impaired waters lists.

    Parameters
    ----------
    y1_df: DataFrame
        The earlier of the two impaired waters tables being compared; needs
        "AUID" and "NAME" columns.
    y1: str
        Year of the earlier dataset
    y2_df: DataFrame
        The later of the two impaired waters tables being compared; needs
        "AUID" and "NAME" columns.
    y2: str
        Year of the later dataset

    Returns:
    --------
    None
        The number of impaired lakes removed and added, and the names of
        those lakes, are printed.
    '''

    # Lists are compared by AUID.
    still_listed = y1_df['AUID'].isin(y2_df['AUID'])
    listed_before = y2_df['AUID'].isin(y1_df['AUID'])
    removed = y1_df[~still_listed]
    added = y2_df[~listed_before]

    print(f"There were {len(removed['NAME'])} lakes removed from impaired waters list {y1}-{y2}:")
    for lake_name in removed['NAME']:
        print(lake_name)

    print(f"\nThere were {len(added['NAME'])} lakes added to impaired waters list {y1}-{y2}:")
    for lake_name in added['NAME']:
        print(lake_name)
        
##############################################################################

# Ask the user how far out from each lake the buffer should reach
buffer_size = int(input("Provide a distance for the size of the buffer in meters: "))

# Buffer each year's lake layer by that distance, in year order
buffer2014, buffer2016, buffer2018, buffer2020 = (
    buffer_lakes(buffer_size, lakes)
    for lakes in (water2014, water2016, water2018, water2020)
)


##############################################################################

# Find the monthly metro data: every file in `directory` whose name ends in
# "19_metro.zip".
directory = r'Project/'
path = f'{directory}*19_metro.zip'

# Start from one row per 2018 lake buffer with its impairment status; a count
# column is merged on for every file found.
buffer = buffer2018
data_2018 = pd.DataFrame({'NAME': buffer['NAME'], 'STATUS': water2018['status']})

# Get counts in each lake buffer per month for 2018 water data and 2019 foot traffic
for file in glob.glob(path):
    sg_data = f'zip://{file}'
    patterns = (gpd.read_file(sg_data)).to_crs('EPSG:26915')
    data_join = gpd.sjoin(buffer, patterns, op='intersects')

    # Get counts of points in each lake buffer.
    data_grp = data_join.groupby('NAME', as_index=False)['index_right'].count()
    # file[-15:-10] is the five characters just before the "_metro.zip"
    # suffix; they are used to label the count column.
    data_grp = data_grp.rename(columns = {'index_right': f'{file[-15:-10]}_counts'})
    
    # No `on=` is given, so the merge joins on the columns the two frames
    # share. The outer join keeps lakes with no matches in this file; their
    # count is NaN.
    data_2018 = data_2018.merge(data_grp, how='outer')
    
    
##############################################################################    
    
# Get counts in each lake buffer per month for the 2020 water data, using
# the files whose names end in "20_metro.zip"
path = f'{directory}*20_metro.zip'
buffer = buffer2020
data_2020 = pd.DataFrame({'NAME': buffer['NAME'], 
                          'STATUS': water2020['status']} )

for file in glob.glob(path):
    sg_data = f'zip://{file}'
    patterns = (gpd.read_file(sg_data)).to_crs('EPSG:26915')
    data_join = gpd.sjoin(buffer, patterns, op='intersects')

    # Get counts of points in each lake buffer.
    data_grp = data_join.groupby('NAME', as_index=False)['index_right'].count()
    # file[-15:-10] is the five characters just before the "_metro.zip"
    # suffix; they are used to label the count column.
    data_grp = data_grp.rename(columns = {'index_right': f'{file[-15:-10]}_counts'})
    
    # The outer join keeps lakes with no matches in this file; their count
    # is NaN.
    data_2020 = data_2020.merge(data_grp, how='outer')
    
############################################################################## 

# Counts of visitation for nonimpaired and impaired lakes for each year.
# Write results to csv; the file name records the buffer size used.
# `directory` already ends in "/", so the output path contains "//".
vis_2018 = vis_stats(data_2018)
vis_2018.to_csv(f'{directory}/vis_stats2018_{buffer_size}m.csv', 
                index=True)

vis_2020 = vis_stats(data_2020)
vis_2020.to_csv(f'{directory}/vis_stats2020_{buffer_size}m.csv', 
                index=True)


# Print the five most and five least visited lakes for each year.
# These calls come after vis_stats because min_max adds a 'Total visits'
# column to the dataframe it is given.
min_max(data_2018, "2018")
min_max(data_2020, "2020")

##############################################################################

### FINDING ADDED AND REMOVED LAKES BETWEEN EACH YEAR

# Compare each pair of consecutive impaired waters lists; the earlier year
# is passed first.
impaired_change(impaired2014, '2014', impaired2016, '2016')
impaired_change(impaired2016, '2016', impaired2018, '2018')
impaired_change(impaired2018, '2018', impaired2020, '2020')