# Create NLDI Site JSON
Date Updated: 08/22/2022

Steps to be taken...
- read in input files (created with SQL queries)
- clean & process the data
- match the NLDI schema
- convert to geojson
- export outputs

In [None]:
# Needed Libraries

import os
import numpy as np
import pandas as pd
import geopandas as gpd
import geoplot as gplt

pd.options.display.max_columns = 999  # show (up to 999) columns when a DataFrame is displayed in the notebook

In [None]:
# Record the current working directory; it is printed so the run location is visible.

Output = os.getcwd()
cwd = Output
print(f"current directory = {Output}")

## Input Data
- organizations
- sites

In [None]:
# Load the organization records into a dataframe; missing values become empty strings.

fileInput = "rawdata/organization_raw.csv"
dforg = pd.read_csv(fileInput)
dforg = dforg.replace(np.nan, "")
print(len(dforg))
dforg.head(1)

In [None]:
# Load the site records into a dataframe; missing values become empty strings.

fileInput = "rawdata/sites_raw.zip"
sitesRaw = pd.read_csv(fileInput)
df = sitesRaw.replace(np.nan, "")
print(len(df))
df.head(1)

## Clean & Process the Data

In [None]:
# Ensure lat and long values are numeric and inside the expected bounding box.
# Rows whose coordinates are non-numeric / NaN or out of bounds are dropped.

# Temp fix
# Ensure long is < 0 and > -125 (few ID sites out of bounds)
# Ensure lat is > 10 and < 50 (few MT sites out of bounds).

# Anything that cannot be parsed as a number (including empty strings) becomes NaN.
df['Latitude'] = pd.to_numeric(df['Latitude'], errors='coerce')
df['Longitude'] = pd.to_numeric(df['Longitude'], errors='coerce')

# Compare the float values directly. Casting to int raises on NaN and truncates
# toward zero (e.g. -0.5 -> 0, 10.9 -> 10), which shifts the bounds. A comparison
# against NaN is always False, so rows with missing coordinates fall out of the
# mask without any extra handling.
validLatitude = (df['Latitude'] > 10) & (df['Latitude'] < 50)
validLongitude = (df['Longitude'] < 0) & (df['Longitude'] > -125)

# .copy() gives an independent frame so later column assignments do not act on a slice.
df = df[validLatitude & validLongitude].copy()

print(len(df))
df.head(1)

In [None]:
# add State field

def createState(Val):
    """Return the first two characters of ``Val`` after trimming surrounding whitespace.

    The result is used as the State value; strings shorter than two
    characters are returned as-is (after trimming).
    """
    return Val.strip()[:2]

# Derive the State value of every row from its SiteUUID.
# Mapping over the single column avoids building a row object per record
# (as a row-wise apply does) and returns a Series even when the frame has no rows.
df['State'] = df['SiteUUID'].map(createState)
df['State'].unique()

In [None]:
# Left-Join Organization info

# Keep every site row and attach the organization columns that share its State.
df = pd.merge(df, dforg, how='left', on='State')
print(len(df))
df.head(1)

In [None]:
# add custom NLDI fields

# Both link columns are a fixed URL prefix followed by the site's SiteUUID as text.
siteIdText = df['SiteUUID'].astype(str)
df['GeoconnexAddress'] = "https://geoconnex.us/wade/sites/" + siteIdText
df['WaDELandingPageLink'] = "https://westdaat.westernstateswater.org/details/site/" + siteIdText
print(len(df))
df.head()

<!-- ## Convert to geojson -->

## Match NLDI Schema
- match this sample ![image.png](attachment:image.png)

In [None]:
# Create a new dataframe whose columns match the NLDI schema.
featureName = (
    "State: " + df['State'].astype(str)
    + " Source: " + df['OrganizationUUID'].astype(str)
    + " ID: " + df['SiteUUID'].astype(str)
)

df2 = pd.DataFrame({
    'feature_id': df['SiteUUID'],
    'feature_name': featureName,
    'feature_uri': df['GeoconnexAddress'],
})

print(len(df2))
df2.head()

In [None]:
# convert dataframe to geodataframe

# Build point geometry from the Longitude / Latitude columns, then wrap df2
# in a GeoDataFrame tagged with the EPSG:4326 coordinate reference system.
sitePoints = gpd.points_from_xy(df['Longitude'], df['Latitude'])
gdf2 = gpd.GeoDataFrame(df2, geometry=sitePoints, crs="EPSG:4326")
print(len(gdf2))
gdf2.head()

## Check & Examine the resulting Data
- plot it out, look for oddities.

In [None]:
# Plot every site in gdf2 as a point for a quick visual check of the output.
gplt.pointplot(gdf2)

## Exporting Files
- be sure to save the results on this google drive: https://drive.google.com/drive/folders/1JKDaSUcfao4BgvCFFT5B_OY6aGbWTI9e

In [None]:
# Export the NLDI table as CSV and the geodataframe as GeoJSON.
# compress to zip file by hand

# Create the target folder first; writing into a missing directory raises.
os.makedirs('outputs', exist_ok=True)

df2.to_csv('outputs/wadeCSVFIle.csv', index=False)
gdf2.to_file("outputs/wadeGeoJSON.geojson", driver='GeoJSON')