In [33]:
# Import necessary libraries and set a variable for the directory
import io
import json
import os
import zipfile
from datetime import date

import arcgis
import arcpy
import numpy
import pandas as pd
import requests

# Root folder that holds the lab's input data and the generated logs.
# Written as a raw string so that no backslash can be read as an escape
# sequence (the previous literal contained an unescaped "\D").
directory = r'C:\Users\15612\Documents\Arc II\Lab 2'

In [34]:
# Handles shared by the cells below: the target coordinate system, the
# "CURRENT" ArcGIS Pro project, and the first map in it named "Map"
spatial_ref = arcpy.SpatialReference(4326)  # factory code 4326 (WGS 84)
project = arcpy.mp.ArcGISProject("CURRENT")
maps_named_map = project.listMaps("Map")
m = maps_named_map[0]

In [35]:
# Helper for unpacking a .zip archive into a folder
def unzip(zip_file_path, directory):
    """Extract every member of a zip archive into a folder.

    Args:
        zip_file_path: Path of the .zip file to read.
        directory: Folder the archive contents are extracted into.
    """
    archive = zipfile.ZipFile(zip_file_path, mode='r')

    # The context manager closes the archive even if extraction fails
    with archive:
        archive.extractall(path=directory)

In [36]:
# Helper for moving a dataset into the shared coordinate system (if needed)
def reproject(in_data,out_data):
    """Project ``in_data`` into ``out_data``, then delete ``in_data``.

    The output coordinate system is the module-level ``spatial_ref``.
    The input dataset is removed after the Project tool has run, so only
    the reprojected copy remains.

    Args:
        in_data: Dataset to reproject; it is deleted afterwards.
        out_data: Name or path of the reprojected output dataset.
    """
    # Write a copy of the input in the target coordinate system
    arcpy.management.Project(in_dataset=in_data, out_dataset=out_data, out_coor_system=spatial_ref)

    # Drop the source dataset, which still has the old coordinate system
    arcpy.management.Delete(in_data=in_data)

## Quality assurance of point data

The code below develops a function for determining if the point data pulled into the project is realistic and applicable for the project at hand.

This function:
- Determines if point data is within Minnesota boundaries
    - If not, only the points that intersect the Minnesota boundary are copied to a new feature class (named with an `_sj` suffix), and the original feature class is deleted

In [37]:
# Helper that removes any active selection from a layer
def _clear_selection(layer):
    """Clear the current selection on ``layer``."""
    # No where_clause is passed: the expression is ignored for
    # CLEAR_SELECTION, and the '*' previously supplied is not valid SQL.
    arcpy.management.SelectLayerByAttribute(
        in_layer_or_view = layer,
        selection_type = 'CLEAR_SELECTION'
    )


# Define a function for determining if points are outside of MN state boundaries
def point_data_geom_check(point_fc, state_fc):
    """Keep only the points of ``point_fc`` that intersect ``state_fc``.

    The total number of points is compared with the number selected by an
    intersect against ``state_fc``. If the two differ, the selected points
    are copied to a new feature class named ``point_fc + '_sj'`` and
    ``point_fc`` itself is deleted. If they match, the selection is simply
    cleared and ``point_fc`` is left untouched.

    Args:
        point_fc: Name of the point layer to check. Presumably a layer in
            the current map, since a selection is applied to it - confirm
            against the caller.
        state_fc: Name of the layer holding the state boundary.

    Returns:
        None. Progress messages are printed.
    """
    # Count every point before any selection is applied
    total = int(arcpy.management.GetCount(
        in_rows = point_fc
    ).getOutput(0))

    # Select the points that intersect the state boundary
    arcpy.management.SelectLayerByLocation(
        in_layer = point_fc,
        overlap_type = 'INTERSECT',
        select_features = state_fc
    )

    # With a selection active, GetCount reports only the selected points
    inside = int(arcpy.management.GetCount(
        in_rows = point_fc
    ).getOutput(0))

    # Every point intersects the boundary: nothing to filter
    if inside == total:
        _clear_selection(point_fc)
        print('Point location data contains no null values and is within state boundary.')
        return

    print('Some points lack location data or are outside state boundaries. Performing spatial join...')

    # CopyFeatures honours the active selection, so only the points inside
    # the boundary reach the new feature class
    arcpy.management.CopyFeatures(
        in_features = point_fc,
        out_feature_class = point_fc + '_sj'
    )

    _clear_selection(point_fc)

    # Delete the old feature class with extraneous points
    arcpy.management.Delete(
        in_data = point_fc
    )

    print('Done')

In [38]:
# Establish variables for the dates
# `date` is datetime.date from the standard library; the calendar is read
# once so every variable below describes the same day.
run_date = date.today()

# ISO 'YYYY-MM-DD' text, used in the API query and for date comparisons
today = run_date.isoformat()

# Numeric components come straight from the date object instead of being
# sliced out of the string
day_num = run_date.day
month_num = run_date.month
year_num = run_date.year

# String forms without zero padding (e.g. '3' rather than '03')
day = str(day_num)
month = str(month_num)
year = str(year_num)

In [39]:
# Acquire MN temperature data from Iowa State API in GeoJSON format
# The query string (network=MN_RWIS&date=<today>) is built by requests from
# `params`; the timeout stops a stalled connection from hanging the notebook.
minnesota_temp = requests.get(
    'https://mesonet.agron.iastate.edu/api/1/daily.geojson',
    params = {'network': 'MN_RWIS', 'date': today},
    timeout = 60
)

# Stop here with a clear HTTP error instead of failing later while parsing
# an error page as JSON
minnesota_temp.raise_for_status()
minn_json = minnesota_temp.json()

## Quality assurance of weather data

The code below analyzes the GeoJSON weather data retrieved from Iowa State and flags records that are, or are likely to be, incorrect or unusable.

This check looks for:
- If there is no temperature data
- Potentially inaccurate temperature data by month
- If there are any negative values within precipitation fields
- If the date under the date field is incorrect

All errors are then added to the .csv file to avoid data deletion, while also allowing the user to avoid problematic information.

In [40]:
# Monthly plausibility limits in degrees F as (highest allowed maximum,
# lowest allowed minimum): the monthly temperature records with 5 added to
# the record high and 5 subtracted from the record low
MONTHLY_TEMP_LIMITS_F = {
    1: (74, -62),
    2: (81, -65),
    3: (93, -55),
    4: (106, -27),
    5: (117, -1),
    6: (115, 10),
    7: (120, 19),
    8: (115, 16),
    9: (116, 5),
    10: (103, -11),
    11: (89, -52),
    12: (79, -62),
}

# Precipitation-type properties that must never be negative
PRECIP_FIELDS = ('precip', 'snow', 'snowd')


def weather_record_error(properties, expected_date, month):
    """Return the quality-assurance message for one weather record.

    Checks run in a fixed order and the first failure is reported:
    wrong date, missing temperature, minimum temperature below the monthly
    limit, maximum temperature above the monthly limit, negative
    precipitation value.

    Args:
        properties: The 'properties' dict of one GeoJSON feature. Must
            contain 'date', 'max_tmpf', 'min_tmpf', 'precip', 'snow' and
            'snowd'.
        expected_date: Date string the record's 'date' must equal.
        month: Month number (1-12) used to pick the temperature limits.

    Returns:
        An 'Error: ...' message, or 'No Errors Found'.
    """
    # Make sure the date of the incoming data is the expected one
    if properties['date'] != expected_date:
        return 'Error: Incorrect date'

    max_temp = properties['max_tmpf']
    min_temp = properties['min_tmpf']

    # Missing readings cannot be range-checked
    if max_temp is None or min_temp is None:
        return 'Error: No temperature data available at this location'

    highest_allowed, lowest_allowed = MONTHLY_TEMP_LIMITS_F[month]

    # The minimum is tested first so that, when both readings are out of
    # range, the minimum message is reported (as the earlier code did)
    if min_temp < lowest_allowed:
        return 'Error: Minimum temperature is likely incorrect'
    if max_temp > highest_allowed:
        return 'Error: Maximum temperature is likely incorrect'

    # Null precipitation values are skipped; only real negatives are errors
    for field in PRECIP_FIELDS:
        value = properties[field]
        if value is not None and value < 0:
            return 'Error: Negative values present in precipitation fields'

    return 'No Errors Found'


# Tag every feature in the GeoJSON with its QA result under the 'error' key
fs = minn_json['features']

for f in fs:
    properties = f['properties']
    properties['error'] = weather_record_error(properties, today, month_num)

# Print geojson
print(minn_json)

{'type': 'FeatureCollection', 'crs': {'type': 'name', 'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}}, 'features': [{'type': 'Feature', 'properties': {'station': 'MN001', 'date': '2024-03-14', 'max_tmpf': 51.08, 'min_tmpf': 45.319977, 'precip': None, 'max_gust': 150.44968, 'snow': None, 'snowd': None, 'min_rh': 53.0, 'max_rh': 87.0, 'max_dwpf': 45.986023, 'min_dwpf': 32.360012, 'min_feel': 39.49833, 'avg_feel': None, 'max_feel': 51.08, 'max_drct': None, 'precip_est': False, 'tmpf_est': False, 'max_gust_localts': '2024-03-14T00:55:00Z', 'temp_hour': None, 'avg_sknt': None, 'vector_avg_drct': None, 'min_rstage': None, 'max_rstage': None, 'id': 'MN001', 'name': 'Twin Lakes I-35 Mile Post 1', 'error': 'No Errors Found'}, 'geometry': {'type': 'Point', 'coordinates': [-93.354057312, 43.5083312988]}}, {'type': 'Feature', 'properties': {'station': 'MN002', 'date': '2024-03-14', 'max_tmpf': 52.16001, 'min_tmpf': 39.38, 'precip': None, 'max_gust': 18.660908, 'snow': None, 'snowd': None,




In [41]:
# Column order of the temperature log
station_columns = [
    "station_id",
    "date",
    "max_daily_temp_F",
    "min_daily_temp_F",
    "precip_in",
    "snow_in",
    "snow_depth_in",
    "station_name",
    "longitude",
    "latitude",
    "error",
]

# Build one record per GeoJSON feature in a single pass
station_records = []
for feature in minn_json['features']:
    props = feature['properties']
    coords = feature['geometry']['coordinates']
    station_records.append({
        "station_id": props['station'],
        "date": props['date'],
        "max_daily_temp_F": props['max_tmpf'],
        "min_daily_temp_F": props['min_tmpf'],
        "precip_in": props['precip'],
        "snow_in": props['snow'],
        "snow_depth_in": props['snowd'],
        "station_name": props['name'],
        "longitude": coords[0],
        "latitude": coords[1],
        "error": props['error'],
    })

# Passing the columns explicitly keeps the header even when no features came back
station_df = pd.DataFrame(station_records, columns=station_columns)

# Set a variable equal to the date without the dash marks for saving
today_no_dash = today.replace('-','')

# Establish a variable for where the temperature logs will be saved, and
# make sure the folder exists so the first run does not fail on to_csv
temp_log_dir = os.path.join(directory, 'Daily Minn Temp Logs')
os.makedirs(temp_log_dir, exist_ok=True)
temp_log_path = os.path.join(temp_log_dir, 'MN_temp_log_' + today_no_dash + '.csv')

# Create a .csv file using the pandas dataframe and add the .csv to the map
station_df.to_csv(temp_log_path)
m.addDataFromPath(temp_log_path)

# Establish a variable for the .csv file and use the latitude and longitude values to create a point feature class
# The bare file name is passed because that is the name the table has once
# it has been added to the map
csv_file = os.path.basename(temp_log_path)

arcpy.management.XYTableToPoint(
    in_table = csv_file,
    out_feature_class = 'mn_stations_' + today_no_dash,
    x_field = 'longitude',
    y_field = 'latitude'
)

In [42]:
# Check today's weather-station points against the 'Minnesota' layer to detect extraneous point data
stations_fc = 'mn_stations_' + today_no_dash
point_data_geom_check(stations_fc, 'Minnesota')

Some points lack location data or are outside state boundaries. Performing spatial join...
Done


## Quality assurance check of weather station data

- Some points were found outside of Minnesota state boundary. A spatial join was performed to only display point data of weather stations within Minnesota state boundaries.

In [43]:
# Unzip BMSB data
# Both paths are derived from `directory` so the archive location follows
# the configured lab folder instead of repeating it as a hard-coded path
bmsb_dir = os.path.join(directory, 'BMSB_EDDMapS')
unzip(os.path.join(bmsb_dir, '45663.zip'), os.path.join(bmsb_dir, 'BMSB'))

## Quality assurance of brown marmorated stinkbug (BMSB) location data

Due to the relatively unorganized manner of the BMSB data, these records, unlike the weather station data, were cleaned before being added to the project. This included limiting the total number of fields to 8 before adding the point data using latitude and longitude coordinates. Once again, the point data was then checked to determine:
- Any points outside of Minnesota state boundaries
- Any points with no latitude and/or longitude values

These points were then deleted for more understandable and recognizable data.

In [44]:
# Establish a variable for the .csv file showing BMSB data
# NOTE(review): the 'unicode_escape' encoding is kept as it was; presumably
# chosen to get past decoding errors in the export - confirm it does not
# alter any backslashes in the data.
BMSB_table_excess = pd.read_csv(
    os.path.join(directory, 'BMSB_EDDMapS', 'BMSB', 'mappings.csv'),
    encoding='unicode_escape'
)

# Limit the fields to the necessary ones, then create a new field named
# 'County' from the 'Location' field: quotes are removed and only the text
# before the first comma is kept. The vectorised .str methods leave missing
# Location values as missing instead of raising.
BMSB_df = (
    BMSB_table_excess[["objectid", "ObsDate", "Location", "reporter", "Latitude", "Longitude", "Phenology", "RecSrcTyp"]]
    .assign(
        County=lambda table: table['Location']
        .str.replace('"', '', regex=False)
        .str.split(',')
        .str[0]
    )
    .drop(columns=['Location'])
)

# Convert table to sedf
sedf = arcgis.GeoAccessor.from_xy(
    df = BMSB_df,
    x_column = "Longitude",
    y_column = "Latitude"
)

# NOTE(review): gdb_path was not defined anywhere in this notebook, so a
# fresh kernel failed here. The project's default geodatabase is assumed to
# be the intended target - confirm.
gdb_path = project.defaultGeodatabase

# Convert sedf to feature class
sedf.spatial.to_featureclass(location=os.path.join(gdb_path, "BMSB_sightings"))

'C:\\Users\\15612\\Documents\\ArcGIS\\Projects\\ArcII Lab 2\\ArcII Lab 2.gdb\\BMSB_sightings'

In [45]:
# Check the BMSB sightings against the 'Minnesota' layer for any extraneous data
bmsb_fc = 'BMSB_sightings'
point_data_geom_check(bmsb_fc, 'Minnesota')

Some points lack location data or are outside state boundaries. Performing spatial join...
Done


## Quality assurance check of BMSB data

- Some points were found outside of the Minnesota state boundary. A spatial join was performed to only display point data of BMSB sightings within Minnesota state boundaries.

### Once this notebook has been run, move to the "Geodatabase Connection" notebook