In [5]:
#First we import our packages
import arcpy
from arcpy import env  
from arcpy.sa import *
import pandas as pd
import geopandas as gpd
import numpy as np
import random
import psycopg2
from shapely.geometry import Point, Polygon

In [37]:
# Load the September 2023 observations from the local CSV into a DataFrame
csv_file_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\temp_data.csv"
temp_data = pd.read_csv(filepath_or_buffer=csv_file_path)

In [38]:
# Coerce the measurement columns to numeric dtypes one column at a time.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
numeric_cols = ['high_F', 'low_F', 'precip', 'snow_inch', 'snowd_inch']
for col in numeric_cols:
    temp_data[col] = pd.to_numeric(temp_data[col], errors='coerce')

In [39]:
# The observations carry no coordinates, so fetch the reporting-station listing
# from the Mesonet site (query string asks for network MN_COOP in CSV format)
url = "https://mesonet.agron.iastate.edu/sites/networks.php?network=MN_COOP&format=csv&nohtml=on"

# pandas reads the CSV straight from the URL into the station DataFrame
loc_df = pd.read_csv(filepath_or_buffer=url)

In [40]:
# Attach station coordinates to the observations: left-join the "lat"/"lon"
# columns of loc_df onto temp_data where temp_data "nwsli" equals loc_df "stid".
# A left join keeps every observation; unmatched stations get NaN coordinates.
# The "stid" join key is dropped afterwards because "nwsli" already identifies the station.
station_coords = loc_df[['stid', 'lat', 'lon']]
temp_data = temp_data.merge(
    station_coords, how='left', left_on='nwsli', right_on='stid'
).drop(columns=['stid'])

# Show the merged temp_data frame
print(temp_data)

      nwsli        date      time  ...  snowd_inch       lat       lon
0     MOOM5  2023-09-01       NaN  ...         NaN  46.45000 -92.75780
1     TWRM5  2023-09-01       NaN  ...         NaN  46.96670 -95.66670
2     HKHM5  2023-09-01   7:00 AM  ...         NaN  43.76372 -91.34814
3     BABM5  2023-09-01       NaN  ...         NaN  47.71210 -91.95330
4     NHPM5  2023-09-01  11:00 PM  ...         0.0  45.01000 -93.37920
...     ...         ...       ...  ...         ...       ...       ...
5695  NHPM5  2023-09-30  11:00 PM  ...         0.0  45.01000 -93.37920
5696  PKGM5  2023-09-30   8:00 AM  ...         0.0  47.25000 -93.59000
5697  PELM5  2023-09-30   8:00 AM  ...         0.0  46.58330 -96.08890
5698  LCHM5  2023-09-30   8:00 AM  ...         0.0  45.12790 -94.53480
5699  LMBM5  2023-09-30   7:00 AM  ...         0.0  44.24000 -95.32000

[5700 rows x 10 columns]


In [43]:
# Sanity-check the station coordinates against a rough bounding box around
# Minnesota, given as (lon, lat) corners.
minnesota_boundary = Polygon([(-97.5, 43.0), (-89.0, 43.0), (-89.0, 49.5), (-97.5, 49.5)])

# Build one point per observation from its (lon, lat) pair and wrap the frame
# in a GeoDataFrame so the points can be tested against the polygon.
temp_data['Coordinates'] = [Point(xy) for xy in zip(temp_data.lon, temp_data.lat)]
gdf = gpd.GeoDataFrame(temp_data, geometry='Coordinates')

# Boolean mask: True where the point lies inside the bounding box
inside_mask = gdf.geometry.within(minnesota_boundary)
within_minnesota = gdf[inside_mask]

if len(within_minnesota) == 0:
    print("No data falls within Minnesota boundary.")
else:
    print("Data falls within Minnesota boundary.")
    # A single point inside the box is enough to reach this branch, so also
    # report how many rows did NOT pass the test (outside the box, or with
    # coordinates that could not be tested, e.g. missing lat/lon).
    outside_count = int((~inside_mask).sum())
    if outside_count:
        print(f"Warning: {outside_count} of {len(gdf)} rows fall outside the boundary or lack usable coordinates.")

Data falls within Minnesota boundary.


In [8]:
# Parse the "date" column into pandas datetimes and store it back on the frame
parsed_dates = pd.to_datetime(temp_data['date'])
temp_data['date'] = parsed_dates

# Report the resulting dtype to confirm the conversion took effect
date_column_type = temp_data['date'].dtype
print("Type of 'date' column after conversion:", date_column_type)

Type of 'date' column after conversion: datetime64[ns]


In [9]:
# Choose the single day to interpolate.
# NOTE: input() blocks "Run All"; replace it with a fixed date string for
# unattended runs.
input_date = input("Enter a date (YYYY-MM-DD format): ")

# Convert the typed text to a pandas Timestamp (surrounding whitespace ignored)
input_date = pd.to_datetime(input_date.strip())

# Keep only that day's observations. .copy() makes the result an independent
# DataFrame, so columns added to it later are not assignments on a slice of
# the full table (which would raise SettingWithCopyWarning).
temp_data = temp_data[temp_data['date'] == input_date].copy()

# An unmatched date would otherwise flow through silently as an empty frame
if temp_data.empty:
    print(f"Warning: no observations found for {input_date}.")

# Display the filtered DataFrame
print(temp_data)

Enter a date (YYYY-MM-DD format): 2023-09-16
      nwsli       date      time  ...  snowd_inch      lat      lon
2850  NISM5 2023-09-16       NaN  ...         NaN  46.5000 -94.2667
2851  CRLM5 2023-09-16   8:00 AM  ...         0.0  46.6700 -94.1100
2852  BWNM5 2023-09-16   8:00 AM  ...         0.0  44.7335 -94.3417
2853  WRUM5 2023-09-16  10:00 PM  ...         NaN  48.9008 -95.4006
2854  RUDM5 2023-09-16   7:00 AM  ...         0.0  43.8052 -91.7501
...     ...        ...       ...  ...         ...      ...      ...
3035  LSAM5 2023-09-16       NaN  ...         NaN  44.9783 -93.2469
3036  ALXM5 2023-09-16   6:00 AM  ...         NaN  45.8782 -95.3827
3037  BLHM5 2023-09-16       NaN  ...         NaN  45.8608 -94.3600
3038  KIMM5 2023-09-16   6:00 AM  ...         0.0  45.3533 -94.3056
3039  MPXM5 2023-09-16  11:00 PM  ...         0.0  44.8496 -93.5644

[190 rows x 10 columns]


In [10]:
# Per-row average temperature: midpoint of the high and low readings.
# The result is NaN whenever either reading is missing.
temp_data['avg_temp'] = temp_data['high_F'].add(temp_data['low_F']).div(2)

Unnamed: 0,nwsli,date,time,high_F,low_F,precip,snow_inch,snowd_inch,lat,lon,avg_temp
2850,NISM5,2023-09-16,,,,,,,46.5000,-94.2667,
2851,CRLM5,2023-09-16,8:00 AM,71.0,50.0,0.00,0.0,0.0,46.6700,-94.1100,60.5
2852,BWNM5,2023-09-16,8:00 AM,75.0,51.0,0.00,0.0,0.0,44.7335,-94.3417,63.0
2853,WRUM5,2023-09-16,10:00 PM,,,,,,48.9008,-95.4006,
2854,RUDM5,2023-09-16,7:00 AM,67.0,53.0,0.05,0.0,0.0,43.8052,-91.7501,60.0
...,...,...,...,...,...,...,...,...,...,...,...
3035,LSAM5,2023-09-16,,68.0,54.0,0.08,0.0,,44.9783,-93.2469,61.0
3036,ALXM5,2023-09-16,6:00 AM,,,0.00,0.0,,45.8782,-95.3827,
3037,BLHM5,2023-09-16,,,,,,,45.8608,-94.3600,
3038,KIMM5,2023-09-16,6:00 AM,76.0,50.0,0.00,0.0,0.0,45.3533,-94.3056,63.0


In [11]:
# Set-up for the evapotranspiration (ET) calculation in the next cell

# Make sure the 'date' column holds datetime values
temp_data['date'] = pd.to_datetime(temp_data['date'])

# Leading coefficient used by the Hargreaves method
constant = 0.0023

In [12]:
# Estimate ET for every row with this notebook's Hargreaves-style expression:
#   ET = constant * (avg_temp + 17.8) * sqrt(high_F - low_F) * seasonal_factor
#   seasonal_factor = 1 + 0.033 * sin(deg2rad(360 * (day_of_year - 81) / 365))
# The calculation is done column-wise on whole Series instead of looping over
# rows; rows with a missing high, low or average stay NaN.
#
# NOTE(review): the published Hargreaves equation takes temperatures in deg C
# and multiplies by extraterrestrial radiation (Ra). Here the inputs are the
# *_F columns (presumably Fahrenheit) and Ra is replaced by a seasonal factor.
# Confirm that this is intended before relying on the absolute ET values.
temperature_range = temp_data['high_F'] - temp_data['low_F']

# A negative range (low above high) has no real square root. Mask it to NaN so
# the ET column stays a plain float column instead of picking up an invalid
# (possibly complex) value.
temperature_range = temperature_range.where(temperature_range >= 0)

# Day of the year (1-366) for each observation date
day_of_year = temp_data['date'].dt.dayofyear
seasonal_factor = 1.0 + 0.033 * np.sin(np.deg2rad(360 * (day_of_year - 81) / 365))

ET0_values = constant * (temp_data['avg_temp'] + 17.8) * (temperature_range ** 0.5) * seasonal_factor

# Add the calculated ET0 values as a new column to the DataFrame
temp_data['ET'] = ET0_values

# Display the updated DataFrame
print(temp_data)


      nwsli       date      time  high_F  ...      lat      lon  avg_temp        ET
2850  NISM5 2023-09-16       NaN     NaN  ...  46.5000 -94.2667       NaN       NaN
2851  CRLM5 2023-09-16   8:00 AM    71.0  ...  46.6700 -94.1100      60.5  0.827384
2852  BWNM5 2023-09-16   8:00 AM    75.0  ...  44.7335 -94.3417      63.0  0.912751
2853  WRUM5 2023-09-16  10:00 PM     NaN  ...  48.9008 -95.4006       NaN       NaN
2854  RUDM5 2023-09-16   7:00 AM    67.0  ...  43.8052 -91.7501      60.0  0.671242
...     ...        ...       ...     ...  ...      ...      ...       ...       ...
3035  LSAM5 2023-09-16       NaN    68.0  ...  44.9783 -93.2469      61.0  0.679870
3036  ALXM5 2023-09-16   6:00 AM     NaN  ...  45.8782 -95.3827       NaN       NaN
3037  BLHM5 2023-09-16       NaN     NaN  ...  45.8608 -94.3600       NaN       NaN
3038  KIMM5 2023-09-16   6:00 AM    76.0  ...  45.3533 -94.3056      63.0  0.950022
3039  MPXM5 2023-09-16  11:00 PM    70.0  ...  44.8496 -93.5644      58.5  0

In [14]:
# Gauge how much of the derived data is missing before interpolating.
# Total number of rows in the filtered frame:
total_rows = len(temp_data)

# Count of missing values in each derived column
null_avg_temp = temp_data['avg_temp'].isna().sum()
null_ET = temp_data['ET'].isna().sum()

# Express each count as a percentage of all rows
null_avg_temp_percentage = (null_avg_temp / total_rows) * 100
null_ET_percentage = (null_ET / total_rows) * 100

# Percentage above which a warning is printed
threshold = 50

# Warn when either column's missing share is above the threshold
if any(pct > threshold for pct in (null_avg_temp_percentage, null_ET_percentage)):
    print("Warning: The percentage of null values in either avg_temp or ET column exceeds 50%.")


In [15]:
# Build the point feature class used by the interpolation cells: one point per
# row of temp_data, carrying that row's ET, date, nwsli and GDD values.
output_fc = 'Points'

# Create an empty point feature class in the current workspace
arcpy.management.CreateFeatureclass(
    arcpy.env.workspace,
    output_fc,
    'POINT',
    spatial_reference=arcpy.SpatialReference(4326)  # WGS84 Geographic Coordinate System
)

# Check the data type of the 'ET' column in the DataFrame
et_dtype = temp_data['ET'].dtype

# Match the field type to the column's precision. An ArcGIS FLOAT field is
# single precision, so only a float32 column fits it; float64 (the pandas
# default) needs DOUBLE to avoid losing precision.
et_field_type = 'FLOAT' if et_dtype == 'float32' else 'DOUBLE'
arcpy.management.AddField(output_fc, 'ET', et_field_type)

# Add 'date' and 'nwsli' fields
arcpy.management.AddField(output_fc, 'date', 'DATE')
arcpy.management.AddField(output_fc, 'nwsli', 'TEXT')

# Add 'GDD' field
arcpy.management.AddField(output_fc, 'GDD', 'FLOAT')

# No cell shown in this notebook creates a GDD column, so only read it when it
# exists; otherwise the field is written as null instead of raising KeyError.
has_gdd = 'GDD' in temp_data.columns


def _null_if_missing(value):
    """Return None for NaN/NaT/None so the cursor writes a null, else the value."""
    return None if pd.isna(value) else value


skipped_rows = 0

# Open an insert cursor; SHAPE@XY takes the location as an (x, y) tuple
with arcpy.da.InsertCursor(output_fc, ['SHAPE@XY', 'ET', 'date', 'nwsli', 'GDD']) as cursor:
    for index, row in temp_data.iterrows():
        lat = row['lat']
        lon = row['lon']

        # A station with no coordinates cannot be placed, so leave it out
        if pd.isna(lat) or pd.isna(lon):
            skipped_rows += 1
            continue

        date = _null_if_missing(row['date'])
        ET = _null_if_missing(row['ET'])
        GDD = _null_if_missing(row['GDD']) if has_gdd else None
        nwsli = _null_if_missing(row['nwsli'])

        # x = longitude, y = latitude in the WGS84 feature class
        cursor.insertRow([(float(lon), float(lat)), ET, date, nwsli, GDD])

print(f"Feature class '{output_fc}' created successfully.")
if skipped_rows:
    print(f"Skipped {skipped_rows} rows without coordinates.")


Feature class 'Points' created successfully.


In [17]:
# IDW interpolation of the "ET" field of the station points.
# The resulting raster is saved to the GeoTIFF named below.
output_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\IDW_ETPoints.tif"
outIDW = Idw("Points.shp", "ET")
outIDW.save(output_path)

In [6]:
# Ordinary kriging of the "ET" field of the station points, using a spherical
# semivariogram model. The numeric arguments are passed through unchanged
# (presumably lag size and output cell size per the arcpy.sa signatures; confirm).
output_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\Kriging_ETPoints.tif"
ordinary_model = KrigingModelOrdinary("SPHERICAL", 0.021507)
outKriging = Kriging("Points.shp", "ET", ordinary_model, 0.0215068000000001)
outKriging.save(output_path)

In [7]:
# Universal kriging of the "ET" field of the station points, using a spherical
# semivariogram model. The numeric arguments are passed through unchanged
# (presumably lag size and output cell size per the arcpy.sa signatures; confirm).
output_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\Univ_Kriging_ETPoints.tif"
universal_model = KrigingModelUniversal("SPHERICAL", 0.021507)
outKriging = Kriging("Points.shp", "ET", universal_model, 0.0215068000000001)
outKriging.save(output_path)

In [10]:
# Resample each interpolated GeoTIFF to a 0.2 x 0.2 cell size (nearest
# neighbour) so the later raster-to-point step produces fewer points.
# The three surfaces differ only by their name prefix, so one loop replaces
# three copies of the same call.
for surface_name in ("Idw", "Kriging", "Univ_Kriging"):
    arcpy.management.Resample(
        in_raster=f"{surface_name}_ETPoints.tif",
        out_raster=f"{surface_name}_ET_Resample",
        cell_size="0.2 0.2",
        resampling_type="NEAREST"
    )

In [11]:
# Convert each resampled raster to point features, taking the point attribute
# from the raster's "value" field. The three surfaces differ only by their
# name prefix, so one loop replaces three copies of the same call.
for surface_name in ("Idw", "Kriging", "Univ_Kriging"):
    arcpy.conversion.RasterToPoint(
        in_raster=f"{surface_name}_ET_Resample",
        out_point_features=f"{surface_name}_ET_Point",
        raster_field="value"
    )

In [8]:
# QC: run the Exploratory Interpolation tool on the "ET" field of "Points",
# comparing ordinary kriging, universal kriging and IDW on the ACCURACY
# criterion. The table named by out_cv_table is written to the project geodatabase.
cv_table_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb\ExploratoryInterpolation1"
candidate_methods = "ORDINARY_KRIGING;UNIVERSAL_KRIGING;IDW"

arcpy.ga.ExploratoryInterpolation(
    in_features="Points",
    value_field="ET",
    out_cv_table=cv_table_path,
    out_geostat_layer=None,
    interp_methods=candidate_methods,
    comparison_method="SINGLE",
    criterion="ACCURACY",
    criteria_hierarchy="ACCURACY PERCENT #",
    weighted_criteria="ACCURACY 1",
    exclusion_criteria=None
)



In [12]:
# Copy the universal-kriging point features from the project geodatabase into
# the PostGIS table reached through the .sde connection file
kriging_points = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb\Univ_Kriging_ET_Point"
postgis_table = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\PostgreSQL-34-final_project(postgres).sde\final_project.postgres.et_data"

arcpy.conversion.ExportFeatures(in_features=kriging_points, out_features=postgis_table)
print("Export complete")

Export complete
