In [6]:
import arcpy
import pandas as pd
import psycopg2
import requests
from shapely.wkb import loads as wkb_loads
import requests
import csv
from arcpy import env
import os
import numpy as np
import io
import json
from io import StringIO
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import zipfile
from osgeo import gdal
import geopandas
import pyproj
import random
from sqlalchemy import create_engine

Note: Portions of this notebook were developed using AI generated code.

### Interpolation

In [66]:
# This connects to the previous lab's database
db_params = {
    'database': 'lab1.2',  
    'user': '(USER)',  
    'password': '(PASSWORD), 
    'host': '(HOST)',  
    'port': '5432' 
}
conn = psycopg2.connect(**db_params)
cursor = conn.cursor()

# We then query the desired temperature data (including converting the geometry column to WKT)
sql_query = "SELECT station_id, air_temper, wkt_geom FROM minn_temp_wkt"
cursor.execute(sql_query)
data = cursor.fetchall()

# Then we convert the fetched data to a DataFrame
df = pd.DataFrame(data, columns=['station_id', 'air_temper', 'wkt_geom'])

conn.close()


In [None]:
# We can also go ahead and define the connection properties for our output PostGIS database
connection_properties = {
    'database': 'lab3',
    'user': '(USER)',  
    'password': '(PASSWORD), 
    'host': '(HOST)',
    'port': '5432'
}

# Create a new database connection file
db_connection_file = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\PostgreSQL-34-lab3(postgres).sde"
arcpy.CreateDatabaseConnection_management(out_folder_path=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2",
                                          out_name="database_connection",
                                          database_platform="POSTGRESQL",
                                          instance=connection_properties['host'],
                                          account_authentication="DATABASE_AUTH",
                                          username=connection_properties['user'],
                                          password=connection_properties['password'],
                                          save_user_pass="SAVE_USERNAME",
                                          database=connection_properties['database'],
                                          version_type="TRANSACTIONAL",
                                          version="dbo.DEFAULT"
                                          )


In [67]:
# We can establish a  workspace for feature class output
arcpy.env.workspace = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb"


In [68]:
# We then assign a variable to the output feature class
output_fc = "Temperature_Data"

# We also assign the spatial reference
spatial_reference = arcpy.SpatialReference(4326)

# Next is to create the output feature class that will show off our temperature data
arcpy.management.CreateFeatureclass(arcpy.env.workspace, output_fc, "POINT", spatial_reference=spatial_reference)

# Then we add fields to the feature class for station ID and air temperature records
arcpy.management.AddField(output_fc, "station_id", "TEXT")
arcpy.management.AddField(output_fc, "air_temper", "FLOAT")

# Then we iterate over each row in the DataFrame to grab our needed info
with arcpy.da.InsertCursor(output_fc, ["SHAPE@", "station_id", "air_temper"]) as cursor:
    for index, row in df.iterrows():
        station_id = row['station_id']
        air_temper = row['air_temper']
        wkt_geom = row['wkt_geom']
        
        # Then use that info to construct points
        point = arcpy.Point()
        point.X, point.Y = map(float, wkt_geom[7:-1].split())
        geom = arcpy.PointGeometry(point, spatial_reference)
        
        # Then insert that point info into the feature class
        cursor.insertRow((geom, station_id, air_temper))


In [69]:
# In order to test our interpolations later, we will need to create a subset of this data and remove it from the feature layer
input_feature_layer = "Temperature_Data"

# We will save it as a separate feature layer
output_feature_layer = "Random_Selected_Features"
output_feature_class = "Random_Selected_Features_Class"

# We first need to get the total count of features in the input feature layer
total_features_count = int(arcpy.GetCount_management(input_feature_layer).getOutput(0))

# The I will generate a list of 16 random indices
random_indices = random.sample(range(1, total_features_count + 1), 16)

# I will use a SQL expression to select the randomly chosen features
sql_expression = "OBJECTID IN ({})".format(','.join(map(str, random_indices)))

# Then I will create a new feature layer with the randomly selected features
arcpy.MakeFeatureLayer_management(input_feature_layer, output_feature_layer, sql_expression)

print("Randomly selected 16 features and created a new feature layer:", output_feature_layer)

# The I will save the selected features to a new feature class
arcpy.CopyFeatures_management(output_feature_layer, output_feature_class)
print("Saved the selected features to a new feature class:", output_feature_class)


Randomly selected 16 features and created a new feature layer: Random_Selected_Features
Saved the selected features to a new feature class: Random_Selected_Features_Class


In [5]:
# The next step is perform our interpolations
# We will start with inverse distance weighting
with arcpy.EnvManager(scratchWorkspace=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb"):
    Idw_Temper = arcpy.sa.Idw(
        in_point_features="Temperature_Data",
        z_field="air_temper",
        cell_size=0.0218500799999999,
        power=2,
        search_radius="VARIABLE 12",
        in_barrier_polyline_features=None
    )
    Idw_Temper.save(r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Idw_Temper")

In [6]:
# Then kriging
with arcpy.EnvManager(scratchWorkspace=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb"):
    Kriging_Temper = arcpy.sa.Kriging(
        in_point_features="Temperature_Data",
        z_field="air_temper",
        kriging_model="Spherical 0.021850 # # #",
        cell_size=0.0218500799999999,
        search_radius="VARIABLE 12",
        out_variance_prediction_raster=None
    )
    Kriging_Temper.save(r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Kriging_Temper")

In [7]:
# And finally splining
with arcpy.EnvManager(scratchWorkspace=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb"):
    Spline_Temper = arcpy.sa.Spline(
        in_point_features="Temperature_Data",
        z_field="air_temper",
        cell_size=0.0218500799999999,
        spline_type="REGULARIZED",
        weight=0.1,
        number_points=12
    )
    Spline_Temper.save(r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Spline_Temper")

In [1]:
#The interpolations need to be saved in our PostGIS database. As they are rasters, I will first resample them to cut down on file size

arcpy.management.Resample(
    in_raster="Idw_Temper",
    out_raster=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Idw_Temper_Resample",
    cell_size="0.1 0.1",
    resampling_type="NEAREST"
)
arcpy.management.Resample(
    in_raster="Kriging_Temper",
    out_raster=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Kriging_Temper_Resample",
    cell_size="0.1 0.1",
    resampling_type="NEAREST"
)
arcpy.management.Resample(
    in_raster="Spline_Temper",
    out_raster=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Spline_Temper_Resample",
    cell_size="0.1 0.1",
    resampling_type="NEAREST"
)
print("Resampling complete")

Resampling complete


In [2]:
# Then I will convert them into a point cloud to make it easier for them to be inserted into PostGIS
arcpy.conversion.RasterToPoint(
    in_raster="Spline_Temper_Resample",
    out_point_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Spline_Temper_Point",
    raster_field="Value"
)
arcpy.conversion.RasterToPoint(
    in_raster="Kriging_Temper_Resample",
    out_point_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Kriging_Temper_Point",
    raster_field="Value"
)
arcpy.conversion.RasterToPoint(
    in_raster="Idw_Temper_Resample",
    out_point_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Idw_Temper_Point",
    raster_field="Value"
)
print("Raster to Point operations complete for all three")

Raster to Point operations complete for all three


In [None]:
# Next step is manually save our interpolated point maps into our PostGIS Database (which I have linked to in advance)
# This exports kriging
arcpy.conversion.ExportFeatures(
    in_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Kriging_Temper_Point",
    out_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\PostgreSQL-34-lab3(postgres).sde\lab3.postgres.kriging_temp_point",
    where_clause="",
    use_field_alias_as_name="NOT_USE_ALIAS",
    field_mapping=r'pointid "pointid" true true false 4 Long 0 0,First,#,C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Kriging_Temper_Point,pointid,-1,-1;grid_code "grid_code" true true false 4 Float 0 0,First,#,C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Kriging_Temper_Point,grid_code,-1,-1',
    sort_field=None
)
print("Export complete")

In [None]:
# Then splining
arcpy.conversion.ExportFeatures(
    in_features="Spline_Temper_Point",
    out_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\PostgreSQL-34-lab3(postgres).sde\lab3.postgres.Spline_Temp_Point",
    where_clause="",
    use_field_alias_as_name="NOT_USE_ALIAS",
    field_mapping='pointid "pointid" true true false 4 Long 0 0,First,#,Spline_Temper_Point,pointid,-1,-1;grid_code "grid_code" true true false 4 Float 0 0,First,#,Spline_Temper_Point,grid_code,-1,-1',
    sort_field=None
)
print("Export complete")

In [3]:
# And then IDW
arcpy.conversion.ExportFeatures(
    in_features="Idw_Temper_Point",
    out_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\PostgreSQL-34-lab3(postgres).sde\lab3.postgres.Idw_Temper_Point",
    where_clause="",
    use_field_alias_as_name="NOT_USE_ALIAS",
    field_mapping='pointid "pointid" true true false 4 Long 0 0,First,#,Idw_Temper_Point,pointid,-1,-1;grid_code "grid_code" true true false 4 Float 0 0,First,#,Idw_Temper_Point,grid_code,-1,-1',
    sort_field=None
)
print("Export complete")

Export complete


### Accuracy

In [7]:
# Next part of the assignment is to evaluate the accuracy of our interpolations
# To that, we will take the randomly selected points we grabbed from before and use their positions to sample our interpolated data
# First with kriging
arcpy.sa.Sample(
    in_rasters="Kriging_Temper",
    in_location_data="Random_Selected_Features",
    out_table=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Sample_Kriging_Temp",
    resampling_type="NEAREST",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="TABLE"
)
print("Sampling complete")

Sampling complete


In [8]:
# Then with splining
arcpy.sa.Sample(
    in_rasters="Spline_Temper",
    in_location_data="Random_Selected_Features",
    out_table=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\Sample_Spline_Temp",
    resampling_type="NEAREST",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="TABLE"
)
print("Sampling complete")

Sampling complete


In [9]:
# And then with IDW
arcpy.sa.Sample(
    in_rasters="Idw_Temper",
    in_location_data="Random_Selected_Features",
    out_table=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Lab 3\GIS 5572 Lab 3.gdb\Sample_Idw_Temp",
    resampling_type="NEAREST",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="TABLE"
)
print("Sampling complete")

Sampling complete


In [10]:
# Next we will create a dataframe based on our randoly selected features from earlier
input_feature_class = "Random_Selected_Features"

# We will first convert feature classes to a NumPy array
fields = ["OID@", "station_id", "air_temper", "SHAPE@X", "SHAPE@Y"]
array = arcpy.da.FeatureClassToNumPyArray(input_feature_class, fields)

# Then convert the NumPy array to a dataframe
df_original = pd.DataFrame(array)

# Rename the OID@ field to ObjectID
df_original.rename(columns={"OID@": "OBJECTID"}, inplace=True)

# Then we will print the DataFrame to check it
print(df_original)


    OBJECTID station_id  air_temper   SHAPE@X   SHAPE@Y
0          2      MN002   23.502501 -94.11910  44.90680
1         26      MN027   20.497499 -93.37344  48.60179
2         30      MN031   26.594999 -94.99542  44.54471
3         51      MN052   20.895000 -93.48518  47.84037
4         62      MN063   25.750000 -95.05439  46.41334
5         63      MN064   26.477501 -94.23284  45.76654
6         67      MN068   20.827499 -96.00102  47.76911
7         69      MN070   25.072500 -92.48230  43.90234
8         72      MN073   27.162500 -94.03397  44.15434
9         73      MN074   25.730000 -93.96315  44.36554
10       101      MN105   23.332500 -93.70241  47.29679
11       111      MN119   21.407499 -95.66613  47.53178
12       117      MN127   26.475000 -93.43246  46.11800
13       136      MN146   32.177502 -91.30423  43.85790
14       147      MN157   25.035000 -94.84316  44.76047
15       160      MN170   27.457500 -94.29614  46.52462


In [11]:
# Now I will add the sampled interpolated data to the same dataframe
# That will be done by first making individual dataframes for each method
table_paths = [
    "Sample_Idw_Temp",
    "Sample_Kriging_Temp",
    "Sample_Spline_Temp"
]

dfs = []
for table_path in table_paths:
    array = arcpy.da.TableToNumPyArray(table_path, "*")
    df = pd.DataFrame(array)
    dfs.append(df)

# To check, let's print the dataframes
for i, df in enumerate(dfs):
    print(f"DataFrame {i+1} ({table_paths[i]}):\n{df}\n")


DataFrame 1 (Sample_Idw_Temp):
    OBJECTID  Random_Selected_Features         X         Y  Idw_Temper_Band_1
0          1                         2 -94.11910  44.90680          23.502819
1          2                        26 -93.37344  48.60179          20.497238
2          3                        30 -94.99542  44.54471          26.590687
3          4                        51 -93.48518  47.84037          20.896429
4          5                        62 -95.05439  46.41334          25.750624
5          6                        63 -94.23284  45.76654          26.477339
6          7                        67 -96.00102  47.76911          20.827522
7          8                        69 -92.48230  43.90234          25.072533
8          9                        72 -94.03397  44.15434          27.152906
9         10                        73 -93.96315  44.36554          25.728815
10        11                       101 -93.70241  47.29679          23.332548
11        12                     

In [None]:
# We will the join our dataframes based on Object ID
dfs_to_join = [
    df_original,  # Temperature_Data DataFrame
    dfs[0],  # Sample_Idw_Temp DataFrame
    dfs[1],  # Sample_Kriging_Temp DataFrame
    dfs[2]   # Sample_Spline_Temp DataFrame
]

# Here we will define the suffixes for the joined columns
suffixes = ['_original', '_idw', '_kriging', '_spline']

# Then we perform the join for each dataframe
for i, df_to_join in enumerate(dfs_to_join[1:], start=1):
    df_original = df_original.merge(df_to_join, how='left', left_on='OBJECTID', right_on='OBJECTID', suffixes=('', suffixes[i]))

# Let's check
print(df_original)


In [None]:
# We will then do some arthimetic between columns to determine the difference between each
df_original['Idw_Temper_Band_1_difference'] = df_original['air_temper'] - df_original['Idw_Temper_Band_1']
df_original['Spline_Temper_Band_1_difference'] = df_original['air_temper'] - df_original['Spline_Temper_Band_1']
df_original['Kriging_Temper_Band_1_difference'] = df_original['air_temper'] - df_original['Kriging_Temper_Band_1']

# Print the DataFrame with the new 
df_original


In [86]:
# We will then calculate the mean difference for each method
idw_mean = df_original['Idw_Temper_Band_1_difference'].mean()
spline_mean = df_original['Spline_Temper_Band_1_difference'].mean()
kriging_mean = df_original['Kriging_Temper_Band_1_difference'].mean()

# Then use that to calculate RMSE for each method
idw_rmse = np.sqrt(np.mean(df_original['Idw_Temper_Band_1_difference']**2))
spline_rmse = np.sqrt(np.mean(df_original['Spline_Temper_Band_1_difference']**2))
kriging_rmse = np.sqrt(np.mean(df_original['Kriging_Temper_Band_1_difference']**2))

# Then we create a dictionary to hold the results
accuracy_results = {
    'Method': ['IDW', 'Spline', 'Kriging'],
    'Mean': [idw_mean, spline_mean, kriging_mean],
    'RMSE': [idw_rmse, spline_rmse, kriging_rmse]
}

# Then we can create DataFrame from the dictionary
accuracy_df = pd.DataFrame(accuracy_results)
print(accuracy_df)


    Method      Mean      RMSE
0      IDW  3.005262  3.005262
1   Spline  3.005135  3.005135
2  Kriging  3.274860  3.274860


In [89]:
# We now need to save the table
# First we will convert it into a CSV
output_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\accuracy_temperature.csv"
accuracy_df.to_csv(output_path, index=False)

print("Accuracy DataFrame saved to:", output_path)

Accuracy DataFrame saved to: C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\Lab 3 v2.gdb\accuracy_temperature.csv


In [None]:
# Then we will use the database connection to save it.
sde_connection = r'C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\PostgreSQL-34-lab3(postgres).sde'
output_table = "accuracy_temperature"
csv_file = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\accuracy_temperature.csv"
arcpy.TableToTable_conversion(csv_file, sde_connection, output_table)

print(f"Table '{output_table}' saved to SDE connection '{sde_connection}'.")

In [98]:
# Now we will convert the sampling differences into a point feature
# Let us start with IDW
data_array = df_original[['SHAPE@X', 'SHAPE@Y', 'Idw_Temper_Band_1_difference']].to_numpy()
spatial_reference = arcpy.SpatialReference(4326)  # WGS 1984
feature_class_name = "Idw_Difference_Point"
arcpy.management.CreateFeatureclass(arcpy.env.workspace, feature_class_name, "POINT", spatial_reference=spatial_reference)
arcpy.management.AddField(feature_class_name, "Idw_Temper_Band_1_difference", "DOUBLE")
with arcpy.da.InsertCursor(feature_class_name, ['SHAPE@X', 'SHAPE@Y', 'Idw_Temper_Band_1_difference']) as cursor:
    for row in data_array:
        cursor.insertRow(row)

print("Point feature class created successfully.")

Point feature class created successfully.


In [99]:
# Then Kriging
data_array = df_original[['SHAPE@X', 'SHAPE@Y', 'Kriging_Temper_Band_1_difference']].to_numpy()
spatial_reference = arcpy.SpatialReference(4326)
feature_class_name = "Kriging_Difference_Point"
arcpy.management.CreateFeatureclass(arcpy.env.workspace, feature_class_name, "POINT", spatial_reference=spatial_reference)
arcpy.management.AddField(feature_class_name, "Kriging_Temper_Band_1_difference", "DOUBLE")
with arcpy.da.InsertCursor(feature_class_name, ['SHAPE@X', 'SHAPE@Y', 'Kriging_Temper_Band_1_difference']) as cursor:
    for row in data_array:
        cursor.insertRow(row)

print("Point feature class created successfully.")

Point feature class created successfully.


In [100]:
# And then splining
data_array = df_original[['SHAPE@X', 'SHAPE@Y', 'Spline_Temper_Band_1_difference']].to_numpy()
spatial_reference = arcpy.SpatialReference(4326)  # WGS 1984
feature_class_name = "Spline_Difference_Point"
arcpy.management.CreateFeatureclass(arcpy.env.workspace, feature_class_name, "POINT", spatial_reference=spatial_reference)
arcpy.management.AddField(feature_class_name, "Spline_Temper_Band_1_difference", "DOUBLE")
with arcpy.da.InsertCursor(feature_class_name, ['SHAPE@X', 'SHAPE@Y', 'Spline_Temper_Band_1_difference']) as cursor:
    for row in data_array:
        cursor.insertRow(row)

print("Point feature class created successfully.")

Point feature class created successfully.


In [2]:
#We will save IDW difference point layer to our PostGIS database
arcpy.conversion.ExportFeatures('Idw_Difference_Point',r'C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\Lab 3 v2\PostgreSQL-34-lab3(postgres).sde\lab3.postgres.Idw_Difference_Point')
