In [21]:
# Importing necessary libraries 
import pandas as pd
import geopandas as gpd
import psycopg2
from shapely.geometry import Point, Polygon
import pandas as pd
from sqlalchemy import create_engine
import random
from math import sqrt

In [29]:
# QA\QC check
# Read the CSV file into a data frame
csv_file_path = r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\rounded_gdd_data.csv"
df = pd.read_csv(csv_file_path)

# Check for null values
null_values = df.isnull().sum()
if null_values.any():
    print("Null values found:")
    print(null_values)
else:
    print("No null values found.")


Null values found:
Station             0
Date                0
Max Temp (F)      277
Min Temp (F)      277
Latitude            0
Longitude           0
GDD               277
Cumulative_GDD    277
dtype: int64


In [30]:
# Define Minnesota boundary box
minnesota_boundary = Polygon([( -97.5, 43.0), (-89.0, 43.0), (-89.0, 49.5), (-97.5, 49.5)])

# Filter data falling within Minnesota boundary
df['Coordinates'] = list(zip(df.Longitude, df.Latitude))
df['Coordinates'] = df['Coordinates'].apply(Point)
gdf = gpd.GeoDataFrame(df, geometry='Coordinates')

within_minnesota = gdf[gdf.geometry.within(minnesota_boundary)]
if len(within_minnesota) == 0:
    print("No data falls within Minnesota boundary.")
else:
    print("Data falls within Minnesota boundary.")

Data falls within Minnesota boundary.


In [31]:
# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

# Convert the datetime format to the desired format
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

# Print the data frame
print(df)

                                           Station  ...                       Coordinates
0                      Twin Lakes I-35 Mile Post 1  ...   POINT (-93.35405731 43.5083313)
1                     Silver Lake TH 7 Mile Post 1  ...          POINT (-94.1191 44.9068)
2                 Little Chicago I-35 Mile Post 70  ...  POINT (-93.29242706 44.47850037)
3                     Rush City I-35 Mile Post 157  ...  POINT (-92.99275208 45.64292145)
4                      Rutledge I-35 Mile Post 198  ...  POINT (-92.83856201 46.21257019)
...                                            ...  ...                               ...
4855  U.S.2 - Solway - MP 99.7         MN US MNDOT  ...   POINT (-95.16248322 47.5232811)
4856  T.H.1 - Warren - MP 11.5         MN US MNDOT  ...   POINT (-96.8911438 48.19535065)
4857  T.H.11 - Karlstad - MP 37.7      MN US MNDOT  ...  POINT (-96.38069916 48.64707947)
4858  T.H.371 - Nisswa - MP 46.2       MN US MNDOT  ...  POINT (-94.29614258 46.52462006)
4859  U.S.

In [32]:
#This converts and checks the format of the Date column
df['Date'] = pd.to_datetime(df['Date'])

# Check the type of the "Date" column after conversion
date_column_type = df['Date'].dtype

print("Type of 'Date' column after conversion:", date_column_type)

Type of 'Date' column after conversion: datetime64[ns]


In [34]:
# Ask user for input date
input_date = input("Enter a date (YYYY-MM-DD format): ")

# Convert input string to datetime object
input_date = pd.to_datetime(input_date)

# Filter DataFrame based on the input date
df = df[df['Date'] == input_date]

# Display the filtered DataFrame
print(df)

Enter a date (YYYY-MM-DD format): 2023-09-09
                                           Station  ...                       Coordinates
1296                   Twin Lakes I-35 Mile Post 1  ...   POINT (-93.35405731 43.5083313)
1297                  Silver Lake TH 7 Mile Post 1  ...          POINT (-94.1191 44.9068)
1298              Little Chicago I-35 Mile Post 70  ...  POINT (-93.29242706 44.47850037)
1299                  Rush City I-35 Mile Post 157  ...  POINT (-92.99275208 45.64292145)
1300                   Rutledge I-35 Mile Post 198  ...  POINT (-92.83856201 46.21257019)
...                                            ...  ...                               ...
1453  U.S.2 - Solway - MP 99.7         MN US MNDOT  ...   POINT (-95.16248322 47.5232811)
1454  T.H.1 - Warren - MP 11.5         MN US MNDOT  ...   POINT (-96.8911438 48.19535065)
1455  T.H.11 - Karlstad - MP 37.7      MN US MNDOT  ...  POINT (-96.38069916 48.64707947)
1456  T.H.371 - Nisswa - MP 46.2       MN US MNDOT  ...

In [35]:
#Ceate a GDD point fetaure class
# Start by defining the output feature class name
output_fc = 'GDDPoints'

# Create a new feature class
arcpy.management.CreateFeatureclass(
    arcpy.env.workspace,
    output_fc,
    'POINT',
    spatial_reference=arcpy.SpatialReference(4326)  # WGS84 Geographic Coordinate System
)

# Add fields to store the necessary data, ensuring correct data types
arcpy.management.AddField(output_fc, 'Station', 'TEXT')
arcpy.management.AddField(output_fc, 'Date', 'DATE')
arcpy.management.AddField(output_fc, 'MaxTemp', 'FLOAT')
arcpy.management.AddField(output_fc, 'MinTemp', 'FLOAT')
arcpy.management.AddField(output_fc, 'Latitude', 'DOUBLE')
arcpy.management.AddField(output_fc, 'Longitude', 'DOUBLE')
arcpy.management.AddField(output_fc, 'GDD', 'FLOAT')
arcpy.management.AddField(output_fc, 'Cumulative_GDD', 'FLOAT')

# Open an insert cursor
with arcpy.da.InsertCursor(output_fc, ['SHAPE@XY', 'Station', 'Date', 'MaxTemp', 'MinTemp', 'Latitude', 'Longitude', 'GDD', 'Cumulative_GDD']) as cursor:
    # Iterate over each row in the DataFrame
    for index, row in df.iterrows():
        # Extract necessary values
        station = row['Station']
        date = row['Date']
        max_temp = row['Max Temp (F)']
        min_temp = row['Min Temp (F)']
        lat = row['Latitude']
        lon = row['Longitude']
        gdd = row['GDD']
        cumulative_gdd = row['Cumulative_GDD']
        
        # Create a point geometry
        point = arcpy.Point(lon, lat)
        point_geometry = arcpy.PointGeometry(point)
        
        # Insert the point feature with the necessary values
        cursor.insertRow([point_geometry, station, date, max_temp, min_temp, lat, lon, gdd, cumulative_gdd])

print(f"Feature class '{output_fc}' created successfully.")

Feature class 'GDDPoints' created successfully.


In [36]:
#Create a subset of this data and remove it from the feature layer
input_feature_layer = "GDDPoints"

# Save it as a separate feature layer
output_feature_layer = "Random_Selected_GDD_Points"
output_feature_class = "Random_Selected_GDD_Points"

# Get the total count of features in the input feature layer
total_features_count = int(arcpy.GetCount_management(input_feature_layer).getOutput(0))

# Generate a list of 32 random indices
random_indices = random.sample(range(1, total_features_count + 1), 32)

#Use a SQL expression to select the randomly chosen features
sql_expression = "OBJECTID IN ({})".format(','.join(map(str, random_indices)))

# Create a new feature layer with the randomly selected features
arcpy.MakeFeatureLayer_management(input_feature_layer, output_feature_layer, sql_expression)

print("Randomly selected 32 features and created a new feature layer:", output_feature_layer)

# Save the selected features to a new feature class
arcpy.CopyFeatures_management(output_feature_layer, output_feature_class)
print("Saved the selected features to a new feature class:", output_feature_class)


Randomly selected 32 features and created a new feature layer: Random_Selected_GDD_Points
Saved the selected features to a new feature class: Random_Selected_GDD_Points


In [37]:
#Interpolation Analysis
# IDW
arcpy.ga.IDW(
    in_features="GDDPoints",
    z_field="GDD",
    out_ga_layer=None,
    out_raster=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb\GDD_IDW",
    cell_size=0.0218500824,
    power=2,
    search_neighborhood="NBRTYPE=Standard S_MAJOR=2.32298810990939 S_MINOR=2.32298810990939 ANGLE=0 NBR_MAX=15 NBR_MIN=10 SECTOR_TYPE=ONE_SECTOR",
    weight_field=None
)

In [38]:
# Ordinary kriging
with arcpy.EnvManager(scratchWorkspace=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb"):
    out_surface_raster = arcpy.sa.Kriging(
        in_point_features="GDDPoints",
        z_field="GDD",
        kriging_model="Spherical # # # #",
        cell_size=0.0218500824,
        search_radius="VARIABLE 12",
        out_variance_prediction_raster=None
    )
    out_surface_raster.save(r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb\Kriging") 


In [39]:
# Universal Kriging
with arcpy.EnvManager(scratchWorkspace=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb"):
    out_surface_raster = arcpy.sa.Kriging(
        in_point_features="GDDPoints",
        z_field="GDD",
        kriging_model="LinearDrift 0.021850 # # #",
        cell_size=0.0218500824,
        search_radius="VARIABLE 12",
        out_variance_prediction_raster=None
    )
    out_surface_raster.save(r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb\Kriging_univ")  



In [58]:
#To test the model, this samples the data
arcpy.sa.Sample(
    in_rasters="Kriging.tif",
    in_location_data="Random_Selected_GDD_Points",
    out_table=r"Sample_Kriging_GDD",
    resampling_type="NEAREST",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="TABLE"
)
print("Sampling complete")

Sampling complete


In [59]:
arcpy.sa.Sample(
    in_rasters="Kriging_univ.tif",
    in_location_data="Random_Selected_GDD_Points",
    out_table=r"Sample_Kriging_Univ_GDD",
    resampling_type="NEAREST",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="TABLE"
)
print("Sampling complete")

Sampling complete


In [60]:
arcpy.sa.Sample(
    in_rasters="GDD_IDW.tif",
    in_location_data="Random_Selected_GDD_Points",
    out_table=r"Sample_IDW_GDD",
    resampling_type="NEAREST",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="TABLE"
)
print("Sampling complete")

Sampling complete


In [62]:
# Create a dataframe based on our randomly selected features from earlier
input_feature_class = "Random_Selected_GDD_Points"

# Convert feature classes to a NumPy array
fields = ["OID@","Station", "GDD"]
array = arcpy.da.FeatureClassToNumPyArray(input_feature_class, fields)

# Convert the NumPy array to a dataframe
df_GDD = pd.DataFrame(array)

# Rename the OID@ field to ObjectID
df_GDD.rename(columns={"OID@": "OBJECTID"}, inplace=True)

# Print the DataFrame to check it
print(df_GDD)

    OBJECTID                                       Station        GDD
0          1                             I-90 Mile Post 67  16.020000
1          2                      Shooks MN-1 Mile Post 34  10.440000
2          3                        Ely MN-1 Mile Post 275   5.130000
3          4                     Waseca TH 14 Mile Post ??  17.100000
4          5                  Red Wing US-61 Mile Post 101  17.370001
5          6                      Effie MN-1 Mile Post 194   9.450000
6          7                    Anthony MN-200 Mile Post 9  13.950000
7          8                  Lake Park US-10 Mile Post 32  12.510000
8          9                   Verndale US-10 Mile Post 94  12.510000
9         10                      Rice US-10 Mile Post 162  11.250000
10        11                    Brooks US-59 Mile Post 331  11.070000
11        12                  Rochester US-63 Mile Post 32  16.290001
12        13                            MN-71 Mile Post 43  16.740000
13        14        

In [63]:
# Define the table path
table_paths = [
    "Sample_IDW_GDD",
    "Sample_Kriging_GDD",
    "Sample_Kriging_Univ_GDD"
]

dfs = []
for table_path in table_paths:
    array = arcpy.da.TableToNumPyArray(table_path, "*")
    df = pd.DataFrame(array)
    dfs.append(df)

# To check, let's print the dataframes
for i, df in enumerate(dfs):
    print(f"DataFrame {i+1} ({table_paths[i]}):\n{df}\n")

DataFrame 1 (Sample_IDW_GDD):
    OBJECTID  Random_Selected_GDD_Points          X          Y  GDD_IDW_Band_1
0          1                           1 -95.118538  43.638691       16.021612
1          2                           2 -94.432426  47.874310       10.440683
2          3                           3 -92.048111  47.854401        5.130960
3          4                           4 -93.494500  44.059800       17.101177
4          5                           5 -92.697792  44.601021       17.369537
5          6                           6 -93.485184  47.840370        9.450415
6          7                           7 -96.731651  47.295940       13.944532
7          8                           8 -96.111443  46.877449       12.510365
8          9                           9 -95.054390  46.413342       12.509303
9         10                          10 -94.232841  45.766541       11.247498
10        11                          11 -96.001022  47.769112       11.070024
11        12          

In [None]:
for df in dfs:
    # Perform the merge based on the OBJECTID column
    df_GDD = pd.merge(df_GDD, df, on='OBJECTID', how='left')


In [67]:
# Quantitative Assesment
# Let's calculate RMSE, MAE, and R-squared for each method

# Extract relevant columns
observed = df_GDD['GDD']
idw_predicted = df_GDD['GDD_IDW_Band_1']
kriging_predicted = df_GDD['Kriging_Band_1']
kriging_univ_predicted = df_GDD['Kriging_univ_Band_1']

# Calculate RMSE, MAE, and R-squared for IDW
idw_rmse = np.sqrt(((observed - idw_predicted) ** 2).mean())
idw_mae = np.abs(observed - idw_predicted).mean()
idw_r2 = 1 - (((observed - idw_predicted) ** 2).sum() / ((observed - observed.mean()) ** 2).sum())

# Calculate RMSE, MAE, and R-squared for Kriging
kriging_rmse = np.sqrt(((observed - kriging_predicted) ** 2).mean())
kriging_mae = np.abs(observed - kriging_predicted).mean()
kriging_r2 = 1 - (((observed - kriging_predicted) ** 2).sum() / ((observed - observed.mean()) ** 2).sum())

# Calculate RMSE, MAE, and R-squared for Universal Kriging
kriging_univ_rmse = np.sqrt(((observed - kriging_univ_predicted) ** 2).mean())
kriging_univ_mae = np.abs(observed - kriging_univ_predicted).mean()
kriging_univ_r2 = 1 - (((observed - kriging_univ_predicted) ** 2).sum() / ((observed - observed.mean()) ** 2).sum())

# Print the results
print("IDW:")
print("RMSE:", idw_rmse)
print("MAE:", idw_mae)
print("R-squared:", idw_r2)
print("------------------------")
print("Kriging:")
print("RMSE:", kriging_rmse)
print("MAE:", kriging_mae)
print("R-squared:", kriging_r2)
print("------------------------")
print("Universal Kriging:")
print("RMSE:", kriging_univ_rmse)
print("MAE:", kriging_univ_mae)
print("R-squared:", kriging_univ_r2)


IDW:
RMSE: 0.013680996
MAE: 0.0048885653
R-squared: 0.9999866212865527
------------------------
Kriging:
RMSE: 0.05682798
MAE: 0.033576965
R-squared: 0.9997691640892299
------------------------
Universal Kriging:
RMSE: 1.0311575
MAE: 0.7854618
R-squared: 0.923997312784195


In [None]:
#Exploratory interpolation
arcpy.ga.ExploratoryInterpolation(
    in_features="GDDPoints",
    value_field="GDD",
    out_cv_table=r"C:\Users\Deepika\OneDrive\Documents\ArcGIS\Projects\MyProject2\MyProject2.gdb\ExploratoryInterpolation1",
    out_geostat_layer=None,
    interp_methods="SIMPLE_KRIGING;UNIVERSAL_KRIGING;IDW",
    comparison_method="SINGLE",
    criterion="ACCURACY",
    criteria_hierarchy="ACCURACY PERCENT #",
    weighted_criteria="ACCURACY 1",
    exclusion_criteria=None
)


In [69]:
#Resample Kriging
arcpy.management.Resample(
    in_raster=r"Kriging_univ",
    out_raster=r"Kriging_univ_resample",
    cell_size="0.1 0.1",
    resampling_type="NEAREST"
)

In [70]:
#Convert that resample into points
arcpy.conversion.RasterToPoint(
    in_raster=r"Kriging_univ_resample",
    out_point_features=r"Kriging_univ_point",
    raster_field="value"
)

In [1]:
#Export the Kriging points to a PostGIS database
arcpy.conversion.ExportFeatures(
    in_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\GIS 5572 Final.gdb\Kriging_univ_point",
    out_features=r"C:\Users\conno\OneDrive\Documents\ArcGIS\Projects\GIS 5572 Final\PostgreSQL-34-final_project(postgres).sde\final_project.postgres.gdd_data"
)
print("Export complete")

Export complete
