In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Synthetic snow-water-equivalent (SWE) dataset.
# Four predictors are sampled uniformly at random; the target (SWE, in inches)
# is a fixed linear combination of them plus unit-variance Gaussian noise.

# Seed NumPy's global RNG so every run reproduces the same samples.
np.random.seed(42)
num_samples = 100

# (low, high) sampling bounds for each predictor, listed in draw order:
#   temperature [°C], NDVI, terrain slope [degrees], satellite reflectance
_feature_bounds = ((-10, 30), (0, 1), (0, 30), (0.1, 0.8))

# The generator is consumed left to right, so the four draws happen in the
# order listed above.
temperature, ndvi, slope, reflectance = (
    np.random.uniform(low=lo, high=hi, size=num_samples)
    for lo, hi in _feature_bounds
)

# One row per sample, one column per predictor.
features = np.column_stack([temperature, ndvi, slope, reflectance])

# Observation noise is drawn after all predictors.
noise = np.random.normal(scale=1, size=num_samples)

# Target: linear response to the predictors plus noise.
swe_data = (
    0.5 * temperature
    + 0.3 * ndvi
    - 0.2 * slope
    + 0.4 * reflectance
    + noise
)

# Preview only the first 10 entries of each array for brevity.
print("Original SWE data (in inches):", swe_data[:10], sep="\n")
print("\nFeatures used for training:", features[:10], sep="\n")

# Min-max normalize SWE values to the range 0-1.
# The simulated target can be negative and never gets near 200, so dividing by
# a fixed constant does not yield a 0-1 range. Derive the bounds from the data
# instead and keep them so predictions can be mapped back to inches later.
min_swe = swe_data.min()
max_swe = swe_data.max()
swe_range = max_swe - min_swe
if swe_range == 0:
    # A constant target cannot be rescaled (division by zero) and carries no
    # signal for a regression anyway.
    raise ValueError("SWE data is constant; cannot min-max normalize it.")
swe_normalized = (swe_data - min_swe) / swe_range

print("\nNormalized SWE data (range 0-1):")
print(swe_normalized[:10])  # Print the first 10 entries for brevity

# Split data into training (80%) and testing (20%) sets; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(features, swe_normalized, test_size=0.2, random_state=42)

print("\nTraining features:")
print(X_train[:10])  # Print the first 10 entries for brevity
print("Training normalized SWE values:")
print(y_train[:10])  # Print the first 10 entries for brevity
print("\nTesting features:")
print(X_test[:10])  # Print the first 10 entries for brevity
print("Testing normalized SWE values:")
print(y_test[:10])  # Print the first 10 entries for brevity

# Initialize and train the regression model on the normalized target
model = LinearRegression()
model.fit(X_train, y_train)

print("\nModel training complete.")

# Make predictions (still on the normalized 0-1 scale)
y_pred_normalized = model.predict(X_test)

print("\nPredicted normalized SWE values:")
print(y_pred_normalized[:10])  # Print the first 10 entries for brevity

# Denormalize the predictions to the original SWE scale by inverting the
# min-max transform applied above.
y_pred = y_pred_normalized * swe_range + min_swe

print("\nDenormalized predicted SWE values (in inches):")
print(y_pred[:10])  # Print the first 10 entries for brevity

# Calculate and print the Mean Squared Error on the original scale, so the
# error is expressed in squared inches rather than in normalized units.
y_test_original = y_test * swe_range + min_swe
mse = mean_squared_error(y_test_original, y_pred)

print("\nOriginal SWE values for testing set (in inches):")
print(y_test_original[:10])  # Print the first 10 entries for brevity
print(f"Mean Squared Error: {mse}")

# Side-by-side view of the first few true vs. predicted values
print("\nComparison of original and predicted SWE values:")
for orig, pred in zip(y_test_original[:10], y_pred[:10]):
    print(f"Original: {orig:.2f} inches, Predicted: {pred:.2f} inches")


Original SWE data (in inches):
[  5  20  50  80 120 150 180 200]

Features used for training (e.g., time or index):
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]]

Normalized SWE data (range 0-1):
[0.025 0.1   0.25  0.4   0.6   0.75  0.9   1.   ]

Training features:
[[0]
 [7]
 [2]
 [4]
 [3]
 [6]]
Training normalized SWE values:
[0.025 1.    0.25  0.6   0.4   0.9  ]

Testing features:
[[1]
 [5]]
Testing normalized SWE values:
[0.1  0.75]

Model training complete.

Predicted normalized SWE values:
[0.1405 0.7235]

Denormalized predicted SWE values (in inches):
[ 28.1 144.7]

Original SWE values for testing set (in inches):
[ 20. 150.]
Mean Squared Error: 46.84999999999992

Comparison of original and predicted SWE values:
Original: 20.00 inches, Predicted: 28.10 inches
Original: 150.00 inches, Predicted: 144.70 inches
