In [2]:
%pip install tensorflow




In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Reshape, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2


In [4]:
# Load the dataset
file_path = 'Final Mesonet Dataset.csv'
weather_data = pd.read_csv(file_path)

# Selecting relevant columns
columns_of_interest = ["AirT_10m", "RH_10m", "Pressure_1"]
weather_data_filtered = weather_data[columns_of_interest].copy()

In [5]:
# Scale the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(weather_data_filtered)

# Create sequence
def create_sequences(data, input_len, output_len):
    X, y = [], []
    for i in range(len(data) - input_len - output_len + 1):
        X.append(data[i:i + input_len])
        y.append(data[i + input_len:i + input_len + output_len])
    return np.array(X), np.array(y)

input_length = 12 * 60  # 12 hours (in minutes)
output_length = 3 * 60  # 3 hours (in minutes)
X, y = create_sequences(scaled_data, input_length, output_length)

In [6]:
# Split data into training and testing sets
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Build LSTM model
model = Sequential([
    LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
    Dense(y_train.shape[1] * y_train.shape[2]),
    BatchNormalization(),
    Dropout(0.2),
    Dense(y_train.shape[1] * y_train.shape[2], kernel_regularizer=l2(0.01), name="dense_layer"),
])
model.compile(optimizer='adam', loss='mse')

  super().__init__(**kwargs)


In [None]:
# Reshape data for output compatibility
y_train = y_train.reshape(y_train.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0], -1)

# train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)


Epoch 1/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 784ms/step - loss: 1.7432 - val_loss: 0.0958
Epoch 2/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 586ms/step - loss: 0.0534 - val_loss: 0.0507
Epoch 3/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 410ms/step - loss: 0.0323 - val_loss: 0.0340
Epoch 4/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 447ms/step - loss: nan - val_loss: nan
Epoch 5/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 554ms/step - loss: nan - val_loss: nan
Epoch 6/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 465ms/step - loss: nan - val_loss: nan
Epoch 7/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 493ms/step - loss: nan - val_loss: nan
Epoch 8/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 481ms/step - loss: nan - val_loss: nan
Epoch 9/20
[1m242/242[0m [32m━━━━━━━

In [None]:
# evaluate model
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(y_test.shape[0], output_length, -1)  # Reshape predictions to match original format

# Inverse scaling predictions and actual values for evaluation
y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 3)).reshape(y_pred.shape)
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 3)).reshape(y_test.shape)

# Check for NaN values
print("NaN in y_test_rescaled:", np.isnan(y_test_rescaled).any())
print("NaN in y_pred_rescaled:", np.isnan(y_pred_rescaled).any())

# Handle NaN values if they exist
y_test_rescaled = np.nan_to_num(y_test_rescaled, nan=0.0)
y_pred_rescaled = np.nan_to_num(y_pred_rescaled, nan=0.0)


[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 114ms/step
NaN in y_test_rescaled: False
NaN in y_pred_rescaled: True


In [11]:
# Calculate Mean Squared Error
mse = mean_squared_error(y_test_rescaled.reshape(-1, 3), y_pred_rescaled.reshape(-1, 3))
print("Mean Squared Error (Overall):", mse)

# Per feature MSE
mse_per_feature = mean_squared_error(
    y_test_rescaled.reshape(-1, 3),
    y_pred_rescaled.reshape(-1, 3),
    multioutput="raw_values",
)
print("Mean Squared Error (Per Feature):", mse_per_feature)

Mean Squared Error (Overall): 343271.1843156395
Mean Squared Error (Per Feature): [6.54461495e+02 7.68292429e+03 1.02147617e+06]
