In [None]:
import xarray as xr
import pandas as pd
import numpy as np

# Open the processed NetCDF file that contains the NDCI variable.
nc_path = "/mnt/data/processed_data.nc"
dataset = xr.open_dataset(nc_path)

# Pull out the NDCI variable; adjust `ndci_var` if the file stores it
# under a different name.
ndci_var = 'NDCI'
ndci_data = dataset[ndci_var]


In [None]:
# Flatten the NDCI variable into a long-format table: one row per
# coordinate combination, with the values in an "ndci" column.
ndci_df = ndci_data.to_dataframe(name="ndci").reset_index()

# Target grid cell; change these to analyse another location.
latitude, longitude = 43.0, -87.5

# Match the coordinates within a small absolute tolerance instead of `==`.
# Stored coordinates (e.g. float32, or values derived from an offset and a
# step) may not be bit-for-bit equal to a Python float literal, in which case
# an exact comparison silently selects zero rows.
# NOTE(review): the tolerance is in the coordinates' own units (presumably
# decimal degrees) and must stay well below the grid spacing -- confirm.
coord_tolerance = 1e-5
cell_mask = (
    np.isclose(ndci_df['latitude'], latitude, rtol=0.0, atol=coord_tolerance)
    & np.isclose(ndci_df['longitude'], longitude, rtol=0.0, atol=coord_tolerance)
)
grid_data = ndci_df[cell_mask]

# Fail here, next to the cause, rather than later with NaN statistics or an
# opaque shape error in the model cells.
if grid_data.empty:
    raise ValueError(
        f"No grid cell found at latitude={latitude}, longitude={longitude} "
        f"(tolerance {coord_tolerance}); check the coordinates against the dataset."
    )

# NDCI values of the selected cell, in the row order of the table above.
ndci_series = grid_data['ndci'].values


In [None]:
# Positional split of the series: everything before index 700 is used for
# training, everything from index 700 onwards is held out for testing
# (700 / 300 when the series has 1000 values).
n_train = 700
train_data = ndci_series[:n_train]
test_data = ndci_series[n_train:]

# Standardise both subsets with the mean and standard deviation of the
# training subset only, so no test-set statistics enter the scaling.
mean = train_data.mean()
std = train_data.std()
train_data_normalized = (train_data - mean) / std
test_data_normalized = (test_data - mean) / std


In [None]:
# Define a function to prepare the time series data for LSTM
def create_dataset(data, time_steps=1):
    """Build sliding-window samples for one-step-ahead prediction.

    Sample ``i`` pairs the window ``data[i : i + time_steps]`` with the value
    that immediately follows it, ``data[i + time_steps]``.

    Parameters
    ----------
    data : array-like
        Sequence of observations along the first axis.
    time_steps : int, default 1
        Number of consecutive observations in each input window (>= 1).

    Returns
    -------
    X : numpy.ndarray
        Input windows, shape ``(n_samples, time_steps, ...)`` where
        ``n_samples = max(len(data) - time_steps, 0)``.
    y : numpy.ndarray
        Targets, shape ``(n_samples, ...)``.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1.

    Notes
    -----
    When ``data`` is too short to yield a single sample, ``X`` still has
    ``time_steps`` columns (shape ``(0, time_steps)``), so callers that read
    ``X.shape[1]`` keep working instead of failing with ``IndexError``.
    """
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")

    values = np.asarray(data)
    n_samples = max(len(values) - time_steps, 0)

    # Row i of the index grid is [i, i+1, ..., i+time_steps-1]; indexing with
    # it gathers every window in one vectorised step and returns a new array.
    window_idx = np.arange(n_samples)[:, None] + np.arange(time_steps)[None, :]
    X = values[window_idx]

    # Targets are the observations right after each window; copy so the
    # result does not alias the caller's array.
    y = values[time_steps:time_steps + n_samples].copy()
    return X, y

# Window length: the previous 10 values are used to predict the next one.
time_steps = 10

# Build (window, next-value) samples separately for the training and test series.
X_train, y_train = create_dataset(train_data_normalized, time_steps=time_steps)
X_test, y_test = create_dataset(test_data_normalized, time_steps=time_steps)

# Add a trailing axis of size 1 so the inputs are laid out as
# [samples, time steps, features].
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Stacked LSTM regressor: two 50-unit LSTM layers followed by a single-unit
# dense output. The first LSTM is created with return_sequences=True so the
# second LSTM receives its output for every time step.
window_shape = (time_steps, 1)
recurrent_in = LSTM(50, return_sequences=True, input_shape=window_shape)
recurrent_out = LSTM(50)
head = Dense(1)
model = Sequential([recurrent_in, recurrent_out, head])

model.compile(loss='mean_squared_error', optimizer='adam')

# Fit on the training windows; the test windows are passed as validation
# data, so their loss is reported after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    verbose=1,
)


In [None]:
from sklearn.metrics import mean_squared_error

# Ground truth for the test windows: the first `time_steps` test values are
# only ever used as input context, so the targets start right after them.
actual = test_data[time_steps:]

# Predict on the test windows, then undo the standardisation using the
# training mean/std so predictions are on the original NDCI scale.
predicted_normalized = model.predict(X_test)
predicted = mean + std * predicted_normalized

# Root Mean Squared Error (RMSE) between observed and predicted values
mse = mean_squared_error(actual, predicted)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")


In [None]:
import matplotlib.pyplot as plt

# Scatter the observed (blue) and predicted (red) NDCI values against their
# position in the test window sequence.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(range(len(actual)), actual, color='blue', label='Actual')
ax.scatter(range(len(predicted)), predicted, color='red', label='Predicted')
ax.set_title(f'NDCI Prediction for Grid Cell ({latitude}, {longitude})')
ax.set_xlabel('Time Step')
ax.set_ylabel('NDCI Value')
ax.legend()
plt.show()
