In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.stattools import adfuller
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


# Load dataset
file_path = "surface heigh.csv"

if os.path.exists(file_path):
    try:
        df = pd.read_csv(file_path, delimiter=",", encoding="utf-8", on_bad_lines="skip", engine="python", skiprows=8)  # Skip metadata

        # Convert all columns (except datetime) to numeric, forcing errors to NaN
        for col in df.columns:
            if col not in ["DATETIME", "TIME"]:  # Exclude time-related columns
                df[col] = pd.to_numeric(df[col], errors="coerce")

        # Replace -1.E+34 (error values) with NaN
        df.replace([-1e+34, "-1.E+34", -1.0e+34, "-1.000000e+34"], np.nan, inplace=True)

        print(f"✅ Loaded {file_path} successfully! Shape: {df.shape}")
    except Exception as e:
        print(f"❌ Error loading {file_path}: {e}")
else:
    print(f"⚠️ Warning: {file_path} not found!")
    exit()

# Check for missing values and fill them
missing_before = df.isnull().sum().sum()
print(f"🔍 Missing Values Before Filling: {missing_before}")

# Fill missing values using bfill, ffill, and interpolation
df.bfill(axis=0, inplace=True)  # Backfill
df.ffill(axis=0, inplace=True)  # Forward fill
df.interpolate(method='linear', axis=0, inplace=True)  # Linear interpolation

missing_after = df.isnull().sum().sum()
print(f"🔍 Missing Values After Filling: {missing_after}")

# Select numeric columns
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

if not numeric_cols.empty:
    # Scale data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df[numeric_cols])

    # Reshape data for LSTM
    sequence_length = 10
    X = []
    y = []
    for i in range(sequence_length, len(scaled_data)):
        X.append(scaled_data[i-sequence_length:i])
        y.append(scaled_data[i])

    X = np.array(X)
    y = np.array(y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=False)

    # Reshape for LSTM input
    X_train = X_train.reshape(X_train.shape[0], sequence_length, scaled_data.shape[1])
    X_test = X_test.reshape(X_test.shape[0], sequence_length, scaled_data.shape[1])

    # Build LSTM model
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(sequence_length, scaled_data.shape[1])),
        LSTM(units=50),
        Dense(scaled_data.shape[1])
    ])

    # Compile model
    model.compile(optimizer='adam', loss='mse')

    # Train model
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

    # Evaluate model
    loss = model.evaluate(X_test, y_test)
    print(f"✅ Test Loss (MSE): {loss:.4f}")

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Compute R² Score and MAE
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    # Print results
    print(f"📊 R² Score: {r2:.4f}")
    print(f"📉 Mean Absolute Error (MAE): {mae:.4f}")
    print(f"📈 Mean Squared Error (MSE): {mse:.4f}")

    # Predict future value
    last_sequence = scaled_data[-sequence_length:]
    last_sequence = last_sequence.reshape(1, sequence_length, scaled_data.shape[1])
    future_value = model.predict(last_sequence)
    future_value = scaler.inverse_transform(future_value)[0]  # Inverse transform to original scale

    print(f"🔮 Predicted Future Value: {future_value}")
else:
    print(f"⚠️ No numeric columns found in {file_path}.")


✅ Loaded surface heigh.csv successfully! Shape: (20384, 5)
🔍 Missing Values Before Filling: 8736
🔍 Missing Values After Filling: 0


  df.interpolate(method='linear', axis=0, inplace=True)  # Linear interpolation
  super().__init__(**kwargs)


Epoch 1/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 0.0423 - val_loss: 0.0123
Epoch 2/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0043 - val_loss: 0.0056
Epoch 3/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 0.0021 - val_loss: 0.0044
Epoch 4/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 0.0011 - val_loss: 0.0044
Epoch 5/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 9.4995e-04 - val_loss: 0.0029
Epoch 6/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 5.7360e-04 - val_loss: 0.0023
Epoch 7/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - loss: 5.3003e-04 - val_loss: 0.0038
Epoch 8/50
[1m892/892[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - loss: 4.5431e-04 - val_loss: 0.0022
Epoch 9/50
[1m892/8