In [12]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import r2_score, mean_absolute_error

# Use raw string to avoid path errors
file_path = r"C:\Users\KIIT\Oceans\surface_height.csv"

# Load dataset
if os.path.exists(file_path):
    try:
        df = pd.read_csv(file_path, delimiter=",", encoding="utf-8", on_bad_lines="skip", engine="python", skiprows=8)

        # Convert all columns (except datetime) to numeric, forcing errors to NaN
        for col in df.columns:
            if col not in ["DATETIME", "TIME"]:
                df[col] = pd.to_numeric(df[col], errors="coerce")

        # Replace -1.E+34 (error values) with NaN
        df.replace([-1e+34, "-1.E+34", -1.0e+34, "-1.000000e+34"], np.nan, inplace=True)

        print(f"✅ Loaded {file_path} successfully! Shape: {df.shape}")
    except Exception as e:
        print(f"❌ Error loading {file_path}: {e}")
else:
    print(f"⚠️ Warning: {file_path} not found!")
    exit()

# Select numeric columns
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

if not numeric_cols.empty:
    # Scale data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df[numeric_cols])

    # Reshape data for LSTM
    sequence_length = 10
    X = []
    y = []
    for i in range(sequence_length, len(scaled_data)):
        X.append(scaled_data[i-sequence_length:i])
        y.append(scaled_data[i])

    X = np.array(X)
    y = np.array(y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=False)

    # Reshape for LSTM input
    X_train = X_train.reshape(X_train.shape[0], sequence_length, scaled_data.shape[1])
    X_test = X_test.reshape(X_test.shape[0], sequence_length, scaled_data.shape[1])

    # Build LSTM model
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(sequence_length, scaled_data.shape[1])),
        LSTM(units=50),
        Dense(scaled_data.shape[1])
    ])

    # Compile model
    model.compile(optimizer='adam', loss='mse')

    # Train model
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

    # Evaluate model
    loss = model.evaluate(X_test, y_test)
    print(f"✅ Test Loss (MSE): {loss:.4f}")

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Compute R² Score and MAE
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    # Print results
    print(f"📊 R² Score: {r2:.4f}")
    print(f"📉 Mean Absolute Error (MAE): {mae:.4f}")

else:
    print(f"⚠️ No numeric columns found in {file_path}.")


✅ Loaded C:\Users\KIIT\Oceans\surface_height.csv successfully! Shape: (20384, 5)


  super().__init__(**kwargs)


Epoch 1/50
[1m876/892[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: nan 

KeyboardInterrupt: 