In [5]:
import pandas as pd

for file in all_files:
    try:
        df = pd.read_csv(file, delimiter=",", encoding="utf-8", on_bad_lines="skip")
        print(f"✅ Loaded {file} successfully! Shape: {df.shape}")
    except Exception as e:
        print(f"❌ Error loading {file}: {e}")


✅ Loaded /content/drive/MyDrive/OceansLSTM/surface_height.csv successfully! Shape: (7, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/zonal current.csv successfully! Shape: (8, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/vertical velocity at t points.csv successfully! Shape: (8, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/sea surface temperature.csv successfully! Shape: (7, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/dry blub temperature.csv successfully! Shape: (8, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/potential temperature.csv successfully! Shape: (8, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/surface heigh.csv successfully! Shape: (7, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/salinity.csv successfully! Shape: (8, 1)
✅ Loaded /content/drive/MyDrive/OceansLSTM/surface height on  t cell.csv successfully! Shape: (7, 1)


In [15]:
import pandas as pd
import numpy as np

# Set pandas float format to prevent scientific notation
pd.set_option('display.float_format', '{:.13f}'.format)

# File paths (update if needed)
file_paths = [
    "/content/drive/MyDrive/OceansLSTM/surface_height.csv",
    "/content/drive/MyDrive/OceansLSTM/zonal current.csv",
    "/content/drive/MyDrive/OceansLSTM/vertical velocity at t points.csv",
    "/content/drive/MyDrive/OceansLSTM/sea surface temperature.csv",
    "/content/drive/MyDrive/OceansLSTM/dry blub temperature.csv",
    "/content/drive/MyDrive/OceansLSTM/potential temperature.csv",
    "/content/drive/MyDrive/OceansLSTM/surface heigh.csv",
    "/content/drive/MyDrive/OceansLSTM/salinity.csv",
    "/content/drive/MyDrive/OceansLSTM/surface height on  t cell.csv",
]

# Dictionary to store processed DataFrames
dfs = {}

for file in file_paths:
    try:
        # Extract filename (without extension) for column naming
        file_name = file.split("/")[-1].replace(".csv", "").replace(" ", "_")

        # Auto-detect delimiter and load CSV
        df = pd.read_csv(file, skiprows=14, header=None, sep=None, engine='python')

        # Replace error values with NaN
        df.replace(['-999', '99999', 'NaN', 'Err', 'error', 'inf', '-inf', ''], np.nan, inplace=True)

        # Check column count and assign appropriate names
        if df.shape[1] == 6:
            df.columns = ['DATETIME', 'TIME', 'LON', 'LAT', 'DEP', file_name]
            df.drop(['DATETIME', 'DEP'], axis=1, inplace=True)  # Drop unwanted columns
        elif df.shape[1] == 5:
            df.columns = ['DATETIME', 'TIME', 'LON', 'LAT', file_name]
            df.drop(['DATETIME'], axis=1, inplace=True)
        else:
            print(f"⚠️ Skipping {file} due to unexpected format.")
            continue  # Skip files with incorrect structure

        # Store the cleaned DataFrame
        dfs[file] = df
        print(f"✅ Loaded {file} | Shape: {df.shape}")

    except Exception as e:
        print(f"❌ Error loading {file}: {e}")

# Merge all datasets on TIME, LON, LAT
merged_df = None
for file, df in dfs.items():
    if merged_df is None:
        merged_df = df  # First dataset
    else:
        merged_df = pd.merge(merged_df, df, on=['TIME', 'LON', 'LAT'], how='outer')

# Define error values to replace
error_values = [-9999999999999999455752309870428160.0000000000000, -999999.0, -9999.0]

# Replace error values with NaN
merged_df.replace(error_values, np.nan, inplace=True)

# Fill missing values
merged_df = merged_df.bfill().ffill().interpolate()


# Handle missing values
merged_df = merged_df.bfill().ffill().interpolate()

# Display results
print(merged_df.head())
print(f"✅ Final merged dataset shape: {merged_df.shape}")


✅ Loaded /content/drive/MyDrive/OceansLSTM/surface_height.csv | Shape: (20379, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/zonal current.csv | Shape: (17468, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/vertical velocity at t points.csv | Shape: (17468, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/sea surface temperature.csv | Shape: (20379, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/dry blub temperature.csv | Shape: (140, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/potential temperature.csv | Shape: (17468, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/surface heigh.csv | Shape: (20379, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/salinity.csv | Shape: (17468, 4)
✅ Loaded /content/drive/MyDrive/OceansLSTM/surface height on  t cell.csv | Shape: (17467, 4)
               TIME              LON              LAT   surface_height  \
0 374.2420000000000 81.5000000000000 17.5000000000000 10.7213000000000   
1 374.2420000000000 81.5000000000000 18.5000000000000 10.7213000000000   
2 

In [16]:
merged_df.to_csv('/content/merged_dataset.csv', index=False)


In [17]:
from google.colab import files
files.download('/content/merged_dataset.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load dataset
df = pd.read_csv("/content/merged_dataset.csv")

# Normalize features
scaler = MinMaxScaler()
df.iloc[:, 3:] = scaler.fit_transform(df.iloc[:, 3:])

# Prepare data for LSTM
sequence_length = 10  # Define sequence length
features = df.iloc[:, 3:].values

X, y = [], []
for i in range(len(features) - sequence_length):
    X.append(features[i:i + sequence_length])
    y.append(features[i + sequence_length])

X, y = np.array(X), np.array(y)

# Define LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length, X.shape[2])),
    LSTM(50, return_sequences=False),
    Dense(X.shape[2])
])

model.compile(optimizer='adam', loss='mse')

# Train model
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.1)

# Save model
model.save("/mnt/data/lstm_model.h5")


  super().__init__(**kwargs)


Epoch 1/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 16ms/step - loss: 0.0098 - val_loss: 0.0011
Epoch 2/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 14ms/step - loss: 0.0018 - val_loss: 6.9951e-04
Epoch 3/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 15ms/step - loss: 0.0013 - val_loss: 6.0551e-04
Epoch 4/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - loss: 0.0011 - val_loss: 5.8710e-04
Epoch 5/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 14ms/step - loss: 9.8413e-04 - val_loss: 5.3807e-04
Epoch 6/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 14ms/step - loss: 8.9101e-04 - val_loss: 5.3231e-04
Epoch 7/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 15ms/step - loss: 8.2082e-04 - val_loss: 4.6682e-04
Epoch 8/10
[1m2133/2133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 15ms/step - loss



In [19]:
model.save("my_model.keras")