In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the surface-height export. The first 14 lines of the file are skipped
# and the file is read without a header row, so column names are set by hand.
df = pd.read_csv('/content/surface height on  t cell.csv', skiprows=14, header=None)
df.columns = ['DATETIME', 'TIME', 'LON', 'LAT', 'ETA_T']

# The DATETIME column is discarded; TIME is the time axis used from here on.
df = df.drop('DATETIME', axis=1)

# -1.0E+34 is treated as the missing-value sentinel for ETA_T.
df['ETA_T'] = df['ETA_T'].replace(-1.0E+34, np.nan)

# Mean ETA_T for each TIME value (mean() skips NaN entries).
daily_means = df.groupby('TIME')['ETA_T'].mean()

# Fill every missing ETA_T with the mean of its own TIME group in one
# vectorised pass: map each row's TIME to its group mean, then use that only
# where ETA_T is NaN. This replaces a per-group Python loop that rebuilt two
# full-length boolean masks on every iteration.
# NOTE(review): ETA_T is used as the regression target further down, so the
# imputed rows become training/test targets — confirm that is intended rather
# than dropping rows with a missing target.
df['ETA_T'] = df['ETA_T'].fillna(df['TIME'].map(daily_means))

print(df)


# Predictors are the time axis plus the two spatial coordinates; the target
# is the surface-height column.
X = df.loc[:, ['TIME', 'LON', 'LAT']]
y = df.loc[:, 'ETA_T']

# 80/20 shuffled hold-out split with a fixed seed so reruns give the same rows.
# NOTE(review): the split is random, not chronological — confirm that is the
# intended evaluation if the models are used to predict future TIME values.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)





# Linear Regression
# Fit a linear baseline on the training split.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred_lr = lr_model.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("Linear Regression Results:")
print(f"Mean Squared Error: {mse_lr:.6f}")
print(f"R² Score: {r2_lr:.6f}")

# coef_ is ordered like the columns the model was fitted on, so the labels
# are taken from X_train itself instead of a separately maintained list that
# could silently drift out of sync with the feature set.
coefficients = pd.DataFrame({
    'Feature': list(X_train.columns),
    'Coefficient': lr_model.coef_
})
intercept = lr_model.intercept_

print("Model Coefficients:")
print(coefficients)
print(f"Intercept: {intercept:.6f}")





# Random Forest
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 100-tree forest with a fixed seed so repeated runs build the same trees.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print("\nRandom Forest Results:")
print(f"Mean Squared Error: {mse_rf:.6f}")
print(f"R² Score: {r2_rf:.6f}")

# feature_importances_ follows the column order of the training frame, so the
# labels come from X_train rather than a hard-coded list that could fall out
# of sync with the features actually used. Sorted most-important first.
feature_importance = pd.DataFrame({
    'Feature': list(X_train.columns),
    'Importance': rf_model.feature_importances_
}).sort_values('Importance', ascending=False)

print("Feature Importance:")
print(feature_importance)



          TIME    LON      LAT     ETA_T
0      44194.5  83.75  15.9798  0.602466
1      44194.5  81.25  16.3134  0.721345
2      44194.5  81.75  16.3134  0.716002
3      44194.5  82.25  16.3134  0.699706
4      44194.5  82.75  16.3134  0.705890
...        ...    ...      ...       ...
17462  44557.5  81.75  18.4590  0.619164
17463  44557.5  82.25  18.4590  0.619164
17464  44557.5  82.75  18.4590  0.619164
17465  44557.5  83.25  18.4590  0.619164
17466  44557.5  83.75  18.4590  0.619164

[17467 rows x 4 columns]
Linear Regression Results:
Mean Squared Error: 0.006200
R² Score: 0.015417
Model Coefficients:
  Feature  Coefficient
0    TIME     0.000100
1     LON    -0.000599
2     LAT     0.001252
Intercept: -3.865643

Random Forest Results:
Mean Squared Error: 0.000051
R² Score: 0.991951
Feature Importance:
  Feature  Importance
0    TIME    0.709803
1     LON    0.164171
2     LAT    0.126026


In [4]:
# Target time step: one TIME unit after the latest value present in df.
last_day = df['TIME'].max()
next_day = last_day + 1.0

# Every distinct (LON, LAT) pair seen in the data.
unique_locations = df[['LON', 'LAT']].drop_duplicates()

# One feature row per location, all stamped with the target time. Columns are
# put in TIME, LON, LAT order (the order the models were fitted with) and the
# index is renumbered from 0.
next_day_df = (
    unique_locations
    .assign(TIME=next_day)
    .loc[:, ['TIME', 'LON', 'LAT']]
    .reset_index(drop=True)
)

# Both models predict from the feature-only frame before any prediction
# column is attached to it.
lr_predictions = lr_model.predict(next_day_df)
rf_predictions = rf_model.predict(next_day_df)

next_day_df = next_day_df.assign(
    LR_Predicted_SSH=lr_predictions,
    RF_Predicted_SSH=rf_predictions,
)

print(f"\nPredictions for day {next_day} (relative to Jan 1, 1901):")
print(next_day_df.head(10))



Predictions for day 44558.5 (relative to Jan 1, 1901):
      TIME    LON      LAT  LR_Predicted_SSH  RF_Predicted_SSH
0  44558.5  83.75  15.9798          0.571825          0.546547
1  44558.5  81.25  16.3134          0.573740          0.626991
2  44558.5  81.75  16.3134          0.573441          0.622523
3  44558.5  82.25  16.3134          0.573141          0.627021
4  44558.5  82.75  16.3134          0.572842          0.617824
5  44558.5  83.25  16.3134          0.572543          0.573914
6  44558.5  83.75  16.3134          0.572243          0.554722
7  44558.5  81.25  16.6538          0.574166          0.619229
8  44558.5  81.75  16.6538          0.573867          0.618953
9  44558.5  82.25  16.6538          0.573568          0.636409
