In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the export. The first 14 lines of the file are skipped and no row is
# used as a header, so the six columns are named by hand.
# NOTE(review): assumes those 14 lines are descriptive header text - confirm
# against the file.
df = pd.read_csv('potential temperature.csv', skiprows=14, header=None)
df.columns = ['DATETIME', 'TIME', 'LON', 'LAT', 'DEP', 'TEMP']

# DATETIME is not used below; the numeric TIME column carries the time axis.
df = df.drop('DATETIME', axis=1)

# -1.0E+34 is replaced by NaN so it is handled as a missing value rather than
# as a real temperature (presumably the exporter's bad-value flag - confirm).
df['TEMP'] = df['TEMP'].replace(-1.0E+34, np.nan)

# Mean TEMP per TIME value, computed from the non-missing readings only.
daily_means = df.groupby('TIME')['TEMP'].mean()

# Fill every gap with the mean of its own TIME group in one vectorized pass
# (no per-group full-frame masks). A group with no valid reading has a NaN
# mean and therefore stays NaN.
# NOTE(review): TEMP is the regression target further down, so filled rows
# become synthetic labels in both the train and test splits - consider
# dropping them instead.
df['TEMP'] = df['TEMP'].fillna(df['TIME'].map(daily_means))

print(df)


# Predictors: the time axis plus horizontal position. TEMP is the target;
# DEP is not part of the feature set.
X = df.loc[:, ['TIME', 'LON', 'LAT']]
y = df.loc[:, 'TEMP']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
# NOTE(review): train_test_split shuffles rows by default, so the test set
# shares days with the training set - a chronological split may be a fairer
# check for forecasting.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)





#Linear Regression
# Least-squares baseline fitted on the training split.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred_lr = lr_model.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("Linear Regression Results:")
print(f"Mean Squared Error: {mse_lr:.6f}")
print(f"R² Score: {r2_lr:.6f}")

# Label each coefficient with the column it was fitted on. Taking the names
# from X_train keeps the table correct if the feature set is ever changed,
# instead of relying on a separately maintained list.
coefficients = pd.DataFrame({
    'Feature': list(X_train.columns),
    'Coefficient': lr_model.coef_
})
intercept = lr_model.intercept_

print("Model Coefficients:")
print(coefficients)
print(f"Intercept: {intercept:.6f}")





#Random Forest
# train_test_split and the metric functions are already imported at the top
# of this cell; only the estimator itself is new here.
from sklearn.ensemble import RandomForestRegressor

# 100 trees; the fixed seed makes the fitted forest repeatable.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate on the same held-out rows as the linear model.
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print("\nRandom Forest Results:")
print(f"Mean Squared Error: {mse_rf:.6f}")
print(f"R² Score: {r2_rf:.6f}")

# Importances come back in the order of the training columns, so the labels
# are read from X_train rather than from a separately maintained list.
feature_importance = pd.DataFrame({
    'Feature': list(X_train.columns),
    'Importance': rf_model.feature_importances_
}).sort_values('Importance', ascending=False)

print("Feature Importance:")
print(feature_importance)



          TIME    LON      LAT  DEP       TEMP
0      44194.5  83.25  15.9798    5  26.992900
1      44194.5  83.75  15.9798    5  27.004600
2      44194.5  81.25  16.3134    5  26.693600
3      44194.5  81.75  16.3134    5  26.680700
4      44194.5  82.25  16.3134    5  26.724300
...        ...    ...      ...  ...        ...
17463  44557.5  81.75  18.4590    5  26.962487
17464  44557.5  82.25  18.4590    5  26.962487
17465  44557.5  82.75  18.4590    5  26.962487
17466  44557.5  83.25  18.4590    5  26.962487
17467  44557.5  83.75  18.4590    5  26.962487

[17468 rows x 5 columns]
Linear Regression Results:
Mean Squared Error: 1.516611
R² Score: 0.056422
Model Coefficients:
  Feature  Coefficient
0    TIME     0.002920
1     LON    -0.009686
2     LAT    -0.025389
Intercept: -99.663481

Random Forest Results:
Mean Squared Error: 0.004034
R² Score: 0.997490
Feature Importance:
  Feature  Importance
0    TIME    0.977567
2     LAT    0.012787
1     LON    0.009646


In [2]:
# Forecast one TIME step past the latest value present in the data.
# NOTE(review): assumes TIME is measured in days, as the message printed
# below states - confirm against the file header.
last_day = df['TIME'].max()
next_day = last_day + 1.0

# One row per distinct (LON, LAT) pair found in the data.
unique_locations = df[['LON', 'LAT']].drop_duplicates()

# Prediction input: every location stamped with the forecast time. Columns are
# put in the same order the models were fitted on (TIME, LON, LAT) and the
# index is reset to 0..n-1.
next_day_df = (
    unique_locations
    .assign(TIME=next_day)[['TIME', 'LON', 'LAT']]
    .reset_index(drop=True)
)

# Both predictions are taken before any result column is added, so each model
# sees exactly the three feature columns.
# NOTE(review): next_day lies beyond every TIME seen in training; a tree
# ensemble cannot extrapolate past that range, so the random-forest values
# reflect the latest training days rather than a trend.
lr_predictions = lr_model.predict(next_day_df)
rf_predictions = rf_model.predict(next_day_df)

# The models were fitted on TEMP, so the outputs are labelled as temperature
# (these columns were previously mislabelled "SSH").
next_day_df['LR_Predicted_TEMP'] = lr_predictions
next_day_df['RF_Predicted_TEMP'] = rf_predictions

print(f"\nPredictions for day {next_day} (relative to Jan 1, 1901):")
print(next_day_df.head(10))



Predictions for day 44558.5 (relative to Jan 1, 1901):
      TIME    LON      LAT  LR_Predicted_SSH  RF_Predicted_SSH
0  44558.5  83.25  15.9798         29.257389         27.135000
1  44558.5  83.75  15.9798         29.252546         27.359950
2  44558.5  81.25  16.3134         29.268291         27.050283
3  44558.5  81.75  16.3134         29.263448         26.901222
4  44558.5  82.25  16.3134         29.258605         26.857112
5  44558.5  82.75  16.3134         29.253762         26.893627
6  44558.5  83.25  16.3134         29.248919         27.125940
7  44558.5  83.75  16.3134         29.244076         27.337109
8  44558.5  81.25  16.6538         29.259649         26.958843
9  44558.5  81.75  16.6538         29.254806         26.959336
